10 files changed, 1839 insertions, 0 deletions
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
new file mode 100644
index 000000000..7b197667f
--- /dev/null
+++ b/arch/parisc/lib/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for parisc-specific library files
+#
+
+lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
+	   ucmpdi2.o delay.o
+
+obj-y	:= iomap.o
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
new file mode 100644
index 000000000..36a314199
--- /dev/null
+++ b/arch/parisc/lib/bitops.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bitops.c: atomic operations which got too long to be inlined all over
+ *      the place.
+ * 
+ * Copyright 1999 Philipp Rumpf (prumpf@tux.org)
+ * Copyright 2000 Grant Grundler (grundler@cup.hp.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#ifdef CONFIG_SMP
+arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
+	[0 ... (ATOMIC_HASH_SIZE-1)]  = __ARCH_SPIN_LOCK_UNLOCKED
+};
+#endif
+
+#ifdef CONFIG_64BIT
+unsigned long notrace __xchg64(unsigned long x, volatile unsigned long *ptr)
+{
+	unsigned long temp, flags;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	temp = *ptr;
+	*ptr = x;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return temp;
+}
+#endif
+
+unsigned long notrace __xchg32(int x, volatile int *ptr)
+{
+	unsigned long flags;
+	long temp;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	temp = (long) *ptr;	/* XXX - sign extension wanted? */
+	*ptr = x;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return (unsigned long)temp;
+}
+
+
+unsigned long notrace __xchg8(char x, volatile char *ptr)
+{
+	unsigned long flags;
+	long temp;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	temp = (long) *ptr;	/* XXX - sign extension wanted? */
+	*ptr = x;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return (unsigned long)temp;
+}
+
+
+u64 notrace __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new)
+{
+	unsigned long flags;
+	u64 prev;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return prev;
+}
+
+unsigned long notrace __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new)
+{
+	unsigned long flags;
+	unsigned int prev;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return (unsigned long)prev;
+}
+
+u8 notrace __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new)
+{
+	unsigned long flags;
+	u8 prev;
+
+	_atomic_spin_lock_irqsave(ptr, flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	_atomic_spin_unlock_irqrestore(ptr, flags);
+	return prev;
+}
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
new file mode 100644
index 000000000..4818f3db8
--- /dev/null
+++ b/arch/parisc/lib/checksum.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		MIPS specific IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Ralf Baechle, <ralf@waldorf-gmbh.de>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <net/checksum.h>
+#include <asm/byteorder.h>
+#include <asm/string.h>
+#include <linux/uaccess.h>
+
+#define addc(_t,_r)                     \
+	__asm__ __volatile__ (          \
+"       add             %0, %1, %0\n"   \
+"       addc            %0, %%r0, %0\n" \
+	: "=r"(_t)                      \
+	: "r"(_r), "0"(_t));
+
+static inline unsigned short from32to16(unsigned int x)
+{
+	/* 32 bits --> 16 bits + carry */
+	x = (x & 0xffff) + (x >> 16);
+	/* 16 bits + carry --> 16 bits including carry */
+	x = (x & 0xffff) + (x >> 16);
+	return (unsigned short)x;
+}
+
+static inline unsigned int do_csum(const unsigned char * buff, int len)
+{
+	int odd, count;
+	unsigned int result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = be16_to_cpu(*buff);
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+			while (count >= 4) {
+				unsigned int r1, r2, r3, r4;
+				r1 = *(unsigned int *)(buff + 0);
+				r2 = *(unsigned int *)(buff + 4);
+				r3 = *(unsigned int *)(buff + 8);
+				r4 = *(unsigned int *)(buff + 12);
+				addc(result, r1);
+				addc(result, r2);
+				addc(result, r3);
+				addc(result, r4);
+				count -= 4;
+				buff += 16;
+			}
+			while (count) {
+				unsigned int w = *(unsigned int *) buff;
+				count--;
+				buff += 4;
+				addc(result, w);
+			}
+			result = (result & 0xffff) + (result >> 16);
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += le16_to_cpu(*buff);
+	result = from32to16(result);
+	if (odd)
+		result = swab16(result);
+out:
+	return result;
+}
+
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+/*
+ * why bother folding?
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	unsigned int result = do_csum(buff, len);
+	addc(result, sum);
+	return (__force __wsum)from32to16(result);
+}
+
+EXPORT_SYMBOL(csum_partial);
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
new file mode 100644
index 000000000..66e506520
--- /dev/null
+++ b/arch/parisc/lib/delay.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *	Precise Delay Loops for parisc
+ *
+ *	based on code by:
+ *	Copyright (C) 1993 Linus Torvalds
+ *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
+ *
+ *	parisc implementation:
+ *	Copyright (C) 2013 Helge Deller <deller@gmx.de>
+ */
+
+
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/init.h>
+
+#include <asm/delay.h>
+#include <asm/special_insns.h>    /* for mfctl() */
+#include <asm/processor.h> /* for boot_cpu_data */
+
+/* CR16 based delay: */
+static void __cr16_delay(unsigned long __loops)
+{
+	/*
+	 * Note: Due to unsigned math, cr16 rollovers shouldn't be
+	 * a problem here. However, on 32 bit, we need to make sure
+	 * we don't pass in too big a value. The current default
+	 * value of MAX_UDELAY_MS should help prevent this.
+	 */
+	u32 bclock, now, loops = __loops;
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	bclock = mfctl(16);
+	for (;;) {
+		now = mfctl(16);
+		if ((now - bclock) >= loops)
+			break;
+
+		/* Allow RT tasks to run */
+		preempt_enable();
+		asm volatile("	nop\n");
+		barrier();
+		preempt_disable();
+
+		/*
+		 * It is possible that we moved to another CPU, and
+		 * since CR16's are per-cpu we need to calculate
+		 * that. The delay must guarantee that we wait "at
+		 * least" the amount of time. Being moved to another
+		 * CPU could make the wait longer but we just need to
+		 * make sure we waited long enough. Rebalance the
+		 * counter for this CPU.
+		 */
+		if (unlikely(cpu != smp_processor_id())) {
+			loops -= (now - bclock);
+			cpu = smp_processor_id();
+			bclock = mfctl(16);
+		}
+	}
+	preempt_enable();
+}
+
+
+void __udelay(unsigned long usecs)
+{
+	__cr16_delay(usecs * ((unsigned long)boot_cpu_data.cpu_hz / 1000000UL));
+}
+EXPORT_SYMBOL(__udelay);
diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c
new file mode 100644
index 000000000..7c00496b4
--- /dev/null
+++ b/arch/parisc/lib/io.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/parisc/lib/io.c
+ *
+ * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
+ * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
+ *
+ * IO accessing functions which shouldn't be inlined because they're too big
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+/* Copies a block of memory to a device in an efficient manner.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ */
+void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
+		goto bytecopy;
+	while ((unsigned long)dst & 3) {
+		writeb(*(char *)src, dst++);
+		src++;
+		count--;
+	}
+	while (count > 3) {
+		__raw_writel(*(u32 *)src, dst);
+		src += 4;
+		dst += 4;
+		count -= 4;
+	}
+ bytecopy:
+	while (count--) {
+		writeb(*(char *)src, dst++);
+		src++;
+	}
+}
+
+/*
+** Copies a block of memory from a device in an efficient manner.
+** Assumes the device can cope with 32-bit transfers.  If it can't,
+** don't use this function.
+**
+** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
+**	27341/64    = 427 cyc per int
+**	61311/128   = 478 cyc per short
+**	122637/256  = 479 cyc per byte
+** Ergo bus latencies dominant (not transfer size).
+**      Minimize total number of transfers at cost of CPU cycles.
+**	TODO: only look at src alignment and adjust the stores to dest.
+*/
+void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+	/* first compare alignment of src/dst */ 
+	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
+		goto bytecopy;
+
+	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
+		goto shortcopy;
+
+	/* Then check for misaligned start address */
+	if ((unsigned long)src & 1) {
+		*(u8 *)dst = readb(src);
+		src++;
+		dst++;
+		count--;
+		if (count < 2) goto bytecopy;
+	}
+
+	if ((unsigned long)src & 2) {
+		*(u16 *)dst = __raw_readw(src);
+		src += 2;
+		dst += 2;
+		count -= 2;
+	}
+
+	while (count > 3) {
+		*(u32 *)dst = __raw_readl(src);
+		dst += 4;
+		src += 4;
+		count -= 4;
+	}
+
+ shortcopy:
+	while (count > 1) {
+		*(u16 *)dst = __raw_readw(src);
+		src += 2;
+		dst += 2;
+		count -= 2;
+	}
+
+ bytecopy:
+	while (count--) {
+		*(char *)dst = readb(src);
+		src++;
+		dst++;
+	}
+}
+
+/* Sets a block of memory on a device to a given value.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ */
+void memset_io(volatile void __iomem *addr, unsigned char val, int count)
+{
+	u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
+	while ((unsigned long)addr & 3) {
+		writeb(val, addr++);
+		count--;
+	}
+	while (count > 3) {
+		__raw_writel(val32, addr);
+		addr += 4;
+		count -= 4;
+	}
+	while (count--) {
+		writeb(val, addr++);
+	}
+}
+
+/*
+ * Read COUNT 8-bit bytes from port PORT into memory starting at
+ * SRC.
+ */
+void insb (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned char *p;
+
+	p = (unsigned char *)dst;
+
+	while (((unsigned long)p) & 0x3) {
+		if (!count)
+			return;
+		count--;
+		*p = inb(port);
+		p++;
+	}
+
+	while (count >= 4) {
+		unsigned int w;
+		count -= 4;
+		w = inb(port) << 24;
+		w |= inb(port) << 16;
+		w |= inb(port) << 8;
+		w |= inb(port);
+		*(unsigned int *) p = w;
+		p += 4;
+	}
+
+	while (count) {
+		--count;
+		*p = inb(port);
+		p++;
+	}
+}
+
+
+/*
+ * Read COUNT 16-bit words from port PORT into memory starting at
+ * SRC.  SRC must be at least short aligned.  This is used by the
+ * IDE driver to read disk sectors.  Performance is important, but
+ * the interfaces seems to be slow: just using the inlined version
+ * of the inw() breaks things.
+ */
+void insw (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	unsigned char *p;
+
+	p = (unsigned char *)dst;
+	
+	if (!count)
+		return;
+	
+	switch (((unsigned long)p) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count>=2) {
+			
+			count -= 2;
+			l = cpu_to_le16(inw(port)) << 16;
+			l |= cpu_to_le16(inw(port));
+			*(unsigned int *)p = l;
+			p += 4;
+		}
+		if (count) {
+			*(unsigned short *)p = cpu_to_le16(inw(port));
+		}
+		break;
+	
+	 case 0x02:			/* Buffer 16-bit aligned */
+		*(unsigned short *)p = cpu_to_le16(inw(port));
+		p += 2;
+		count--;
+		while (count>=2) {
+			
+			count -= 2;
+			l = cpu_to_le16(inw(port)) << 16;
+			l |= cpu_to_le16(inw(port));
+			*(unsigned int *)p = l;
+			p += 4;
+		}
+		if (count) {
+			*(unsigned short *)p = cpu_to_le16(inw(port));
+		}
+		break;
+		
+	 case 0x01:			/* Buffer 8-bit aligned */
+	 case 0x03:
+		/* I don't bother with 32bit transfers
+		 * in this case, 16bit will have to do -- DE */
+		--count;
+		
+		l = cpu_to_le16(inw(port));
+		*p = l >> 8;
+		p++;
+		while (count--)
+		{
+			l2 = cpu_to_le16(inw(port));
+			*(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
+			p += 2;
+			l = l2;
+		}
+		*p = l & 0xff;
+		break;
+	}
+}
+
+
+
+/*
+ * Read COUNT 32-bit words from port PORT into memory starting at
+ * SRC. Now works with any alignment in SRC. Performance is important,
+ * but the interfaces seems to be slow: just using the inlined version
+ * of the inl() breaks things.
+ */
+void insl (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	unsigned char *p;
+
+	p = (unsigned char *)dst;
+	
+	if (!count)
+		return;
+	
+	switch (((unsigned long) dst) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count--)
+		{
+			*(unsigned int *)p = cpu_to_le32(inl(port));
+			p += 4;
+		}
+		break;
+	
+	 case 0x02:			/* Buffer 16-bit aligned */
+		--count;
+		
+		l = cpu_to_le32(inl(port));
+		*(unsigned short *)p = l >> 16;
+		p += 2;
+		
+		while (count--)
+		{
+			l2 = cpu_to_le32(inl(port));
+			*(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16);
+			p += 4;
+			l = l2;
+		}
+		*(unsigned short *)p = l & 0xffff;
+		break;
+	 case 0x01:			/* Buffer 8-bit aligned */
+		--count;
+		
+		l = cpu_to_le32(inl(port));
+		*(unsigned char *)p = l >> 24;
+		p++;
+		*(unsigned short *)p = (l >> 8) & 0xffff;
+		p += 2;
+		while (count--)
+		{
+			l2 = cpu_to_le32(inl(port));
+			*(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8);
+			p += 4;
+			l = l2;
+		}
+		*p = l & 0xff;
+		break;
+	 case 0x03:			/* Buffer 8-bit aligned */
+		--count;
+		
+		l = cpu_to_le32(inl(port));
+		*p = l >> 24;
+		p++;
+		while (count--)
+		{
+			l2 = cpu_to_le32(inl(port));
+			*(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24;
+			p += 4;
+			l = l2;
+		}
+		*(unsigned short *)p = (l >> 8) & 0xffff;
+		p += 2;
+		*p = l & 0xff;
+		break;
+	}
+}
+
+
+/*
+ * Like insb but in the opposite direction.
+ * Don't worry as much about doing aligned memory transfers:
+ * doing byte reads the "slow" way isn't nearly as slow as
+ * doing byte writes the slow way (no r-m-w cycle).
+ */
+void outsb(unsigned long port, const void * src, unsigned long count)
+{
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+	while (count) {
+		count--;
+		outb(*p, port);
+		p++;
+	}
+}
+
+/*
+ * Like insw but in the opposite direction.  This is used by the IDE
+ * driver to write disk sectors.  Performance is important, but the
+ * interfaces seems to be slow: just using the inlined version of the
+ * outw() breaks things.
+ */
+void outsw (unsigned long port, const void *src, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+	
+	if (!count)
+		return;
+	
+	switch (((unsigned long)p) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count>=2) {
+			count -= 2;
+			l = *(unsigned int *)p;
+			p += 4;
+			outw(le16_to_cpu(l >> 16), port);
+			outw(le16_to_cpu(l & 0xffff), port);
+		}
+		if (count) {
+			outw(le16_to_cpu(*(unsigned short*)p), port);
+		}
+		break;
+	
+	 case 0x02:			/* Buffer 16-bit aligned */
+		
+		outw(le16_to_cpu(*(unsigned short*)p), port);
+		p += 2;
+		count--;
+		
+		while (count>=2) {
+			count -= 2;
+			l = *(unsigned int *)p;
+			p += 4;
+			outw(le16_to_cpu(l >> 16), port);
+			outw(le16_to_cpu(l & 0xffff), port);
+		}
+		if (count) {
+			outw(le16_to_cpu(*(unsigned short *)p), port);
+		}
+		break;
+		
+	 case 0x01:			/* Buffer 8-bit aligned */	
+		/* I don't bother with 32bit transfers
+		 * in this case, 16bit will have to do -- DE */
+		
+		l  = *p << 8;
+		p++;
+		count--;
+		while (count)
+		{
+			count--;
+			l2 = *(unsigned short *)p;
+			p += 2;
+			outw(le16_to_cpu(l | l2 >> 8), port);
+		        l = l2 << 8;
+		}
+		l2 = *(unsigned char *)p;
+		outw (le16_to_cpu(l | l2>>8), port);
+		break;
+	
+	}
+}
+
+
+/*
+ * Like insl but in the opposite direction.  This is used by the IDE
+ * driver to write disk sectors.  Works with any alignment in SRC.
+ *  Performance is important, but the interfaces seems to be slow:
+ * just using the inlined version of the outl() breaks things.
+ */
+void outsl (unsigned long port, const void *src, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+	
+	if (!count)
+		return;
+	
+	switch (((unsigned long)p) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count--)
+		{
+			outl(le32_to_cpu(*(unsigned int *)p), port);
+			p += 4;
+		}
+		break;
+	
+	 case 0x02:			/* Buffer 16-bit aligned */
+		--count;
+		
+		l = *(unsigned short *)p;
+		p += 2;
+		
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
+			l = l2;
+		}
+		l2 = *(unsigned short *)p;
+		outl (le32_to_cpu(l << 16 | l2), port);
+		break;
+	 case 0x01:			/* Buffer 8-bit aligned */
+		--count;
+
+		l = *p << 24;
+		p++;
+		l |= *(unsigned short *)p << 8;
+		p += 2;
+
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l | l2 >> 24), port);
+			l = l2 << 8;
+		}
+		l2 = *p;
+		outl (le32_to_cpu(l | l2), port);
+		break;
+	 case 0x03:			/* Buffer 8-bit aligned */
+		--count;
+		
+		l = *p << 24;
+		p++;
+
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l | l2 >> 8), port);
+			l = l2 << 24;
+		}
+		l2 = *(unsigned short *)p << 16;
+		p += 2;
+		l2 |= *p;
+		outl (le32_to_cpu(l | l2), port);
+		break;
+	}
+}
+
+EXPORT_SYMBOL(insb);
+EXPORT_SYMBOL(insw);
+EXPORT_SYMBOL(insl);
+EXPORT_SYMBOL(outsb);
+EXPORT_SYMBOL(outsw);
+EXPORT_SYMBOL(outsl);
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
new file mode 100644
index 000000000..915c0c4da
--- /dev/null
+++ b/arch/parisc/lib/iomap.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * iomap.c - Implement iomap interface for PA-RISC
+ * Copyright (c) 2004 Matthew Wilcox
+ */
+
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/io.h>
+
+/*
+ * The iomap space on 32-bit PA-RISC is intended to look like this:
+ * 00000000-7fffffff virtual mapped IO
+ * 80000000-8fffffff ISA/EISA port space that can't be virtually mapped
+ * 90000000-9fffffff Dino port space
+ * a0000000-afffffff Astro port space
+ * b0000000-bfffffff PAT port space
+ * c0000000-cfffffff non-swapped memory IO
+ * f0000000-ffffffff legacy IO memory pointers
+ *
+ * For the moment, here's what it looks like:
+ * 80000000-8fffffff All ISA/EISA port space
+ * f0000000-ffffffff legacy IO memory pointers
+ *
+ * On 64-bit, everything is extended, so:
+ * 8000000000000000-8fffffffffffffff All ISA/EISA port space
+ * f000000000000000-ffffffffffffffff legacy IO memory pointers
+ */
+
+/*
+ * Technically, this should be 'if (VMALLOC_START < addr < VMALLOC_END),
+ * but that's slow and we know it'll be within the first 2GB.
+ */
+#ifdef CONFIG_64BIT
+#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & 1UL<<63) != 0)
+#define ADDR_TO_REGION(addr)    (((unsigned long)addr >> 60) & 7)
+#define IOPORT_MAP_BASE		(8UL << 60)
+#else
+#define INDIRECT_ADDR(addr)     (((unsigned long)(addr) & 1UL<<31) != 0)
+#define ADDR_TO_REGION(addr)    (((unsigned long)addr >> 28) & 7)
+#define IOPORT_MAP_BASE		(8UL << 28)
+#endif
+
+struct iomap_ops {
+	unsigned int (*read8)(const void __iomem *);
+	unsigned int (*read16)(const void __iomem *);
+	unsigned int (*read16be)(const void __iomem *);
+	unsigned int (*read32)(const void __iomem *);
+	unsigned int (*read32be)(const void __iomem *);
+#ifdef CONFIG_64BIT
+	u64 (*read64)(const void __iomem *);
+	u64 (*read64be)(const void __iomem *);
+#endif
+	void (*write8)(u8, void __iomem *);
+	void (*write16)(u16, void __iomem *);
+	void (*write16be)(u16, void __iomem *);
+	void (*write32)(u32, void __iomem *);
+	void (*write32be)(u32, void __iomem *);
+#ifdef CONFIG_64BIT
+	void (*write64)(u64, void __iomem *);
+	void (*write64be)(u64, void __iomem *);
+#endif
+	void (*read8r)(const void __iomem *, void *, unsigned long);
+	void (*read16r)(const void __iomem *, void *, unsigned long);
+	void (*read32r)(const void __iomem *, void *, unsigned long);
+	void (*write8r)(void __iomem *, const void *, unsigned long);
+	void (*write16r)(void __iomem *, const void *, unsigned long);
+	void (*write32r)(void __iomem *, const void *, unsigned long);
+};
+
+/* Generic ioport ops.  To be replaced later by specific dino/elroy/wax code */
+
+#define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff)
+
+static unsigned int ioport_read8(const void __iomem *addr)
+{
+	return inb(ADDR2PORT(addr));
+}
+
+static unsigned int ioport_read16(const void __iomem *addr)
+{
+	return inw(ADDR2PORT(addr));
+}
+
+static unsigned int ioport_read32(const void __iomem *addr)
+{
+	return inl(ADDR2PORT(addr));
+}
+
+static void ioport_write8(u8 datum, void __iomem *addr)
+{
+	outb(datum, ADDR2PORT(addr));
+}
+
+static void ioport_write16(u16 datum, void __iomem *addr)
+{
+	outw(datum, ADDR2PORT(addr));
+}
+
+static void ioport_write32(u32 datum, void __iomem *addr)
+{
+	outl(datum, ADDR2PORT(addr));
+}
+
+static void ioport_read8r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	insb(ADDR2PORT(addr), dst, count);
+}
+
+static void ioport_read16r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	insw(ADDR2PORT(addr), dst, count);
+}
+
+static void ioport_read32r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	insl(ADDR2PORT(addr), dst, count);
+}
+
+static void ioport_write8r(void __iomem *addr, const void *s, unsigned long n)
+{
+	outsb(ADDR2PORT(addr), s, n);
+}
+
+static void ioport_write16r(void __iomem *addr, const void *s, unsigned long n)
+{
+	outsw(ADDR2PORT(addr), s, n);
+}
+
+static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n)
+{
+	outsl(ADDR2PORT(addr), s, n);
+}
+
+static const struct iomap_ops ioport_ops = {
+	.read8 = ioport_read8,
+	.read16 = ioport_read16,
+	.read16be = ioport_read16,
+	.read32 = ioport_read32,
+	.read32be = ioport_read32,
+	.write8 = ioport_write8,
+	.write16 = ioport_write16,
+	.write16be = ioport_write16,
+	.write32 = ioport_write32,
+	.write32be = ioport_write32,
+	.read8r = ioport_read8r,
+	.read16r = ioport_read16r,
+	.read32r = ioport_read32r,
+	.write8r = ioport_write8r,
+	.write16r = ioport_write16r,
+	.write32r = ioport_write32r,
+};
+
+/* Legacy I/O memory ops */
+
+static unsigned int iomem_read8(const void __iomem *addr)
+{
+	return readb(addr);
+}
+
+static unsigned int iomem_read16(const void __iomem *addr)
+{
+	return readw(addr);
+}
+
+static unsigned int iomem_read16be(const void __iomem *addr)
+{
+	return __raw_readw(addr);
+}
+
+static unsigned int iomem_read32(const void __iomem *addr)
+{
+	return readl(addr);
+}
+
+static unsigned int iomem_read32be(const void __iomem *addr)
+{
+	return __raw_readl(addr);
+}
+
+#ifdef CONFIG_64BIT
+static u64 iomem_read64(const void __iomem *addr)
+{
+	return readq(addr);
+}
+
+static u64 iomem_read64be(const void __iomem *addr)
+{
+	return __raw_readq(addr);
+}
+#endif
+
+static void iomem_write8(u8 datum, void __iomem *addr)
+{
+	writeb(datum, addr);
+}
+
+static void iomem_write16(u16 datum, void __iomem *addr)
+{
+	writew(datum, addr);
+}
+
+static void iomem_write16be(u16 datum, void __iomem *addr)
+{
+	__raw_writew(datum, addr);
+}
+
+static void iomem_write32(u32 datum, void __iomem *addr)
+{
+	writel(datum, addr);
+}
+
+static void iomem_write32be(u32 datum, void __iomem *addr)
+{
+	__raw_writel(datum, addr);
+}
+
+#ifdef CONFIG_64BIT
+static void iomem_write64(u64 datum, void __iomem *addr)
+{
+	writeq(datum, addr);
+}
+
+static void iomem_write64be(u64 datum, void __iomem *addr)
+{
+	__raw_writeq(datum, addr);
+}
+#endif
+
+static void iomem_read8r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	while (count--) {
+		*(u8 *)dst = __raw_readb(addr);
+		dst++;
+	}
+}
+
+static void iomem_read16r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	while (count--) {
+		*(u16 *)dst = __raw_readw(addr);
+		dst += 2;
+	}
+}
+
+static void iomem_read32r(const void __iomem *addr, void *dst, unsigned long count)
+{
+	while (count--) {
+		*(u32 *)dst = __raw_readl(addr);
+		dst += 4;
+	}
+}
+
+static void iomem_write8r(void __iomem *addr, const void *s, unsigned long n)
+{
+	while (n--) {
+		__raw_writeb(*(u8 *)s, addr);
+		s++;
+	}
+}
+
+static void iomem_write16r(void __iomem *addr, const void *s, unsigned long n)
+{
+	while (n--) {
+		__raw_writew(*(u16 *)s, addr);
+		s += 2;
+	}
+}
+
+static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n)
+{
+	while (n--) {
+		__raw_writel(*(u32 *)s, addr);
+		s += 4;
+	}
+}
+
+static const struct iomap_ops iomem_ops = {
+	.read8 = iomem_read8,
+	.read16 = iomem_read16,
+	.read16be = iomem_read16be,
+	.read32 = iomem_read32,
+	.read32be = iomem_read32be,
+#ifdef CONFIG_64BIT
+	.read64 = iomem_read64,
+	.read64be = iomem_read64be,
+#endif
+	.write8 = iomem_write8,
+	.write16 = iomem_write16,
+	.write16be = iomem_write16be,
+	.write32 = iomem_write32,
+	.write32be = iomem_write32be,
+#ifdef CONFIG_64BIT
+	.write64 = iomem_write64,
+	.write64be = iomem_write64be,
+#endif
+	.read8r = iomem_read8r,
+	.read16r = iomem_read16r,
+	.read32r = iomem_read32r,
+	.write8r = iomem_write8r,
+	.write16r = iomem_write16r,
+	.write32r = iomem_write32r,
+};
+
+static const struct iomap_ops *iomap_ops[8] = {
+	[0] = &ioport_ops,
+	[7] = &iomem_ops
+};
+
+
+unsigned int ioread8(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr);
+	return *((u8 *)addr);
+}
+
+unsigned int ioread16(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr);
+	return le16_to_cpup((u16 *)addr);
+}
+
+unsigned int ioread16be(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr);
+	return *((u16 *)addr);
+}
+
+unsigned int ioread32(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr);
+	return le32_to_cpup((u32 *)addr);
+}
+
+unsigned int ioread32be(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr);
+	return *((u32 *)addr);
+}
+
+#ifdef CONFIG_64BIT
+u64 ioread64(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read64(addr);
+	return le64_to_cpup((u64 *)addr);
+}
+
+u64 ioread64be(const void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr)))
+		return iomap_ops[ADDR_TO_REGION(addr)]->read64be(addr);
+	return *((u64 *)addr);
+}
+#endif
+
+void iowrite8(u8 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write8(datum, addr);
+	} else {
+		*((u8 *)addr) = datum;
+	}
+}
+
+void iowrite16(u16 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write16(datum, addr);
+	} else {
+		*((u16 *)addr) = cpu_to_le16(datum);
+	}
+}
+
+void iowrite16be(u16 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write16be(datum, addr);
+	} else {
+		*((u16 *)addr) = datum;
+	}
+}
+
+void iowrite32(u32 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write32(datum, addr);
+	} else {
+		*((u32 *)addr) = cpu_to_le32(datum);
+	}
+}
+
+void iowrite32be(u32 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write32be(datum, addr);
+	} else {
+		*((u32 *)addr) = datum;
+	}
+}
+
+#ifdef CONFIG_64BIT
+void iowrite64(u64 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write64(datum, addr);
+	} else {
+		*((u64 *)addr) = cpu_to_le64(datum);
+	}
+}
+
+void iowrite64be(u64 datum, void __iomem *addr)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write64be(datum, addr);
+	} else {
+		*((u64 *)addr) = datum;
+	}
+}
+#endif
+
+/* Repeating interfaces */
+
+void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count);
+	} else {
+		while (count--) {
+			*(u8 *)dst = *(u8 *)addr;
+			dst++;
+		}
+	}
+}
+
+void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count);
+	} else {
+		while (count--) {
+			*(u16 *)dst = *(u16 *)addr;
+			dst += 2;
+		}
+	}
+}
+
+void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count);
+	} else {
+		while (count--) {
+			*(u32 *)dst = *(u32 *)addr;
+			dst += 4;
+		}
+	}
+}
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write8r(addr, src, count);
+	} else {
+		while (count--) {
+			*(u8 *)addr = *(u8 *)src;
+			src++;
+		}
+	}
+}
+
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write16r(addr, src, count);
+	} else {
+		while (count--) {
+			*(u16 *)addr = *(u16 *)src;
+			src += 2;
+		}
+	}
+}
+
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	if (unlikely(INDIRECT_ADDR(addr))) {
+		iomap_ops[ADDR_TO_REGION(addr)]->write32r(addr, src, count);
+	} else {
+		while (count--) {
+			*(u32 *)addr = *(u32 *)src;
+			src += 4;
+		}
+	}
+}
+
+/* Mapping interfaces */
+
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *)(IOPORT_MAP_BASE | port);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	if (!INDIRECT_ADDR(addr)) {
+		iounmap(addr);
+	}
+}
+
+#ifdef CONFIG_PCI
+void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
+{
+	if (!INDIRECT_ADDR(addr)) {
+		iounmap(addr);
+	}
+}
+EXPORT_SYMBOL(pci_iounmap);
+#endif
+
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+#ifdef CONFIG_64BIT
+EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64be);
+#endif
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+#ifdef CONFIG_64BIT
+EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64be);
+#endif
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
new file mode 100644
index 000000000..b428d29e4
--- /dev/null
+++ b/arch/parisc/lib/lusercopy.S
@@ -0,0 +1,362 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *    User Space Access Routines
+ *
+ *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
+ *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
+ *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
+ *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
+ *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
+ *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
+ */
+
+/*
+ * These routines still have plenty of room for optimization
+ * (word & doubleword load/store, dual issue, store hints, etc.).
+ */
+
+/*
+ * The following routines assume that space register 3 (sr3) contains
+ * the space id associated with the current users address space.
+ */
+
+
+	.text
+	
+#include <asm/assembly.h>
+#include <asm/errno.h>
+#include <linux/linkage.h>
+
+	/*
+	 * unsigned long lclear_user(void *to, unsigned long n)
+	 *
+	 * Returns 0 for success.
+	 * otherwise, returns number of bytes not transferred.
+	 */
+
+ENTRY_CFI(lclear_user)
+	comib,=,n   0,%r25,$lclu_done
+$lclu_loop:
+	addib,<>    -1,%r25,$lclu_loop
+1:	stbs,ma     %r0,1(%sr3,%r26)
+
+$lclu_done:
+	bv          %r0(%r2)
+	copy        %r25,%r28
+
+2:	b           $lclu_done
+	ldo         1(%r25),%r25
+
+	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
+ENDPROC_CFI(lclear_user)
+
+
+/*
+ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+ *
+ * Inputs:
+ * - sr1 already contains space of source region
+ * - sr2 already contains space of destination region
+ *
+ * Returns:
+ * - number of bytes that could not be copied.
+ *   On success, this will be zero.
+ *
+ * This code is based on a C-implementation of a copy routine written by
+ * Randolph Chung, which in turn was derived from the glibc.
+ *
+ * Several strategies are tried to try to get the best performance for various
+ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
+ * at a time using general registers.  Unaligned copies are handled either by
+ * aligning the destination and then using shift-and-write method, or in a few
+ * cases by falling back to a byte-at-a-time copy.
+ *
+ * Testing with various alignments and buffer sizes shows that this code is
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version of memcpy
+ * (written in C) is actually quite fast already. This routine is able to beat
+ * it by 30-40% for aligned copies because of the loop unrolling, but in some
+ * cases the glibc version is still slightly faster. This lends more
+ * credibility that gcc can generate very good code as long as we are careful.
+ *
+ * Possible optimizations:
+ * - add cache prefetching
+ * - try not to use the post-increment address modifiers; they may create
+ *   additional interlocks. Assumption is that those were only efficient on old
+ *   machines (pre PA8000 processors)
+ */
+
+	dst = arg0
+	src = arg1
+	len = arg2
+	end = arg3
+	t1  = r19
+	t2  = r20
+	t3  = r21
+	t4  = r22
+	srcspc = sr1
+	dstspc = sr2
+
+	t0 = r1
+	a1 = t1
+	a2 = t2
+	a3 = t3
+	a0 = t4
+
+	save_src = ret0
+	save_dst = ret1
+	save_len = r31
+
+ENTRY_CFI(pa_memcpy)
+	/* Last destination address */
+	add	dst,len,end
+
+	/* short copy with less than 16 bytes? */
+	cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+	/* same alignment? */
+	xor	src,dst,t0
+	extru	t0,31,2,t1
+	cmpib,<>,n  0,t1,.Lunaligned_copy
+
+#ifdef CONFIG_64BIT
+	/* only do 64-bit copies if we can get aligned. */
+	extru	t0,31,3,t1
+	cmpib,<>,n  0,t1,.Lalign_loop32
+
+	/* loop until we are 64-bit aligned */
+.Lalign_loop64:
+	extru	dst,31,3,t1
+	cmpib,=,n	0,t1,.Lcopy_loop_16_start
+20:	ldb,ma	1(srcspc,src),t1
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lalign_loop64
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_loop_16_start:
+	ldi	31,t0
+.Lcopy_loop_16:
+	cmpb,COND(>>=),n t0,len,.Lword_loop
+
+10:	ldd	0(srcspc,src),t1
+11:	ldd	8(srcspc,src),t2
+	ldo	16(src),src
+12:	std,ma	t1,8(dstspc,dst)
+13:	std,ma	t2,8(dstspc,dst)
+14:	ldd	0(srcspc,src),t1
+15:	ldd	8(srcspc,src),t2
+	ldo	16(src),src
+16:	std,ma	t1,8(dstspc,dst)
+17:	std,ma	t2,8(dstspc,dst)
+
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+	b	.Lcopy_loop_16
+	ldo	-32(len),len
+
+.Lword_loop:
+	cmpib,COND(>>=),n 3,len,.Lbyte_loop
+20:	ldw,ma	4(srcspc,src),t1
+21:	stw,ma	t1,4(dstspc,dst)
+	b	.Lword_loop
+	ldo	-4(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+#endif /* CONFIG_64BIT */
+
+	/* loop until we are 32-bit aligned */
+.Lalign_loop32:
+	extru	dst,31,2,t1
+	cmpib,=,n	0,t1,.Lcopy_loop_8
+20:	ldb,ma	1(srcspc,src),t1
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lalign_loop32
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+
+.Lcopy_loop_8:
+	cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+10:	ldw	0(srcspc,src),t1
+11:	ldw	4(srcspc,src),t2
+12:	stw,ma	t1,4(dstspc,dst)
+13:	stw,ma	t2,4(dstspc,dst)
+14:	ldw	8(srcspc,src),t1
+15:	ldw	12(srcspc,src),t2
+	ldo	16(src),src
+16:	stw,ma	t1,4(dstspc,dst)
+17:	stw,ma	t2,4(dstspc,dst)
+
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+	b	.Lcopy_loop_8
+	ldo	-16(len),len
+
+.Lbyte_loop:
+	cmpclr,COND(<>) len,%r0,%r0
+	b,n	.Lcopy_done
+20:	ldb	0(srcspc,src),t1
+	ldo	1(src),src
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lbyte_loop
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_done:
+	bv	%r0(%r2)
+	sub	end,dst,ret0
+
+
+	/* src and dst are not aligned the same way. */
+	/* need to go the hard way */
+.Lunaligned_copy:
+	/* align until dst is 32bit-word-aligned */
+	extru	dst,31,2,t1
+	cmpib,=,n	0,t1,.Lcopy_dstaligned
+20:	ldb	0(srcspc,src),t1
+	ldo	1(src),src
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lunaligned_copy
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_dstaligned:
+
+	/* store src, dst and len in safe place */
+	copy	src,save_src
+	copy	dst,save_dst
+	copy	len,save_len
+
+	/* len now needs give number of words to copy */
+	SHRREG	len,2,len
+
+	/*
+	 * Copy from a not-aligned src to an aligned dst using shifts.
+	 * Handles 4 words per loop.
+	 */
+
+	depw,z src,28,2,t0
+	subi 32,t0,t0
+	mtsar t0
+	extru len,31,2,t0
+	cmpib,= 2,t0,.Lcase2
+	/* Make src aligned by rounding it down.  */
+	depi 0,31,2,src
+
+	cmpiclr,<> 3,t0,%r0
+	b,n .Lcase3
+	cmpiclr,<> 1,t0,%r0
+	b,n .Lcase1
+.Lcase0:
+	cmpb,COND(=) %r0,len,.Lcda_finish
+	nop
+
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b,n .Ldo3
+.Lcase1:
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	ldo -1(len),len
+	cmpb,COND(=),n %r0,len,.Ldo0
+.Ldo4:
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a2, a3, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo3:
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a3, a0, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo2:
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a0, a1, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo1:
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a1, a2, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+	ldo -4(len),len
+	cmpb,COND(<>) %r0,len,.Ldo4
+	nop
+.Ldo0:
+	shrpw a2, a3, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+
+.Lcda_rdfault:
+.Lcda_finish:
+	/* calculate new src, dst and len and jump to byte-copy loop */
+	sub	dst,save_dst,t0
+	add	save_src,t0,src
+	b	.Lbyte_loop
+	sub	save_len,t0,len
+
+.Lcase3:
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b .Ldo2
+	ldo 1(len),len
+.Lcase2:
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b .Ldo1
+	ldo 2(len),len
+
+
+	/* fault exception fixup handlers: */
+#ifdef CONFIG_64BIT
+.Lcopy16_fault:
+	b	.Lcopy_done
+10:	std,ma	t1,8(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+#endif
+
+.Lcopy8_fault:
+	b	.Lcopy_done
+10:	stw,ma	t1,4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+ENDPROC_CFI(pa_memcpy)
+
+	.end
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
new file mode 100644
index 000000000..5fc0c852c
--- /dev/null
+++ b/arch/parisc/lib/memcpy.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *    Optimized memory copy routines.
+ *
+ *    Copyright (C) 2004 Randolph Chung <tausq@debian.org>
+ *    Copyright (C) 2013-2017 Helge Deller <deller@gmx.de>
+ *
+ *    Portions derived from the GNU C Library
+ *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/uaccess.h>
+
+#define get_user_space()	mfsp(SR_USER)
+#define get_kernel_space()	SR_KERNEL
+
+/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
+extern unsigned long pa_memcpy(void *dst, const void *src,
+				unsigned long len);
+
+unsigned long raw_copy_to_user(void __user *dst, const void *src,
+			       unsigned long len)
+{
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_user_space(), SR_TEMP2);
+	return pa_memcpy((void __force *)dst, src, len);
+}
+EXPORT_SYMBOL(raw_copy_to_user);
+
+unsigned long raw_copy_from_user(void *dst, const void __user *src,
+			       unsigned long len)
+{
+	mtsp(get_user_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
+	return pa_memcpy(dst, (void __force *)src, len);
+}
+EXPORT_SYMBOL(raw_copy_from_user);
+
+void * memcpy(void * dst,const void *src, size_t count)
+{
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
+	pa_memcpy(dst, src, count);
+	return dst;
+}
+
+EXPORT_SYMBOL(memcpy);
+
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+	if ((unsigned long)unsafe_src < PAGE_SIZE)
+		return false;
+	/* check for I/O space F_EXTEND(0xfff00000) access as well? */
+	return true;
+}
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c
new file mode 100644
index 000000000..133e48098
--- /dev/null
+++ b/arch/parisc/lib/memset.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/types.h>
+#include <asm/string.h>
+
+#define OPSIZ (BITS_PER_LONG/8)
+typedef unsigned long op_t;
+
+void *
+memset (void *dstpp, int sc, size_t len)
+{
+  unsigned int c = sc;
+  long int dstp = (long int) dstpp;
+
+  if (len >= 8)
+    {
+      size_t xlen;
+      op_t cccc;
+
+      cccc = (unsigned char) c;
+      cccc |= cccc << 8;
+      cccc |= cccc << 16;
+      if (OPSIZ > 4)
+	/* Do the shift in two steps to avoid warning if long has 32 bits.  */
+	cccc |= (cccc << 16) << 16;
+
+      /* There are at least some bytes to set.
+	 No need to test for LEN == 0 in this alignment loop.  */
+      while (dstp % OPSIZ != 0)
+	{
+	  ((unsigned char *) dstp)[0] = c;
+	  dstp += 1;
+	  len -= 1;
+	}
+
+      /* Write 8 `op_t' per iteration until less than 8 `op_t' remain.  */
+      xlen = len / (OPSIZ * 8);
+      while (xlen > 0)
+	{
+	  ((op_t *) dstp)[0] = cccc;
+	  ((op_t *) dstp)[1] = cccc;
+	  ((op_t *) dstp)[2] = cccc;
+	  ((op_t *) dstp)[3] = cccc;
+	  ((op_t *) dstp)[4] = cccc;
+	  ((op_t *) dstp)[5] = cccc;
+	  ((op_t *) dstp)[6] = cccc;
+	  ((op_t *) dstp)[7] = cccc;
+	  dstp += 8 * OPSIZ;
+	  xlen -= 1;
+	}
+      len %= OPSIZ * 8;
+
+      /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain.  */
+      xlen = len / OPSIZ;
+      while (xlen > 0)
+	{
+	  ((op_t *) dstp)[0] = cccc;
+	  dstp += OPSIZ;
+	  xlen -= 1;
+	}
+      len %= OPSIZ;
+    }
+
+  /* Write the last few bytes.  */
+  while (len > 0)
+    {
+      ((unsigned char *) dstp)[0] = c;
+      dstp += 1;
+      len -= 1;
+    }
+
+  return dstpp;
+}
diff --git a/arch/parisc/lib/ucmpdi2.c b/arch/parisc/lib/ucmpdi2.c
new file mode 100644
index 000000000..8e6014a14
--- /dev/null
+++ b/arch/parisc/lib/ucmpdi2.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+
+union ull_union {
+	unsigned long long ull;
+	struct {
+		unsigned int high;
+		unsigned int low;
+	} ui;
+};
+
+int __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	union ull_union au = {.ull = a};
+	union ull_union bu = {.ull = b};
+
+	if (au.ui.high < bu.ui.high)
+		return 0;
+	else if (au.ui.high > bu.ui.high)
+		return 2;
+	if (au.ui.low < bu.ui.low)
+		return 0;
+	else if (au.ui.low > bu.ui.low)
+		return 2;
+	return 1;
+}