5 files changed, 385 insertions, 0 deletions
diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile
new file mode 100644
index 000000000..53327406b
--- /dev/null
+++ b/arch/openrisc/lib/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for or1k specific library files..
+#
+
+obj-y	:= delay.o string.o memset.o memcpy.o
diff --git a/arch/openrisc/lib/delay.c b/arch/openrisc/lib/delay.c
new file mode 100644
index 000000000..5e89e4131
--- /dev/null
+++ b/arch/openrisc/lib/delay.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OpenRISC Linux
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * Modifications for the OpenRISC architecture:
+ * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
+ *
+ * Precise Delay Loops
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <asm/param.h>
+#include <asm/delay.h>
+#include <asm/timex.h>
+#include <asm/processor.h>
+
+int read_current_timer(unsigned long *timer_value)
+{
+	*timer_value = get_cycles();
+	return 0;
+}
+
+void __delay(unsigned long cycles)
+{
+	cycles_t start = get_cycles();
+
+	while ((get_cycles() - start) < cycles)
+		cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+	unsigned long long loops;
+
+	loops = (unsigned long long)xloops * loops_per_jiffy * HZ;
+
+	__delay(loops >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x10C7UL); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c
new file mode 100644
index 000000000..e2af9b510
--- /dev/null
+++ b/arch/openrisc/lib/memcpy.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/openrisc/lib/memcpy.c
+ *
+ * Optimized memory copy routines for openrisc.  These are mostly copied
+ * from ohter sources but slightly entended based on ideas discuassed in
+ * #openrisc.
+ *
+ * The word unroll implementation is an extension to the arm byte
+ * unrolled implementation, but using word copies (if things are
+ * properly aligned)
+ *
+ * The great arm loop unroll algorithm can be found at:
+ *  arch/arm/boot/compressed/string.c
+ */
+
+#include <linux/export.h>
+
+#include <linux/string.h>
+
+#ifdef CONFIG_OR1K_1200
+/*
+ * Do memcpy with word copies and loop unrolling. This gives the
+ * best performance on the OR1200 and MOR1KX archirectures
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+	int i = 0;
+	unsigned char *d, *s;
+	uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+	/* If both source and dest are word aligned copy words */
+	if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+		/* Copy 32 bytes per loop */
+		for (i = n >> 5; i > 0; i--) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 4) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 3) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 2)
+			*dest_w++ = *src_w++;
+
+		d = (unsigned char *)dest_w;
+		s = (unsigned char *)src_w;
+
+	} else {
+		d = (unsigned char *)dest_w;
+		s = (unsigned char *)src_w;
+
+		for (i = n >> 3; i > 0; i--) {
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+		}
+
+		if (n & 1 << 2) {
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+		}
+	}
+
+	if (n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (n & 1)
+		*d++ = *s++;
+
+	return dest;
+}
+#else
+/*
+ * Use word copies but no loop unrolling as we cannot assume there
+ * will be benefits on the archirecture
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+	unsigned char *d, *s;
+	uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+	/* If both source and dest are word aligned copy words */
+	if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+		for (; n >= 4; n -= 4)
+			*dest_w++ = *src_w++;
+	}
+
+	d = (unsigned char *)dest_w;
+	s = (unsigned char *)src_w;
+
+	/* For remaining or if not aligned, copy bytes */
+	for (; n >= 1; n -= 1)
+		*d++ = *s++;
+
+	return dest;
+
+}
+#endif
+
+EXPORT_SYMBOL(memcpy);
diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S
new file mode 100644
index 000000000..c3ac2a8b6
--- /dev/null
+++ b/arch/openrisc/lib/memset.S
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ */
+
+	.global memset
+	.type	memset, @function
+memset:
+	/* arguments:
+	 * r3 = *s
+	 * r4 = c
+	 * r5 = n
+	 * r13, r15, r17, r19 used as temp regs
+	*/
+
+	/* Exit if n == 0 */
+	l.sfeqi		r5, 0
+	l.bf		4f
+
+	/* Truncate c to char */
+	l.andi  	r13, r4, 0xff
+
+	/* Skip word extension if c is 0 */
+	l.sfeqi		r13, 0
+	l.bf		1f
+	/* Check for at least two whole words (8 bytes) */
+	 l.sfleui	r5, 7
+
+	/* Extend char c to 32-bit word cccc in r13 */
+	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
+	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
+	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1:	l.addi		r19, r3, 0 // Set r19 = src
+	/* Jump to byte copy loop if less than two words */
+	l.bf		3f
+	 l.or		r17, r5, r0 // Set r17 = n
+
+	/* Mask out two LSBs to check alignment */
+	l.andi		r15, r3, 0x3
+
+	/* lsb == 00, jump to word copy loop */
+	l.sfeqi		r15, 0
+	l.bf		2f
+	 l.addi		r19, r3, 0 // Set r19 = src
+
+	/* lsb == 01,10 or 11 */
+	l.sb		0(r3), r13   // *src = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 3
+	l.bf		2f
+	 l.addi		r19, r3, 1  // src += 1
+
+	/* lsb == 01 or 10 */
+	l.sb		1(r3), r13   // *(src+1) = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 2
+	l.bf		2f
+	 l.addi		r19, r3, 2  // src += 2
+
+	/* lsb == 01 */
+	l.sb		2(r3), r13   // *(src+2) = c
+	l.addi		r17, r17, -1 // Decrease n
+	l.addi		r19, r3, 3   // src += 3
+
+	/* Word copy loop */
+2:	l.sw		0(r19), r13  // *src = cccc
+	l.addi		r17, r17, -4 // Decrease n
+	l.sfgeui	r17, 4
+	l.bf		2b
+	 l.addi		r19, r19, 4  // Increase src
+
+	/* When n > 0, copy the remaining bytes, otherwise jump to exit */
+	l.sfeqi		r17, 0
+	l.bf		4f
+
+	/* Byte copy loop */
+3:	l.addi		r17, r17, -1 // Decrease n
+	l.sb		0(r19), r13  // *src = cccc
+	l.sfnei		r17, 0
+	l.bf		3b
+	 l.addi		r19, r19, 1  // Increase src
+
+4:	l.jr		r9
+	 l.ori		r11, r3, 0
diff --git a/arch/openrisc/lib/string.S b/arch/openrisc/lib/string.S
new file mode 100644
index 000000000..74046f2eb
--- /dev/null
+++ b/arch/openrisc/lib/string.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * OpenRISC string.S
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * Modifications for the OpenRISC architecture:
+ * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
+ * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+
+	/*
+	 * this can be optimized by doing gcc inline assemlby with
+	 * proper constraints (no need to save args registers...)
+	 *
+	 */
+
+
+/*
+ *
+ * int __copy_tofrom_user(void *to, const void *from, unsigned long size);
+ *
+ * NOTE: it returns number of bytes NOT copied !!!
+ *
+ */
+	.global	__copy_tofrom_user
+__copy_tofrom_user:
+	l.addi  r1,r1,-12
+	l.sw    0(r1),r6
+	l.sw    4(r1),r4
+	l.sw    8(r1),r3
+
+	l.addi  r11,r5,0
+2:  	l.sfeq  r11,r0
+	l.bf    1f
+	l.addi  r11,r11,-1
+8:    	l.lbz   r6,0(r4)
+9:    	l.sb    0(r3),r6
+	l.addi  r3,r3,1
+	l.j     2b
+	l.addi  r4,r4,1
+1:
+	l.addi  r11,r11,1               // r11 holds the return value
+
+	l.lwz   r6,0(r1)
+	l.lwz   r4,4(r1)
+	l.lwz   r3,8(r1)
+	l.jr    r9
+	l.addi  r1,r1,12
+
+	.section .fixup, "ax"
+99:
+		l.j     1b
+		l.nop
+	.previous
+
+	.section __ex_table, "a"
+		.long 8b, 99b		// read fault
+		.long 9b, 99b		// write fault
+	.previous
+
+/*
+ * unsigned long clear_user(void *addr, unsigned long size) ;
+ *
+ * NOTE: it returns number of bytes NOT cleared !!!
+ */
+	.global	__clear_user
+__clear_user:
+	l.addi  r1,r1,-8
+	l.sw    0(r1),r4
+	l.sw    4(r1),r3
+
+2:	l.sfeq	r4,r0
+	l.bf	1f
+	l.addi	r4,r4,-1
+9:	l.sb	0(r3),r0
+	l.j	2b
+	l.addi  r3,r3,1
+
+1:
+	l.addi  r11,r4,1
+
+	l.lwz	r4,0(r1)
+	l.lwz	r3,4(r1)
+	l.jr	r9
+	l.addi	r1,r1,8
+
+	.section .fixup, "ax"
+99:
+		l.j     1b
+		l.nop
+	.previous
+
+	.section __ex_table, "a"
+		.long 9b, 99b		// write fault
+	.previous