summaryrefslogtreecommitdiffstats
path: root/arch/sh/lib/checksum.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sh/lib/checksum.S')
-rw-r--r--arch/sh/lib/checksum.S365
1 files changed, 365 insertions, 0 deletions
diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S
new file mode 100644
index 000000000..3e07074e0
--- /dev/null
+++ b/arch/sh/lib/checksum.S
@@ -0,0 +1,365 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
+ *
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IP/TCP/UDP checksumming routines
+ *
+ * Authors: Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
+ * Pentium Pro/II routines:
+ * Alexander Kjeldaas <astor@guardian.no>
+ * Finn Arne Gangstad <finnag@guardian.no>
+ * Lots of code moved from tcp.c and ip.c; see those files
+ * for more names.
+ *
+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ * handling.
+ * Andi Kleen, add zeroing on error
+ * converted to pure assembler
+ *
+ * SuperH version: Copyright (C) 1999 Niibe Yutaka
+ */
+
+#include <asm/errno.h>
+#include <linux/linkage.h>
+
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*
+ * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
+ */
+
+.text
+ENTRY(csum_partial)
+ /*
+ * Experiments with Ethernet and SLIP connections show that buff
+ * is aligned on either a 2-byte or 4-byte boundary. We get at
+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+ * alignment for the unrolled loop.
+ */
+ mov r4, r0
+ tst #3, r0 ! Check alignment.
+ bt/s 2f ! Jump if alignment is ok.
+ mov r4, r7 ! Keep a copy to check for alignment
+ !
+ tst #1, r0 ! Check alignment.
+ bt 21f ! Jump if alignment is boundary of 2bytes.
+
+ ! buf is odd
+ tst r5, r5
+ add #-1, r5
+ bt 9f
+ mov.b @r4+, r0
+ extu.b r0, r0
+ addc r0, r6 ! t=0 from previous tst
+ mov r6, r0
+ shll8 r6
+ shlr16 r0
+ shlr8 r0
+ or r0, r6
+ mov r4, r0
+ tst #2, r0
+ bt 2f
+21:
+ ! buf is 2 byte aligned (len could be 0)
+ add #-2, r5 ! Alignment uses up two bytes.
+ cmp/pz r5 !
+ bt/s 1f ! Jump if we had at least two bytes.
+ clrt
+ bra 6f
+ add #2, r5 ! r5 was < 2. Deal with it.
+1:
+ mov.w @r4+, r0
+ extu.w r0, r0
+ addc r0, r6
+ bf 2f
+ add #1, r6
+2:
+ ! buf is 4 byte aligned (len could be 0)
+ mov r5, r1
+ mov #-5, r0
+ shld r0, r1
+ tst r1, r1
+ bt/s 4f ! if it's =0, go to 4f
+ clrt
+ .align 2
+3:
+ mov.l @r4+, r0
+ mov.l @r4+, r2
+ mov.l @r4+, r3
+ addc r0, r6
+ mov.l @r4+, r0
+ addc r2, r6
+ mov.l @r4+, r2
+ addc r3, r6
+ mov.l @r4+, r3
+ addc r0, r6
+ mov.l @r4+, r0
+ addc r2, r6
+ mov.l @r4+, r2
+ addc r3, r6
+ addc r0, r6
+ addc r2, r6
+ movt r0
+ dt r1
+ bf/s 3b
+ cmp/eq #1, r0
+ ! here, we know r1==0
+ addc r1, r6 ! add carry to r6
+4:
+ mov r5, r0
+ and #0x1c, r0
+ tst r0, r0
+ bt 6f
+ ! 4 bytes or more remaining
+ mov r0, r1
+ shlr2 r1
+ mov #0, r2
+5:
+ addc r2, r6
+ mov.l @r4+, r2
+ movt r0
+ dt r1
+ bf/s 5b
+ cmp/eq #1, r0
+ addc r2, r6
+ addc r1, r6 ! r1==0 here, so it means add carry-bit
+6:
+ ! 3 bytes or less remaining
+ mov #3, r0
+ and r0, r5
+ tst r5, r5
+ bt 9f ! if it's =0 go to 9f
+ mov #2, r1
+ cmp/hs r1, r5
+ bf 7f
+ mov.w @r4+, r0
+ extu.w r0, r0
+ cmp/eq r1, r5
+ bt/s 8f
+ clrt
+ shll16 r0
+ addc r0, r6
+7:
+ mov.b @r4+, r0
+ extu.b r0, r0
+#ifndef __LITTLE_ENDIAN__
+ shll8 r0
+#endif
+8:
+ addc r0, r6
+ mov #0, r0
+ addc r0, r6
+9:
+ ! Check if the buffer was misaligned, if so realign sum
+ mov r7, r0
+ tst #1, r0
+ bt 10f
+ mov r6, r0
+ shll8 r6
+ shlr16 r0
+ shlr8 r0
+ or r0, r6
+10:
+ rts
+ mov r6, r0
+
+/*
+unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
+ */
+
+/*
+ * Copy from ds while checksumming, otherwise like csum_partial with initial
+ * sum being ~0U
+ */
+
+#define EXC(...) \
+ 9999: __VA_ARGS__ ; \
+ .section __ex_table, "a"; \
+ .long 9999b, 6001f ; \
+ .previous
+
+!
+! r4: const char *SRC
+! r5: char *DST
+! r6: int LEN
+!
+ENTRY(csum_partial_copy_generic)
+ mov #-1,r7
+ mov #3,r0 ! Check src and dest are equally aligned
+ mov r4,r1
+ and r0,r1
+ and r5,r0
+ cmp/eq r1,r0
+ bf 3f ! Different alignments, use slow version
+ tst #1,r0 ! Check dest word aligned
+ bf 3f ! If not, do it the slow way
+
+ mov #2,r0
+ tst r0,r5 ! Check dest alignment.
+ bt 2f ! Jump if alignment is ok.
+ add #-2,r6 ! Alignment uses up two bytes.
+ cmp/pz r6 ! Jump if we had at least two bytes.
+ bt/s 1f
+ clrt
+ add #2,r6 ! r6 was < 2. Deal with it.
+ bra 4f
+ mov r6,r2
+
+3: ! Handle different src and dest alignments.
+ ! This is not common, so simple byte by byte copy will do.
+ mov r6,r2
+ shlr r6
+ tst r6,r6
+ bt 4f
+ clrt
+ .align 2
+5:
+EXC( mov.b @r4+,r1 )
+EXC( mov.b @r4+,r0 )
+ extu.b r1,r1
+EXC( mov.b r1,@r5 )
+EXC( mov.b r0,@(1,r5) )
+ extu.b r0,r0
+ add #2,r5
+
+#ifdef __LITTLE_ENDIAN__
+ shll8 r0
+#else
+ shll8 r1
+#endif
+ or r1,r0
+
+ addc r0,r7
+ movt r0
+ dt r6
+ bf/s 5b
+ cmp/eq #1,r0
+ mov #0,r0
+ addc r0, r7
+
+ mov r2, r0
+ tst #1, r0
+ bt 7f
+ bra 5f
+ clrt
+
+ ! src and dest equally aligned, but to a two byte boundary.
+ ! Handle first two bytes as a special case
+ .align 2
+1:
+EXC( mov.w @r4+,r0 )
+EXC( mov.w r0,@r5 )
+ add #2,r5
+ extu.w r0,r0
+ addc r0,r7
+ mov #0,r0
+ addc r0,r7
+2:
+ mov r6,r2
+ mov #-5,r0
+ shld r0,r6
+ tst r6,r6
+ bt/s 2f
+ clrt
+ .align 2
+1:
+EXC( mov.l @r4+,r0 )
+EXC( mov.l @r4+,r1 )
+ addc r0,r7
+EXC( mov.l r0,@r5 )
+EXC( mov.l r1,@(4,r5) )
+ addc r1,r7
+
+EXC( mov.l @r4+,r0 )
+EXC( mov.l @r4+,r1 )
+ addc r0,r7
+EXC( mov.l r0,@(8,r5) )
+EXC( mov.l r1,@(12,r5) )
+ addc r1,r7
+
+EXC( mov.l @r4+,r0 )
+EXC( mov.l @r4+,r1 )
+ addc r0,r7
+EXC( mov.l r0,@(16,r5) )
+EXC( mov.l r1,@(20,r5) )
+ addc r1,r7
+
+EXC( mov.l @r4+,r0 )
+EXC( mov.l @r4+,r1 )
+ addc r0,r7
+EXC( mov.l r0,@(24,r5) )
+EXC( mov.l r1,@(28,r5) )
+ addc r1,r7
+ add #32,r5
+ movt r0
+ dt r6
+ bf/s 1b
+ cmp/eq #1,r0
+ mov #0,r0
+ addc r0,r7
+
+2: mov r2,r6
+ mov #0x1c,r0
+ and r0,r6
+ cmp/pl r6
+ bf/s 4f
+ clrt
+ shlr2 r6
+3:
+EXC( mov.l @r4+,r0 )
+ addc r0,r7
+EXC( mov.l r0,@r5 )
+ add #4,r5
+ movt r0
+ dt r6
+ bf/s 3b
+ cmp/eq #1,r0
+ mov #0,r0
+ addc r0,r7
+4: mov r2,r6
+ mov #3,r0
+ and r0,r6
+ cmp/pl r6
+ bf 7f
+ mov #2,r1
+ cmp/hs r1,r6
+ bf 5f
+EXC( mov.w @r4+,r0 )
+EXC( mov.w r0,@r5 )
+ extu.w r0,r0
+ add #2,r5
+ cmp/eq r1,r6
+ bt/s 6f
+ clrt
+ shll16 r0
+ addc r0,r7
+5:
+EXC( mov.b @r4+,r0 )
+EXC( mov.b r0,@r5 )
+ extu.b r0,r0
+#ifndef __LITTLE_ENDIAN__
+ shll8 r0
+#endif
+6: addc r0,r7
+ mov #0,r0
+ addc r0,r7
+7:
+
+# Exception handler:
+.section .fixup, "ax"
+
+6001:
+ rts
+ mov #0,r0
+.previous
+ rts
+ mov r7,r0