summaryrefslogtreecommitdiffstats
path: root/src/runtime/memmove_mipsx.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/memmove_mipsx.s')
-rw-r--r--  src/runtime/memmove_mipsx.s  260
1 file changed, 260 insertions, 0 deletions
diff --git a/src/runtime/memmove_mipsx.s b/src/runtime/memmove_mipsx.s
new file mode 100644
index 0000000..494288c
--- /dev/null
+++ b/src/runtime/memmove_mipsx.s
@@ -0,0 +1,260 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+
+#include "textflag.h"
+
+#ifdef GOARCH_mips
+#define MOVWHI MOVWL
+#define MOVWLO MOVWR
+#else
+#define MOVWHI MOVWR
+#define MOVWLO MOVWL
+#endif
+
+// See memmove Go doc for important implementation constraints.
+
+// func memmove(to, from unsafe.Pointer, n uintptr)
+TEXT runtime·memmove(SB),NOSPLIT,$-0-12
+ MOVW n+8(FP), R3
+ MOVW from+4(FP), R2
+ MOVW to+0(FP), R1
+
+ ADDU R3, R2, R4 // end pointer for source
+ ADDU R3, R1, R5 // end pointer for destination
+
+ // if destination is ahead of source, start at the end of the buffer and go backward.
+ SGTU R1, R2, R6
+ BNE R6, backward
+
+ // if less than 4 bytes, use byte by byte copying
+ SGTU $4, R3, R6
+ BNE R6, f_small_copy
+
+ // align destination to 4 bytes
+ AND $3, R1, R6
+ BEQ R6, f_dest_aligned
+ SUBU R1, R0, R6
+ AND $3, R6
+ MOVWHI 0(R2), R7
+ SUBU R6, R3
+ MOVWLO 3(R2), R7
+ ADDU R6, R2
+ MOVWHI R7, 0(R1)
+ ADDU R6, R1
+
+f_dest_aligned:
+ AND $31, R3, R7
+ AND $3, R3, R6
+ SUBU R7, R5, R7 // end pointer for 32-byte chunks
+ SUBU R6, R5, R6 // end pointer for 4-byte chunks
+
+ // if source is not aligned, use unaligned reads
+ AND $3, R2, R8
+ BNE R8, f_large_ua
+
+f_large:
+ BEQ R1, R7, f_words
+ ADDU $32, R1
+ MOVW 0(R2), R8
+ MOVW 4(R2), R9
+ MOVW 8(R2), R10
+ MOVW 12(R2), R11
+ MOVW 16(R2), R12
+ MOVW 20(R2), R13
+ MOVW 24(R2), R14
+ MOVW 28(R2), R15
+ ADDU $32, R2
+ MOVW R8, -32(R1)
+ MOVW R9, -28(R1)
+ MOVW R10, -24(R1)
+ MOVW R11, -20(R1)
+ MOVW R12, -16(R1)
+ MOVW R13, -12(R1)
+ MOVW R14, -8(R1)
+ MOVW R15, -4(R1)
+ JMP f_large
+
+f_words:
+ BEQ R1, R6, f_tail
+ ADDU $4, R1
+ MOVW 0(R2), R8
+ ADDU $4, R2
+ MOVW R8, -4(R1)
+ JMP f_words
+
+f_tail:
+ BEQ R1, R5, ret
+ MOVWLO -1(R4), R8
+ MOVWLO R8, -1(R5)
+
+ret:
+ RET
+
+f_large_ua:
+ BEQ R1, R7, f_words_ua
+ ADDU $32, R1
+ MOVWHI 0(R2), R8
+ MOVWHI 4(R2), R9
+ MOVWHI 8(R2), R10
+ MOVWHI 12(R2), R11
+ MOVWHI 16(R2), R12
+ MOVWHI 20(R2), R13
+ MOVWHI 24(R2), R14
+ MOVWHI 28(R2), R15
+ MOVWLO 3(R2), R8
+ MOVWLO 7(R2), R9
+ MOVWLO 11(R2), R10
+ MOVWLO 15(R2), R11
+ MOVWLO 19(R2), R12
+ MOVWLO 23(R2), R13
+ MOVWLO 27(R2), R14
+ MOVWLO 31(R2), R15
+ ADDU $32, R2
+ MOVW R8, -32(R1)
+ MOVW R9, -28(R1)
+ MOVW R10, -24(R1)
+ MOVW R11, -20(R1)
+ MOVW R12, -16(R1)
+ MOVW R13, -12(R1)
+ MOVW R14, -8(R1)
+ MOVW R15, -4(R1)
+ JMP f_large_ua
+
+f_words_ua:
+ BEQ R1, R6, f_tail_ua
+ MOVWHI 0(R2), R8
+ ADDU $4, R1
+ MOVWLO 3(R2), R8
+ ADDU $4, R2
+ MOVW R8, -4(R1)
+ JMP f_words_ua
+
+f_tail_ua:
+ BEQ R1, R5, ret
+ MOVWHI -4(R4), R8
+ MOVWLO -1(R4), R8
+ MOVWLO R8, -1(R5)
+ JMP ret
+
+f_small_copy:
+ BEQ R1, R5, ret
+ ADDU $1, R1
+ MOVB 0(R2), R6
+ ADDU $1, R2
+ MOVB R6, -1(R1)
+ JMP f_small_copy
+
+backward:
+ SGTU $4, R3, R6
+ BNE R6, b_small_copy
+
+ AND $3, R5, R6
+ BEQ R6, b_dest_aligned
+ MOVWHI -4(R4), R7
+ SUBU R6, R3
+ MOVWLO -1(R4), R7
+ SUBU R6, R4
+ MOVWLO R7, -1(R5)
+ SUBU R6, R5
+
+b_dest_aligned:
+ AND $31, R3, R7
+ AND $3, R3, R6
+ ADDU R7, R1, R7
+ ADDU R6, R1, R6
+
+ AND $3, R4, R8
+ BNE R8, b_large_ua
+
+b_large:
+ BEQ R5, R7, b_words
+ ADDU $-32, R5
+ MOVW -4(R4), R8
+ MOVW -8(R4), R9
+ MOVW -12(R4), R10
+ MOVW -16(R4), R11
+ MOVW -20(R4), R12
+ MOVW -24(R4), R13
+ MOVW -28(R4), R14
+ MOVW -32(R4), R15
+ ADDU $-32, R4
+ MOVW R8, 28(R5)
+ MOVW R9, 24(R5)
+ MOVW R10, 20(R5)
+ MOVW R11, 16(R5)
+ MOVW R12, 12(R5)
+ MOVW R13, 8(R5)
+ MOVW R14, 4(R5)
+ MOVW R15, 0(R5)
+ JMP b_large
+
+b_words:
+ BEQ R5, R6, b_tail
+ ADDU $-4, R5
+ MOVW -4(R4), R8
+ ADDU $-4, R4
+ MOVW R8, 0(R5)
+ JMP b_words
+
+b_tail:
+ BEQ R5, R1, ret
+ MOVWHI 0(R2), R8 // R2 and R1 have the same alignment so we don't need to load a whole word
+ MOVWHI R8, 0(R1)
+ JMP ret
+
+b_large_ua:
+ BEQ R5, R7, b_words_ua
+ ADDU $-32, R5
+ MOVWHI -4(R4), R8
+ MOVWHI -8(R4), R9
+ MOVWHI -12(R4), R10
+ MOVWHI -16(R4), R11
+ MOVWHI -20(R4), R12
+ MOVWHI -24(R4), R13
+ MOVWHI -28(R4), R14
+ MOVWHI -32(R4), R15
+ MOVWLO -1(R4), R8
+ MOVWLO -5(R4), R9
+ MOVWLO -9(R4), R10
+ MOVWLO -13(R4), R11
+ MOVWLO -17(R4), R12
+ MOVWLO -21(R4), R13
+ MOVWLO -25(R4), R14
+ MOVWLO -29(R4), R15
+ ADDU $-32, R4
+ MOVW R8, 28(R5)
+ MOVW R9, 24(R5)
+ MOVW R10, 20(R5)
+ MOVW R11, 16(R5)
+ MOVW R12, 12(R5)
+ MOVW R13, 8(R5)
+ MOVW R14, 4(R5)
+ MOVW R15, 0(R5)
+ JMP b_large_ua
+
+b_words_ua:
+ BEQ R5, R6, b_tail_ua
+ MOVWHI -4(R4), R8
+ ADDU $-4, R5
+ MOVWLO -1(R4), R8
+ ADDU $-4, R4
+ MOVW R8, 0(R5)
+ JMP b_words_ua
+
+b_tail_ua:
+ BEQ R5, R1, ret
+ MOVWHI (R2), R8
+ MOVWLO 3(R2), R8
+ MOVWHI R8, 0(R1)
+ JMP ret
+
+b_small_copy:
+ BEQ R5, R1, ret
+ ADDU $-1, R5
+ MOVB -1(R4), R6
+ ADDU $-1, R4
+ MOVB R6, 0(R5)
+ JMP b_small_copy