summaryrefslogtreecommitdiffstats
path: root/src/runtime/memmove_arm.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/memmove_arm.s')
-rw-r--r--src/runtime/memmove_arm.s264
1 files changed, 264 insertions, 0 deletions
diff --git a/src/runtime/memmove_arm.s b/src/runtime/memmove_arm.s
new file mode 100644
index 0000000..43d53fa
--- /dev/null
+++ b/src/runtime/memmove_arm.s
@@ -0,0 +1,264 @@
+// Inferno's libkern/memmove-arm.s
+// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+// TS/TE bound the destination; whichever one a bulk register move would overwrite is spilled to the stack first.
+#define TS R0 /* destination start pointer; advanced by the forward copy */
+#define TE R8 /* destination end pointer (TS + n); walked down by the backward copy */
+
+// Warning: the linker will use R11 to synthesize certain instructions. Please
+// take care and double check with objdump.
+#define FROM R11 /* source pointer */
+#define N R12 /* byte count; only read before TMP is first written */
+#define TMP R12 /* N and TMP share R12 but their live ranges don't overlap */
+#define TMP1 R5 /* word staging register for the aligned 4-byte loops; same register as RSHIFT */
+
+#define RSHIFT R5 /* unaligned paths: right-shift amount in bits (8, 16 or 24) */
+#define LSHIFT R6 /* unaligned paths: left-shift amount in bits (32 - RSHIFT) */
+#define OFFSET R7 /* unaligned paths: byte correction applied to FROM after the 16-byte loop */
+
+#define BR0 R0 /* backward unaligned path: BRn = word read, BWn = word to write; BR0 is shared with TS */
+#define BW0 R1
+#define BR1 R1 /* same register as BW0 */
+#define BW1 R2
+#define BR2 R2 /* same register as BW1 */
+#define BW2 R3
+#define BR3 R3 /* same register as BW2 */
+#define BW3 R4
+
+#define FW0 R1 /* forward unaligned path: FRn = word read, FWn = word to write */
+#define FR0 R2
+#define FW1 R2 /* same register as FR0 */
+#define FR1 R3
+#define FW2 R3 /* same register as FR1 */
+#define FR2 R4
+#define FW3 R4 /* same register as FR2 */
+#define FR3 R8 /* shared with TE */
+
+// See memmove Go doc for important implementation constraints.
+// Copies n bytes from "from" to "to": backward from the end when to > from (unsigned), forward otherwise.
+// func memmove(to, from unsafe.Pointer, n uintptr)
+TEXT runtime·memmove(SB), NOSPLIT, $4-12
+_memmove:
+ MOVW to+0(FP), TS
+ MOVW from+4(FP), FROM
+ MOVW n+8(FP), N
+
+ ADD N, TS, TE /* TE = TS + N: to end pointer */
+
+ CMP FROM, TS /* unsigned: to <= from takes the forward copy */
+ BLS _forward
+
+_back: /* to > from: copy from the end downward */
+ ADD N, FROM /* from end pointer */
+ CMP $4, N /* need at least 4 bytes to copy */
+ BLT _b1tail
+
+_b4align: /* align destination on 4 */
+ AND.S $3, TE, TMP /* TMP = TE & 3, sets flags; this also ends N's use of R12 */
+ BEQ _b4aligned
+
+ MOVBU.W -1(FROM), TMP /* pre-indexed */
+ MOVBU.W TMP, -1(TE) /* pre-indexed */
+ B _b4align
+
+_b4aligned: /* is source now aligned? */
+ AND.S $3, FROM, TMP /* TMP = FROM & 3; _bunaligned reads this value */
+ BNE _bunaligned
+
+ ADD $31, TS, TMP /* do 32-byte chunks if possible */
+ MOVW TS, savedts-4(SP) /* the MOVM below overwrites R0 (TS), so spill it */
+_b32loop:
+ CMP TMP, TE /* loop while TE > TS + 31, i.e. at least 32 bytes remain */
+ BLS _b4tail
+
+ MOVM.DB.W (FROM), [R0-R7] /* load 8 words from below FROM, FROM -= 32 */
+ MOVM.DB.W [R0-R7], (TE) /* store them below TE, TE -= 32 */
+ B _b32loop
+
+_b4tail: /* do remaining words if possible */
+ MOVW savedts-4(SP), TS /* recover TS clobbered by the 32-byte loop */
+ ADD $3, TS, TMP
+_b4loop:
+ CMP TMP, TE /* loop while TE > TS + 3, i.e. at least 4 bytes remain */
+ BLS _b1tail
+
+ MOVW.W -4(FROM), TMP1 /* pre-indexed */
+ MOVW.W TMP1, -4(TE) /* pre-indexed */
+ B _b4loop
+
+_b1tail: /* remaining bytes */
+ CMP TE, TS /* done once TE has walked down to TS */
+ BEQ _return
+
+ MOVBU.W -1(FROM), TMP /* pre-indexed */
+ MOVBU.W TMP, -1(TE) /* pre-indexed */
+ B _b1tail
+
+_forward: /* to <= from: copy from the start upward */
+ CMP $4, N /* need at least 4 bytes to copy */
+ BLT _f1tail
+
+_f4align: /* align destination on 4 */
+ AND.S $3, TS, TMP /* TMP = TS & 3, sets flags; this also ends N's use of R12 */
+ BEQ _f4aligned
+
+ MOVBU.P 1(FROM), TMP /* implicit write back */
+ MOVBU.P TMP, 1(TS) /* implicit write back */
+ B _f4align
+
+_f4aligned: /* is source now aligned? */
+ AND.S $3, FROM, TMP /* TMP = FROM & 3; _funaligned reads this value */
+ BNE _funaligned
+
+ SUB $31, TE, TMP /* do 32-byte chunks if possible */
+ MOVW TE, savedte-4(SP) /* the MOVM below overwrites R8 (TE), so spill it */
+_f32loop:
+ CMP TMP, TS /* loop while TS < TE - 31, i.e. at least 32 bytes remain */
+ BHS _f4tail
+
+ MOVM.IA.W (FROM), [R1-R8] /* load 8 words at FROM, FROM += 32 */
+ MOVM.IA.W [R1-R8], (TS) /* store them at TS, TS += 32 */
+ B _f32loop
+
+_f4tail: /* recover TE clobbered by the 32-byte loop */
+ MOVW savedte-4(SP), TE
+ SUB $3, TE, TMP /* do remaining words if possible */
+_f4loop:
+ CMP TMP, TS /* loop while TS < TE - 3, i.e. at least 4 bytes remain */
+ BHS _f1tail
+
+ MOVW.P 4(FROM), TMP1 /* implicit write back */
+ MOVW.P TMP1, 4(TS) /* implicit write back */
+ B _f4loop
+
+_f1tail: /* remaining bytes */
+ CMP TS, TE /* done once TS has reached TE */
+ BEQ _return
+
+ MOVBU.P 1(FROM), TMP /* implicit write back */
+ MOVBU.P TMP, 1(TS) /* implicit write back */
+ B _f1tail
+
+_return:
+ MOVW to+0(FP), R0 /* reload the original "to" into R0; TS (R0) may have been advanced by the forward copy */
+ RET
+
+_bunaligned: /* backward copy, destination word-aligned, source not; TMP = FROM & 3 (1, 2 or 3) */
+ CMP $2, TMP /* is TMP < 2 ? */
+
+ MOVW.LT $8, RSHIFT /* (R(n)<<24)|(R(n-1)>>8) */
+ MOVW.LT $24, LSHIFT
+ MOVW.LT $1, OFFSET
+
+ MOVW.EQ $16, RSHIFT /* (R(n)<<16)|(R(n-1)>>16) */
+ MOVW.EQ $16, LSHIFT
+ MOVW.EQ $2, OFFSET
+
+ MOVW.GT $24, RSHIFT /* (R(n)<<8)|(R(n-1)>>24) */
+ MOVW.GT $8, LSHIFT
+ MOVW.GT $3, OFFSET
+
+ ADD $16, TS, TMP /* do 16-byte chunks if possible */
+ CMP TMP, TE /* TE <= TS + 16: skip the merging loop and byte-copy (FROM is still untouched) */
+ BLS _b1tail
+
+ BIC $3, FROM /* align source: round FROM down to a word boundary */
+ MOVW TS, savedts-4(SP) /* BR0 is R0 (TS), so spill TS */
+ MOVW (FROM), BR0 /* prime first block register */
+
+_bu16loop:
+ CMP TMP, TE /* loop while TE > TS + 16 */
+ BLS _bu1tail
+
+ MOVW BR0<<LSHIFT, BW3 /* start the top output word from the word loaded previously */
+ MOVM.DB.W (FROM), [BR0-BR3] /* load 4 words from below FROM, FROM -= 16 */
+ ORR BR3>>RSHIFT, BW3
+
+ MOVW BR3<<LSHIFT, BW2 /* BW2 and BR3 are both R3: BR3 was fully consumed by the ORR above */
+ ORR BR2>>RSHIFT, BW2
+
+ MOVW BR2<<LSHIFT, BW1 /* BW1 and BR2 are both R2 */
+ ORR BR1>>RSHIFT, BW1
+
+ MOVW BR1<<LSHIFT, BW0 /* BW0 and BR1 are both R1; BR0 stays intact for the next iteration */
+ ORR BR0>>RSHIFT, BW0
+
+ MOVM.DB.W [BW0-BW3], (TE) /* store the 4 merged words below TE, TE -= 16 */
+ B _bu16loop
+
+_bu1tail:
+ MOVW savedts-4(SP), TS /* recover TS overwritten through BR0 */
+ ADD OFFSET, FROM /* undo the BIC: put FROM's byte offset within the word back */
+ B _b1tail
+
+_funaligned: /* forward copy, destination word-aligned, source not; TMP = FROM & 3 (1, 2 or 3) */
+ CMP $2, TMP /* is TMP < 2 ? */
+
+ MOVW.LT $8, RSHIFT /* (R(n+1)<<24)|(R(n)>>8) */
+ MOVW.LT $24, LSHIFT
+ MOVW.LT $3, OFFSET
+
+ MOVW.EQ $16, RSHIFT /* (R(n+1)<<16)|(R(n)>>16) */
+ MOVW.EQ $16, LSHIFT
+ MOVW.EQ $2, OFFSET
+
+ MOVW.GT $24, RSHIFT /* (R(n+1)<<8)|(R(n)>>24) */
+ MOVW.GT $8, LSHIFT
+ MOVW.GT $1, OFFSET
+
+ SUB $16, TE, TMP /* do 16-byte chunks if possible */
+ CMP TMP, TS /* TS >= TE - 16: skip the merging loop and byte-copy (FROM is still untouched) */
+ BHS _f1tail
+
+ BIC $3, FROM /* align source: round FROM down to a word boundary */
+ MOVW TE, savedte-4(SP) /* FR3 is R8 (TE), so spill TE */
+ MOVW.P 4(FROM), FR3 /* prime last block register, implicit write back */
+
+_fu16loop:
+ CMP TMP, TS /* loop while TS < TE - 16 */
+ BHS _fu1tail
+
+ MOVW FR3>>RSHIFT, FW0 /* start the first output word from the word loaded previously */
+ MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3] /* load 4 words at FROM, FROM += 16 */
+ ORR FR0<<LSHIFT, FW0
+
+ MOVW FR0>>RSHIFT, FW1 /* FW1 and FR0 are both R2: FR0 was fully consumed by the ORR above */
+ ORR FR1<<LSHIFT, FW1
+
+ MOVW FR1>>RSHIFT, FW2 /* FW2 and FR1 are both R3 */
+ ORR FR2<<LSHIFT, FW2
+
+ MOVW FR2>>RSHIFT, FW3 /* FW3 and FR2 are both R4; FR3 stays intact for the next iteration */
+ ORR FR3<<LSHIFT, FW3
+
+ MOVM.IA.W [FW0,FW1,FW2,FW3], (TS) /* store the 4 merged words at TS, TS += 16 */
+ B _fu16loop
+
+_fu1tail:
+ MOVW savedte-4(SP), TE /* recover TE overwritten through FR3 */
+ SUB OFFSET, FROM /* FROM is one word ahead of the aligned address; step back to the next uncopied byte */
+ B _f1tail