diff options
Diffstat (limited to 'src/runtime/memmove_arm.s')
-rw-r--r-- | src/runtime/memmove_arm.s | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/src/runtime/memmove_arm.s b/src/runtime/memmove_arm.s new file mode 100644 index 0000000..43d53fa --- /dev/null +++ b/src/runtime/memmove_arm.s @@ -0,0 +1,264 @@ +// Inferno's libkern/memmove-arm.s +// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. +// Portions Copyright 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "textflag.h" + +// TE or TS are spilled to the stack during bulk register moves. +#define TS R0 +#define TE R8 + +// Warning: the linker will use R11 to synthesize certain instructions. Please +// take care and double check with objdump. 
+#define FROM R11 +#define N R12 +#define TMP R12 /* N and TMP don't overlap */ +#define TMP1 R5 /* same register as RSHIFT; word-copy temporary in _b4loop and _f4loop */ + +#define RSHIFT R5 +#define LSHIFT R6 +#define OFFSET R7 /* RSHIFT, LSHIFT and OFFSET are set only on the unaligned-source paths (_bunaligned, _funaligned) */ + +#define BR0 R0 /* shared with TS */ +#define BW0 R1 +#define BR1 R1 +#define BW1 R2 +#define BR2 R2 +#define BW2 R3 +#define BR3 R3 +#define BW3 R4 /* BRn are the words read and BWn the words written by _bu16loop; BWn shares a register with BR(n+1) for n = 0..2 */ + +#define FW0 R1 +#define FR0 R2 +#define FW1 R2 +#define FR1 R3 +#define FW2 R3 +#define FR2 R4 +#define FW3 R4 +#define FR3 R8 /* shared with TE */ /* FRn are the words read and FWn the words written by _fu16loop; FW(n+1) shares a register with FRn for n = 0..2 */ + +// See memmove Go doc for important implementation constraints. + /* Overview: copies n bytes from from to to. The direction is picked by an unsigned comparison of the two pointers: when to is at or below from the copy runs upward from the start of both buffers (_forward), otherwise it runs downward from their ends (_back). In each direction, when n is at least 4, the destination is first byte-copied to a 4-byte boundary; a word-aligned source is then moved in 32-byte and 4-byte steps, while a source that is not word-aligned goes through a 16-byte shift-and-merge loop (_bunaligned, _funaligned); leftover bytes are copied one at a time. The stack slot at -4(SP) holds TS (as savedts) or TE (as savedte) while a multi-register move reuses its register. */ +// func memmove(to, from unsafe.Pointer, n uintptr) +TEXT runtime·memmove(SB), NOSPLIT, $4-12 +_memmove: + MOVW to+0(FP), TS + MOVW from+4(FP), FROM + MOVW n+8(FP), N + + ADD N, TS, TE /* to end pointer */ + + CMP FROM, TS + BLS _forward /* unsigned test: taken when to <= from */ + +_back: /* to is above from: copy downward, starting from the end pointers */ + ADD N, FROM /* from end pointer */ + CMP $4, N /* need at least 4 bytes to copy */ + BLT _b1tail + +_b4align: /* align destination on 4 */ + AND.S $3, TE, TMP + BEQ _b4aligned + + MOVBU.W -1(FROM), TMP /* pre-indexed */ + MOVBU.W TMP, -1(TE) /* pre-indexed */ + B _b4align /* repeat until TE is a multiple of 4 */ + +_b4aligned: /* is source now aligned? 
*/ + AND.S $3, FROM, TMP /* TMP = FROM & 3; a nonzero value is consumed by _bunaligned */ + BNE _bunaligned + + ADD $31, TS, TMP /* do 32-byte chunks if possible */ + MOVW TS, savedts-4(SP) /* TS is R0, which the R0-R7 block move below overwrites */ +_b32loop: + CMP TMP, TE + BLS _b4tail /* stop once TE <= TS+31, i.e. fewer than 32 bytes remain */ + + MOVM.DB.W (FROM), [R0-R7] /* 8 registers = 32 bytes per iteration */ + MOVM.DB.W [R0-R7], (TE) + B _b32loop + +_b4tail: /* do remaining words if possible */ + MOVW savedts-4(SP), TS /* restore TS clobbered by the block move */ + ADD $3, TS, TMP +_b4loop: + CMP TMP, TE + BLS _b1tail /* stop once TE <= TS+3, i.e. fewer than 4 bytes remain */ + + MOVW.W -4(FROM), TMP1 /* pre-indexed */ + MOVW.W TMP1, -4(TE) /* pre-indexed */ + B _b4loop + +_b1tail: /* remaining bytes */ + CMP TE, TS + BEQ _return /* done when TE has come down to TS */ + + MOVBU.W -1(FROM), TMP /* pre-indexed */ + MOVBU.W TMP, -1(TE) /* pre-indexed */ + B _b1tail + +_forward: /* to is at or below from: copy upward from the start pointers */ + CMP $4, N /* need at least 4 bytes to copy */ + BLT _f1tail + +_f4align: /* align destination on 4 */ + AND.S $3, TS, TMP + BEQ _f4aligned + + MOVBU.P 1(FROM), TMP /* implicit write back */ + MOVBU.P TMP, 1(TS) /* implicit write back */ + B _f4align /* repeat until TS is a multiple of 4 */ + +_f4aligned: /* is source now aligned? */ + AND.S $3, FROM, TMP /* TMP = FROM & 3; a nonzero value is consumed by _funaligned */ + BNE _funaligned + + SUB $31, TE, TMP /* do 32-byte chunks if possible */ + MOVW TE, savedte-4(SP) /* TE is R8, which the R1-R8 block move below overwrites */ +_f32loop: + CMP TMP, TS + BHS _f4tail /* stop once TS >= TE-31, i.e. fewer than 32 bytes remain */ + + MOVM.IA.W (FROM), [R1-R8] /* 8 registers = 32 bytes per iteration */ + MOVM.IA.W [R1-R8], (TS) + B _f32loop + +_f4tail: + MOVW savedte-4(SP), TE /* restore TE clobbered by the block move */ + SUB $3, TE, TMP /* do remaining words if possible */ +_f4loop: + CMP TMP, TS + BHS _f1tail /* stop once TS >= TE-3, i.e. fewer than 4 bytes remain */ + + MOVW.P 4(FROM), TMP1 /* implicit write back */ + MOVW.P TMP1, 4(TS) /* implicit write back */ + B _f4loop + +_f1tail: + CMP TS, TE + BEQ _return /* done when TS has reached TE */ + + MOVBU.P 1(FROM), TMP /* implicit write back */ + MOVBU.P TMP, 1(TS) /* implicit write back */ + B _f1tail + +_return: + MOVW to+0(FP), R0 /* reload the original to argument into R0; TS (also R0) was advanced by the forward paths */ + RET + +_bunaligned: /* backward copy, destination word-aligned, TMP = FROM & 3 is 1, 2 or 3 */ + CMP $2, TMP /* is TMP < 2 ? 
*/ + + MOVW.LT $8, RSHIFT /* (R(n)<<24)|(R(n-1)>>8) */ + MOVW.LT $24, LSHIFT + MOVW.LT $1, OFFSET /* LT case: FROM & 3 == 1 */ + + MOVW.EQ $16, RSHIFT /* (R(n)<<16)|(R(n-1)>>16) */ + MOVW.EQ $16, LSHIFT + MOVW.EQ $2, OFFSET /* EQ case: FROM & 3 == 2 */ + + MOVW.GT $24, RSHIFT /* (R(n)<<8)|(R(n-1)>>24) */ + MOVW.GT $8, LSHIFT + MOVW.GT $3, OFFSET /* GT case: FROM & 3 == 3; in all three cases RSHIFT + LSHIFT == 32 and OFFSET == FROM & 3 */ + + ADD $16, TS, TMP /* do 16-byte chunks if possible */ + CMP TMP, TE + BLS _b1tail /* 16 bytes or fewer remain: byte loop only; FROM has not been modified yet */ + + BIC $3, FROM /* align source */ + MOVW TS, savedts-4(SP) /* BR0 is R0, the same register as TS */ + MOVW (FROM), BR0 /* prime first block register */ + +_bu16loop: + CMP TMP, TE + BLS _bu1tail /* loop only while more than 16 bytes remain */ + + MOVW BR0<<LSHIFT, BW3 /* BR0 still holds the lowest word from the previous load (or the priming load) */ + MOVM.DB.W (FROM), [BR0-BR3] + ORR BR3>>RSHIFT, BW3 + + MOVW BR3<<LSHIFT, BW2 /* BW2 and BR3 are both R3: the ORR above must read BR3 before this write; same pattern below */ + ORR BR2>>RSHIFT, BW2 + + MOVW BR2<<LSHIFT, BW1 + ORR BR1>>RSHIFT, BW1 + + MOVW BR1<<LSHIFT, BW0 + ORR BR0>>RSHIFT, BW0 + + MOVM.DB.W [BW0-BW3], (TE) /* store the 4 merged words (16 bytes) */ + B _bu16loop + +_bu1tail: + MOVW savedts-4(SP), TS /* restore TS clobbered through BR0 */ + ADD OFFSET, FROM /* add back the low bits that BIC cleared so FROM is a byte address again */ + B _b1tail + +_funaligned: /* forward copy, destination word-aligned, TMP = FROM & 3 is 1, 2 or 3 */ + CMP $2, TMP + + MOVW.LT $8, RSHIFT /* (R(n+1)<<24)|(R(n)>>8) */ + MOVW.LT $24, LSHIFT + MOVW.LT $3, OFFSET /* LT case: FROM & 3 == 1 */ + + MOVW.EQ $16, RSHIFT /* (R(n+1)<<16)|(R(n)>>16) */ + MOVW.EQ $16, LSHIFT + MOVW.EQ $2, OFFSET /* EQ case: FROM & 3 == 2 */ + + MOVW.GT $24, RSHIFT /* (R(n+1)<<8)|(R(n)>>24) */ + MOVW.GT $8, LSHIFT + MOVW.GT $1, OFFSET /* GT case: FROM & 3 == 3; in all three cases RSHIFT + LSHIFT == 32 and OFFSET == 4 - (FROM & 3) */ + + SUB $16, TE, TMP /* do 16-byte chunks if possible */ + CMP TMP, TS + BHS _f1tail /* 16 bytes or fewer remain: byte loop only; FROM has not been modified yet */ + + BIC $3, FROM /* align source */ + MOVW TE, savedte-4(SP) /* FR3 is R8, the same register as TE */ + MOVW.P 4(FROM), FR3 /* prime last block register, implicit write back */ + +_fu16loop: + CMP TMP, TS + BHS _fu1tail /* loop only while more than 16 bytes remain */ + + MOVW FR3>>RSHIFT, FW0 /* FR3 still holds the highest word from the previous load (or the priming load) */ + MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3] + ORR FR0<<LSHIFT, FW0 + + MOVW FR0>>RSHIFT, FW1 /* FW1 and FR0 are both R2: the ORR above must read FR0 before this write; same pattern below */ + ORR FR1<<LSHIFT, FW1 + + MOVW FR1>>RSHIFT, FW2 + ORR FR2<<LSHIFT, FW2 + + MOVW FR2>>RSHIFT, FW3 + ORR FR3<<LSHIFT, FW3 + + MOVM.IA.W [FW0,FW1,FW2,FW3], (TS) /* store the 4 merged words (16 bytes) */ + B _fu16loop + +_fu1tail: + MOVW savedte-4(SP), TE /* restore TE clobbered through FR3 */ + SUB OFFSET, FROM /* the priming load advanced the aligned FROM by 4; subtracting OFFSET (4 minus the original low bits) makes FROM a byte address again */ + B _f1tail |