summaryrefslogtreecommitdiffstats
path: root/arch/alpha/lib/memmove.S
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
commit2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree848558de17fb3008cdf4d861b01ac7781903ce39 /arch/alpha/lib/memmove.S
parentInitial commit. (diff)
downloadlinux-b8823030eac27fc7a3d149e3a443a0b68810a78f.tar.xz
linux-b8823030eac27fc7a3d149e3a443a0b68810a78f.zip
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/alpha/lib/memmove.S')
-rw-r--r--arch/alpha/lib/memmove.S183
1 files changed, 183 insertions, 0 deletions
diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S
new file mode 100644
index 000000000..42d1922d0
--- /dev/null
+++ b/arch/alpha/lib/memmove.S
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/alpha/lib/memmove.S
+ *
+ * Barely optimized memmove routine for Alpha EV5.
+ *
+ * This is hand-massaged output from the original memcpy.c. We defer to
+ * memcpy whenever possible; the backwards copy loops are not unrolled.
+ */
+#include <asm/export.h>
+ .set noat
+ .set noreorder
+ .text
+
+ .align 4
+ .globl memmove
+ .ent memmove
+memmove:
+ ldgp $29, 0($27)
+ unop
+ nop
+ .prologue 1
+
+ addq $16,$18,$4
+ addq $17,$18,$5
+ cmpule $4,$17,$1 /* dest + n <= src */
+ cmpule $5,$16,$2 /* dest >= src + n */
+
+ bis $1,$2,$1
+ mov $16,$0
+ xor $16,$17,$2
+ bne $1,memcpy !samegp
+
+ and $2,7,$2 /* Test for src/dest co-alignment. */
+ and $16,7,$1
+ cmpule $16,$17,$3
+ bne $3,$memmove_up /* dest < src */
+
+ and $4,7,$1
+ bne $2,$misaligned_dn
+ unop
+ beq $1,$skip_aligned_byte_loop_head_dn
+
+$aligned_byte_loop_head_dn:
+ lda $4,-1($4)
+ lda $5,-1($5)
+ unop
+ ble $18,$egress
+
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+ lda $18,-1($18)
+ extbl $3,$5,$1
+
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+ bis $1,$2,$1
+ and $4,7,$6
+
+ stq_u $1,0($4)
+ bne $6,$aligned_byte_loop_head_dn
+
+$skip_aligned_byte_loop_head_dn:
+ lda $18,-8($18)
+ blt $18,$skip_aligned_word_loop_dn
+
+$aligned_word_loop_dn:
+ ldq $1,-8($5)
+ nop
+ lda $5,-8($5)
+ lda $18,-8($18)
+
+ stq $1,-8($4)
+ nop
+ lda $4,-8($4)
+ bge $18,$aligned_word_loop_dn
+
+$skip_aligned_word_loop_dn:
+ lda $18,8($18)
+ bgt $18,$byte_loop_tail_dn
+ unop
+ ret $31,($26),1
+
+ .align 4
+$misaligned_dn:
+ nop
+ fnop
+ unop
+ beq $18,$egress
+
+$byte_loop_tail_dn:
+ ldq_u $3,-1($5)
+ ldq_u $2,-1($4)
+ lda $5,-1($5)
+ lda $4,-1($4)
+
+ lda $18,-1($18)
+ extbl $3,$5,$1
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+
+ bis $1,$2,$1
+ stq_u $1,0($4)
+ bgt $18,$byte_loop_tail_dn
+ br $egress
+
+$memmove_up:
+ mov $16,$4
+ mov $17,$5
+ bne $2,$misaligned_up
+ beq $1,$skip_aligned_byte_loop_head_up
+
+$aligned_byte_loop_head_up:
+ unop
+ ble $18,$egress
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+
+ lda $18,-1($18)
+ extbl $3,$5,$1
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+
+ bis $1,$2,$1
+ lda $5,1($5)
+ stq_u $1,0($4)
+ lda $4,1($4)
+
+ and $4,7,$6
+ bne $6,$aligned_byte_loop_head_up
+
+$skip_aligned_byte_loop_head_up:
+ lda $18,-8($18)
+ blt $18,$skip_aligned_word_loop_up
+
+$aligned_word_loop_up:
+ ldq $1,0($5)
+ nop
+ lda $5,8($5)
+ lda $18,-8($18)
+
+ stq $1,0($4)
+ nop
+ lda $4,8($4)
+ bge $18,$aligned_word_loop_up
+
+$skip_aligned_word_loop_up:
+ lda $18,8($18)
+ bgt $18,$byte_loop_tail_up
+ unop
+ ret $31,($26),1
+
+ .align 4
+$misaligned_up:
+ nop
+ fnop
+ unop
+ beq $18,$egress
+
+$byte_loop_tail_up:
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+ lda $18,-1($18)
+ extbl $3,$5,$1
+
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+ bis $1,$2,$1
+ stq_u $1,0($4)
+
+ lda $5,1($5)
+ lda $4,1($4)
+ nop
+ bgt $18,$byte_loop_tail_up
+
+$egress:
+ ret $31,($26),1
+ nop
+ nop
+ nop
+
+ .end memmove
+ EXPORT_SYMBOL(memmove)