// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"
// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)

// Register aliases used by runtime·memmove.

// target address: loaded from to+0(FP) and moved along as bytes are stored
#define TGT R3
// source address: loaded from from+8(FP) and moved along as bytes are loaded
#define SRC R4
// length to move, in bytes: loaded from n+16(FP)
#define LEN R5
// number of doublewords (LEN >> 3)
#define DWORDS R6
// number of bytes < 8 (LEN & 7)
#define BYTES R7
// const 16 used as index: offset of the second 16-byte half of a 32-byte chunk
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte chunks (DWORDS >> 2); note the unit is four doublewords
#define QWORDS R10
// runtime·memmove copies n bytes from 'from' to 'to'. The ranges may overlap.
//
// Direction: (to - from) is compared, unsigned, against n. If it is smaller,
// the destination starts inside the source range and the copy runs backward
// from the end of both buffers; otherwise the copy runs forward.
//
// Condition register fields used:
//   CR0  scratch (results of ANDCC / SRDCC / CMP)
//   CR1  DWORDS compared with 0
//   CR2  (to - from) compared with n, unsigned
//   CR3  copy of CR0 after LEN & 7 (EQ set when there are no tail bytes)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), TGT
	MOVD	from+8(FP), SRC
	MOVD	n+16(FP), LEN

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES		// R7: bytes to copy
	SRD	$3, LEN, DWORDS		// R6: double words to copy
	MOVFL	CR0, CR3		// save CR from ANDCC
	CMP	DWORDS, $0, CR1		// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	SRC, TGT, TMP		// dest - src
	CMPU	TMP, LEN, CR2		// < len?
	BC	12, 8, backward		// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks?
	BEQ	lt32gt8			// < 32 bytes

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(TGT)			// prepare data cache
	DCBT	(SRC)
	MOVD	QWORDS, CTR		// Number of 32 byte chunks
	MOVD	$16, IDX16		// 16 for index

forward32:
	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
	LXVD2X	(IDX16)(SRC), VS33	// load 16 bytes
	ADD	$32, SRC
	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	ADD	$32, TGT		// bump up for next set
	BC	16, 0, forward32	// bdnz: continue while chunks remain
	ANDCC	$3, DWORDS		// remaining doublewords
	BEQ	checkbytes		// only bytes remain

lt32gt8:
	// At this point >= 8 and < 32
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT

checkbytes:
	// CR0 has been overwritten by now, so test the copy saved in CR3.
	BC	12, 14, LR		// BEQ CR3 lr: return if no tail bytes

lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT

lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT

lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR		// blt lr: return if BYTES < 1
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	LEN, SRC, SRC		// end of source
	ADD	TGT, LEN, TGT		// end of dest

	// CR0 still holds the result of the ANDCC at 'check'; nothing on this
	// path has written it since.
	BEQ	nobackwardtail		// no tail bytes (LEN & 7 == 0)

	MOVD	BYTES, CTR		// bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP		// point to last byte
	SUB	$1, SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1, TGT
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr: return if no doublewords

backwardlarge:
	MOVD	DWORDS, CTR
	// Use vsx if moving at least 32 byte chunks and distance >= 32.
	// The distance is dest - src, which is non-negative on the backward
	// path; both pointers have moved by the same amount, so it is unchanged.
	SUB	SRC, TGT, TMP		// dest - src
	CMP	TMP, $32
	BLT	backwardlargeloop	// distance < 32: doubleword loop
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(SRC), TMP
	SUB	$8, SRC
	MOVD	TMP, -8(TGT)
	SUB	$8, TGT
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	QWORDS, CTR		// set up loop ctr
	MOVD	$16, IDX16		// 32 bytes at a time

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	// Load both halves from the source before storing either, mirroring
	// forward32 (load from SRC, store to TGT).
	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
	LXVD2X	(IDX16)(SRC), VS33
	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	BC	16, 0, backward32loop	// bdnz
	ANDCC	$3, DWORDS		// doublewords left after the 32 byte chunks
	BC	12, 2, LR		// beq lr: nothing left to copy
	MOVD	DWORDS, CTR		// copy only the remaining 1-3 doublewords
	BR	backwardlargeloop
|