1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
|
diff --git a/lib/arm/arm2gnu.pl b/lib/arm/arm2gnu.pl
index 8cb68e4..d6fe09c 100755
--- a/lib/arm/arm2gnu.pl
+++ b/lib/arm/arm2gnu.pl
@@ -25,6 +25,8 @@ $n=0;
$thumb = 0; # ARM mode by default, not Thumb.
@proc_stack = ();
+printf (" .syntax unified\n");
+
LINE:
while (<>) {
diff --git a/lib/arm/armbits.s b/lib/arm/armbits.s
index 9400722..fd6e444 100644
--- a/lib/arm/armbits.s
+++ b/lib/arm/armbits.s
@@ -67,28 +67,28 @@ oc_pack_read_refill
; negative.
CMP r10,r11 ; ptr<stop => HI
CMPHI r3,#7 ; available<=24 => HI
- LDRHIB r14,[r11],#1 ; r14 = *ptr++
+ LDRBHI r14,[r11],#1 ; r14 = *ptr++
SUBHI r3,#8 ; available += 8
; (HI) Stall...
- ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
+ ORRHI r2,r2,r14,LSL r3 ; r2 = window|=r14<<32-available
CMPHI r10,r11 ; ptr<stop => HI
CMPHI r3,#7 ; available<=24 => HI
- LDRHIB r14,[r11],#1 ; r14 = *ptr++
+ LDRBHI r14,[r11],#1 ; r14 = *ptr++
SUBHI r3,#8 ; available += 8
; (HI) Stall...
- ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
+ ORRHI r2,r2,r14,LSL r3 ; r2 = window|=r14<<32-available
CMPHI r10,r11 ; ptr<stop => HI
CMPHI r3,#7 ; available<=24 => HI
- LDRHIB r14,[r11],#1 ; r14 = *ptr++
+ LDRBHI r14,[r11],#1 ; r14 = *ptr++
SUBHI r3,#8 ; available += 8
; (HI) Stall...
- ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
+ ORRHI r2,r2,r14,LSL r3 ; r2 = window|=r14<<32-available
CMPHI r10,r11 ; ptr<stop => HI
CMPHI r3,#7 ; available<=24 => HI
- LDRHIB r14,[r11],#1 ; r14 = *ptr++
+ LDRBHI r14,[r11],#1 ; r14 = *ptr++
SUBHI r3,#8 ; available += 8
; (HI) Stall...
- ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
+ ORRHI r2,r2,r14,LSL r3 ; r2 = window|=r14<<32-available
SUBS r3,r0,r3 ; r3 = available-=_bits, available<bits => GT
BLT oc_pack_read_refill_last
MOV r0,r2,LSR r0 ; r0 = window>>32-_bits
@@ -104,14 +104,14 @@ oc_pack_read_refill_last
CMP r11,r10 ; ptr<stop => LO
; If we didn't hit the end of the packet, then pull enough of the next byte to
; to fill up the window.
- LDRLOB r14,[r11] ; (LO) r14 = *ptr
+ LDRBLO r14,[r11] ; (LO) r14 = *ptr
; Otherwise, set the EOF flag and pretend we have lots of available bits.
MOVHS r14,#1 ; (HS) r14 = 1
ADDLO r10,r3,r1 ; (LO) r10 = available
STRHS r14,[r12,#8] ; (HS) eof = 1
ANDLO r10,r10,#7 ; (LO) r10 = available&7
MOVHS r3,#1<<30 ; (HS) available = OC_LOTS_OF_BITS
- ORRLO r2,r14,LSL r10 ; (LO) r2 = window|=*ptr>>(available&7)
+ ORRLO r2,r2,r14,LSL r10 ; (LO) r2 = window|=*ptr>>(available&7)
MOV r0,r2,LSR r0 ; r0 = window>>32-_bits
MOV r2,r2,LSL r1 ; r2 = window<<=_bits
STR r11,[r12,#-4] ; ptr = r11
@@ -183,32 +183,32 @@ oc_huff_token_decode_refill
; We can't possibly need more than 15 bits, so available must be <= 15.
; Therefore we can load at least two bytes without checking it.
CMP r2,r3 ; ptr<stop => HI
- LDRHIB r14,[r3],#1 ; r14 = *ptr++
+ LDRBHI r14,[r3],#1 ; r14 = *ptr++
RSBHI r5,r5,#24 ; (HI) available = 32-(available+=8)
RSBLS r5,r5,#32 ; (LS) r5 = 32-available
- ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
+ ORRHI r4,r4,r14,LSL r5 ; r4 = window|=r14<<32-available
CMPHI r2,r3 ; ptr<stop => HI
- LDRHIB r14,[r3],#1 ; r14 = *ptr++
+ LDRBHI r14,[r3],#1 ; r14 = *ptr++
SUBHI r5,#8 ; available += 8
; (HI) Stall...
- ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
+ ORRHI r4,r4,r14,LSL r5 ; r4 = window|=r14<<32-available
; We can use unsigned compares for both the pointers and for available
; (allowing us to chain condition codes) because available will never be
; larger than 32 (or we wouldn't be here), and thus 32-available will never be
; negative.
CMPHI r2,r3 ; ptr<stop => HI
CMPHI r5,#7 ; available<=24 => HI
- LDRHIB r14,[r3],#1 ; r14 = *ptr++
+ LDRBHI r14,[r3],#1 ; r14 = *ptr++
SUBHI r5,#8 ; available += 8
; (HI) Stall...
- ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
+ ORRHI r4,r4,r14,LSL r5 ; r4 = window|=r14<<32-available
CMP r2,r3 ; ptr<stop => HI
MOVLS r5,#-1<<30 ; (LS) available = OC_LOTS_OF_BITS+32
CMPHI r5,#7 ; (HI) available<=24 => HI
- LDRHIB r14,[r3],#1 ; (HI) r14 = *ptr++
+ LDRBHI r14,[r3],#1 ; (HI) r14 = *ptr++
SUBHI r5,#8 ; (HI) available += 8
; (HI) Stall...
- ORRHI r4,r14,LSL r5 ; (HI) r4 = window|=r14<<32-available
+ ORRHI r4,r4,r14,LSL r5 ; (HI) r4 = window|=r14<<32-available
RSB r14,r10,#32 ; r14 = 32-n
MOV r14,r4,LSR r14 ; r14 = bits=window>>32-n
ADD r12,r12,r14 ;
diff --git a/lib/arm/armfrag.s b/lib/arm/armfrag.s
index 38627ed..38ee775 100644
--- a/lib/arm/armfrag.s
+++ b/lib/arm/armfrag.s
@@ -357,7 +357,7 @@ ofrintra_v6_lp
ORR r5, r5, r5, LSR #8 ; r5 = __777766
PKHBT r2, r2, r3, LSL #16 ; r2 = 33221100
PKHBT r3, r4, r5, LSL #16 ; r3 = 77665544
- STRD r2, [r0], r1
+ STRD r2, r3, [r0], r1
BGT ofrintra_v6_lp
LDMFD r13!,{r4-r6,PC}
ENDP
@@ -397,7 +397,7 @@ ofrinter_v6_lp
USAT16 r12,#8, r12 ; r12= __66__44
USAT16 r5, #8, r5 ; r4 = __77__55
ORR r5, r12,r5, LSL #8 ; r5 = 33221100
- STRD r4, [r0], r2
+ STRD r4, r5, [r0], r2
BGT ofrinter_v6_lp
LDMFD r13!,{r4-r7,PC}
ENDP
@@ -439,7 +439,7 @@ ofrinter2_v6_lp
USAT16 r8, #8, r8 ; r8 = __22__00
USAT16 r7, #8, r7 ; r7 = __33__11
ORR r8, r8, r7, LSL #8 ; r8 = 33221100
- STRD r8, [r0], r3
+ STRD r8, r9, [r0], r3
BGT ofrinter2_v6_lp
LDMFD r13!,{r4-r9,PC}
ENDP
diff --git a/lib/arm/armidct.s b/lib/arm/armidct.s
index 68530c7..269f74b 100644
--- a/lib/arm/armidct.s
+++ b/lib/arm/armidct.s
@@ -875,7 +875,7 @@ idct2_1core_v6 PROC
LDR r3, OC_C4S4
LDRSH r6, [r1], #16 ; r6 = x[1,0]
SMULWB r12,r3, r2 ; r12= t[0,0]=OC_C4S4*x[0,0]>>16
- LDRD r4, OC_C7S1 ; r4 = OC_C7S1; r5 = OC_C1S7
+ LDRD r4, r5, OC_C7S1 ; r4 = OC_C7S1; r5 = OC_C1S7
SMULWB r6, r3, r6 ; r6 = t[1,0]=OC_C4S4*x[1,0]>>16
SMULWT r4, r4, r2 ; r4 = t[0,4]=OC_C7S1*x[0,1]>>16
SMULWT r7, r5, r2 ; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
@@ -937,7 +937,7 @@ idct2_2core_down_v6 PROC
MOV r7 ,#8 ; r7 = 8
LDR r6, [r1], #16 ; r6 = <x[1,1]|x[1,0]>
SMLAWB r12,r3, r2, r7 ; r12= (t[0,0]=OC_C4S4*x[0,0]>>16)+8
- LDRD r4, OC_C7S1 ; r4 = OC_C7S1; r5 = OC_C1S7
+ LDRD r4, r5, OC_C7S1 ; r4 = OC_C7S1; r5 = OC_C1S7
SMLAWB r7, r3, r6, r7 ; r7 = (t[1,0]=OC_C4S4*x[1,0]>>16)+8
SMULWT r5, r5, r2 ; r2 = t[0,7]=OC_C1S7*x[0,1]>>16
PKHBT r12,r12,r7, LSL #16 ; r12= <t[1,0]+8|t[0,0]+8>
@@ -1053,7 +1053,7 @@ idct3_2core_v6 PROC
; r1 = const ogg_int16_t *_x (source)
; Stage 1:
LDRD r4, [r1], #16 ; r4 = <x[0,1]|x[0,0]>; r5 = <*|x[0,2]>
- LDRD r10,OC_C6S2_3_v6 ; r10= OC_C6S2; r11= OC_C2S6
+ LDRD r10, r11, OC_C6S2_3_v6 ; r10= OC_C6S2; r11= OC_C2S6
; Stall
SMULWB r3, r11,r5 ; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
LDR r11,OC_C4S4
@@ -1132,12 +1132,12 @@ idct4_3core_v6 PROC
; r1 = const ogg_int16_t *_x (source)
; Stage 1:
LDRD r10,[r1], #16 ; r10= <x[0,1]|x[0,0]>; r11= <x[0,3]|x[0,2]>
- LDRD r2, OC_C5S3_4_v6 ; r2 = OC_C5S3; r3 = OC_C3S5
+ LDRD r2, r3, OC_C5S3_4_v6 ; r2 = OC_C5S3; r3 = OC_C3S5
LDRD r4, [r1], #16 ; r4 = <x[1,1]|x[1,0]>; r5 = <??|x[1,2]>
SMULWT r9, r3, r11 ; r9 = t[0,6]=OC_C3S5*x[0,3]>>16
SMULWT r8, r2, r11 ; r8 = -t[0,5]=OC_C5S3*x[0,3]>>16
PKHBT r9, r9, r2 ; r9 = <0|t[0,6]>
- LDRD r6, OC_C6S2_4_v6 ; r6 = OC_C6S2; r7 = OC_C2S6
+ LDRD r6, r7, OC_C6S2_4_v6 ; r6 = OC_C6S2; r7 = OC_C2S6
PKHBT r8, r8, r2 ; r9 = <0|-t[0,5]>
SMULWB r3, r7, r11 ; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
SMULWB r2, r6, r11 ; r2 = t[0,2]=OC_C6S2*x[0,2]>>16
@@ -1148,7 +1148,7 @@ idct4_3core_v6 PROC
SMULWB r12,r11,r10 ; r12= t[0,0]=OC_C4S4*x[0,0]>>16
PKHBT r2, r2, r5, LSL #16 ; r2 = <t[1,2]|t[0,2]>
SMULWB r5, r11,r4 ; r5 = t[1,0]=OC_C4S4*x[1,0]>>16
- LDRD r6, OC_C7S1_4_v6 ; r6 = OC_C7S1; r7 = OC_C1S7
+ LDRD r6, r7, OC_C7S1_4_v6 ; r6 = OC_C7S1; r7 = OC_C1S7
PKHBT r12,r12,r5, LSL #16 ; r12= <t[1,0]|t[0,0]>
SMULWT r5, r7, r4 ; r5 = t[1,7]=OC_C1S7*x[1,1]>>16
SMULWT r7, r7, r10 ; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
@@ -1216,10 +1216,10 @@ idct4_4core_down_v6 PROC
; r1 = const ogg_int16_t *_x (source)
; Stage 1:
LDRD r10,[r1], #16 ; r10= <x[0,1]|x[0,0]>; r11= <x[0,3]|x[0,2]>
- LDRD r2, OC_C5S3_4_v6 ; r2 = OC_C5S3; r3 = OC_C3S5
+ LDRD r2, r3, OC_C5S3_4_v6 ; r2 = OC_C5S3; r3 = OC_C3S5
LDRD r4, [r1], #16 ; r4 = <x[1,1]|x[1,0]>; r5 = <x[1,3]|x[1,2]>
SMULWT r9, r3, r11 ; r9 = t[0,6]=OC_C3S5*x[0,3]>>16
- LDRD r6, OC_C6S2_4_v6 ; r6 = OC_C6S2; r7 = OC_C2S6
+ LDRD r6, r7, OC_C6S2_4_v6 ; r6 = OC_C6S2; r7 = OC_C2S6
SMULWT r8, r2, r11 ; r8 = -t[0,5]=OC_C5S3*x[0,3]>>16
; Here we cheat: row 3 had just a DC, so x[0,3]==x[1,3] by definition.
PKHBT r9, r9, r9, LSL #16 ; r9 = <t[0,6]|t[0,6]>
@@ -1234,7 +1234,7 @@ idct4_4core_down_v6 PROC
SMLAWB r12,r11,r10,r7 ; r12= t[0,0]+8=(OC_C4S4*x[0,0]>>16)+8
PKHBT r2, r2, r5, LSL #16 ; r2 = <t[1,2]|t[0,2]>
SMLAWB r5, r11,r4 ,r7 ; r5 = t[1,0]+8=(OC_C4S4*x[1,0]>>16)+8
- LDRD r6, OC_C7S1_4_v6 ; r6 = OC_C7S1; r7 = OC_C1S7
+ LDRD r6, r7, OC_C7S1_4_v6 ; r6 = OC_C7S1; r7 = OC_C1S7
PKHBT r12,r12,r5, LSL #16 ; r12= <t[1,0]+8|t[0,0]+8>
SMULWT r5, r7, r4 ; r5 = t[1,7]=OC_C1S7*x[1,1]>>16
SMULWT r7, r7, r10 ; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
@@ -1264,7 +1264,7 @@ idct8_8core_v6 PROC
STMFD r13!,{r0,r14}
; Stage 1:
;5-6 rotation by 3pi/16
- LDRD r10,OC_C5S3_4_v6 ; r10= OC_C5S3, r11= OC_C3S5
+ LDRD r10, r11, OC_C5S3_4_v6 ; r10= OC_C5S3, r11= OC_C3S5
LDR r4, [r1,#8] ; r4 = <x[0,5]|x[0,4]>
LDR r7, [r1,#24] ; r7 = <x[1,5]|x[1,4]>
SMULWT r5, r11,r4 ; r5 = OC_C3S5*x[0,5]>>16
@@ -1281,7 +1281,7 @@ idct8_8core_v6 PROC
PKHBT r6, r6, r11,LSL #16 ; r6 = <t[1,6]|t[0,6]>
SMULWT r8, r10,r12 ; r8 = OC_C5S3*x[1,3]>>16
;2-3 rotation by 6pi/16
- LDRD r10,OC_C6S2_4_v6 ; r10= OC_C6S2, r11= OC_C2S6
+ LDRD r10, r11, OC_C6S2_4_v6 ; r10= OC_C6S2, r11= OC_C2S6
PKHBT r3, r3, r8, LSL #16 ; r3 = <r8|r3>
LDR r8, [r1,#12] ; r8 = <x[0,7]|x[0,6]>
SMULWB r2, r10,r0 ; r2 = OC_C6S2*x[0,2]>>16
@@ -1297,7 +1297,7 @@ idct8_8core_v6 PROC
PKHBT r3, r3, r10,LSL #16 ; r3 = <t[1,6]|t[0,6]>
SMULWB r12,r11,r7 ; r12= OC_C2S6*x[1,6]>>16
;4-7 rotation by 7pi/16
- LDRD r10,OC_C7S1_8_v6 ; r10= OC_C7S1, r11= OC_C1S7
+ LDRD r10, r11, OC_C7S1_8_v6 ; r10= OC_C7S1, r11= OC_C1S7
PKHBT r9, r9, r12,LSL #16 ; r9 = <r9|r12>
LDR r0, [r1],#16 ; r0 = <x[0,1]|x[0,0]>
PKHTB r7, r7, r8, ASR #16 ; r7 = <x[1,7]|x[0,7]>
@@ -1363,7 +1363,7 @@ idct8_8core_down_v6 PROC
STMFD r13!,{r0,r14}
; Stage 1:
;5-6 rotation by 3pi/16
- LDRD r10,OC_C5S3_8_v6 ; r10= OC_C5S3, r11= OC_C3S5
+ LDRD r10, r11, OC_C5S3_8_v6 ; r10= OC_C5S3, r11= OC_C3S5
LDR r4, [r1,#8] ; r4 = <x[0,5]|x[0,4]>
LDR r7, [r1,#24] ; r7 = <x[1,5]|x[1,4]>
SMULWT r5, r11,r4 ; r5 = OC_C3S5*x[0,5]>>16
@@ -1380,7 +1380,7 @@ idct8_8core_down_v6 PROC
PKHBT r6, r6, r11,LSL #16 ; r6 = <t[1,6]|t[0,6]>
SMULWT r8, r10,r12 ; r8 = OC_C5S3*x[1,3]>>16
;2-3 rotation by 6pi/16
- LDRD r10,OC_C6S2_8_v6 ; r10= OC_C6S2, r11= OC_C2S6
+ LDRD r10, r11, OC_C6S2_8_v6 ; r10= OC_C6S2, r11= OC_C2S6
PKHBT r3, r3, r8, LSL #16 ; r3 = <r8|r3>
LDR r8, [r1,#12] ; r8 = <x[0,7]|x[0,6]>
SMULWB r2, r10,r0 ; r2 = OC_C6S2*x[0,2]>>16
@@ -1396,7 +1396,7 @@ idct8_8core_down_v6 PROC
PKHBT r3, r3, r10,LSL #16 ; r3 = <t[1,6]|t[0,6]>
SMULWB r12,r11,r7 ; r12= OC_C2S6*x[1,6]>>16
;4-7 rotation by 7pi/16
- LDRD r10,OC_C7S1_8_v6 ; r10= OC_C7S1, r11= OC_C1S7
+ LDRD r10, r11, OC_C7S1_8_v6 ; r10= OC_C7S1, r11= OC_C1S7
PKHBT r9, r9, r12,LSL #16 ; r9 = <r9|r12>
LDR r0, [r1],#16 ; r0 = <x[0,1]|x[0,0]>
PKHTB r7, r7, r8, ASR #16 ; r7 = <x[1,7]|x[0,7]>
--
2.39.1
|