1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
@//
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@//
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.s
@// to support float instead of SC32.
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
@// Set debugging level
@//DEBUG_ON SETL {TRUE}
@// Guarding implementation by the processor name
@// Guarding implementation by the processor name
@// Import symbols required from other files
@// (For example tables)
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
@//Input Registers
#define pSrc r0
#define pDst r1
#define pFFTSpec r2
@// Output registers
#define result r0
@//Local Scratch Registers
#define argTwiddle r1
#define argDst r2
#define argScale r4
#define tmpOrder r4
#define pTwiddle r4
#define pOut r5
#define subFFTSize r7
#define subFFTNum r6
#define N r6
#define order r14
#define diff r9
@// Total num of radix stages required to comple the FFT
#define count r8
#define x0r r4
#define x0i r5
#define diffMinusOne r2
@// Neon registers
#define dX0 D0.F32
#define qX0 Q0.F32
#define sN S0.S32
#define fN S1.F32
@// one must be the same as dScale[0]!
#define dScale D4.F32
#define one S8.F32
@// Allocate stack memory required by the function
M_ALLOC4 fftSize, 4
@// Write function header
M_START omxSP_FFTInv_CToC_FC32_Sfs,r11,d15
@ Structure offsets for the FFTSpec
.set ARMsFFTSpec_N, 0
.set ARMsFFTSpec_pBitRev, 4
.set ARMsFFTSpec_pTwiddle, 8
.set ARMsFFTSpec_pBuf, 12
@// Define stack arguments
@// Read the size from structure and take log
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
M_STR N, fftSize
@// Read other structure parameters
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
CLZ order,N @// N = 2^order
RSB order,order,#31
MOV subFFTSize,#1
@//MOV subFFTNum,N
CMP order,#3
BGT orderGreaterthan3 @// order > 3
CMP order,#1
BGE orderGreaterthan0 @// order > 0
VLD1 dX0,[pSrc]
VST1 dX0,[pDst]
MOV pSrc,pDst
BLT FFTEnd
orderGreaterthan0:
@// set the buffers appropriately for various orders
CMP order,#2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
MOV argTwiddle,pTwiddle
BGE orderGreaterthan1
@// order = 1
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan1:
MOV tmpOrder,order @// tmpOrder = RN 4
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
CMP tmpOrder,#2
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
B FFTEnd
orderGreaterthan3:
@// Set input args to fft stages
TST order, #2
MOVNE argDst,pDst
MOVEQ argDst,pOut
@// Pass the first stage destination in RN5
MOVEQ pOut,pDst
MOV argTwiddle,pTwiddle
@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ
TST order,#0x00000001
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
CMP subFFTNum,#4
BLT FFTEnd
unscaledRadix4Loop:
BEQ lastStageUnscaledRadix4
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
CMP subFFTNum,#4
B unscaledRadix4Loop
lastStageUnscaledRadix4:
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
B FFTEnd
FFTEnd: @// Does only the scaling
M_LDR N, fftSize
VMOV sN,N
VCVT fN, sN @ fn = fftSize, as a float
VMOV one, 1.0
VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
@ Scale data, doing 2 complex values at a time (because N is
@ always even).
@// N = subFFTSize ; dataptr = pDst ; scale = diff
scaleFFTData:
VLD1 {qX0},[pSrc, :128] @// pSrc contains pDst pointer
SUBS subFFTSize,subFFTSize,#2
VMUL qX0, qX0, dScale[0]
VST1 {qX0},[pSrc, :128]!
BGT scaleFFTData
End:
@// Set return value
MOV result, #OMX_Sts_NoErr
@// Write function tail
M_END
.end
|