summaryrefslogtreecommitdiffstats
path: root/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
diff options
context:
space:
mode:
Diffstat (limited to 'media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S')
-rw-r--r--media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S191
1 files changed, 191 insertions, 0 deletions
diff --git a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
new file mode 100644
index 0000000000..73c1f4b82f
--- /dev/null
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
@@ -0,0 +1,191 @@
+@//
+@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@// Use of this source code is governed by a BSD-style license
+@// that can be found in the LICENSE file in the root of the source
+@// tree. An additional intellectual property rights grant can be found
+@// in the file PATENTS. All contributing project authors may
+@// be found in the AUTHORS file in the root of the source tree.
+@//
+@// This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
+@// to support float instead of SC32.
+@//
+
+@// Description:
+@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
+@// complex signal. This handles the general stage, not the first or last
+@// stage.
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/armCOMM_s.h"
+#include "dl/api/omxtypes_s.h"
+
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+
+@// Set debugging level
+@//DEBUG_ON SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+
+
+
+@// Guarding implementation by the processor name
+
+
+@//Input Registers
+
+#define pSrc r0
+#define pDst r2
+#define pTwiddle r1
+#define subFFTNum r6
+#define subFFTSize r7
+
+
+@//Output Registers
+
+
+@//Local Scratch Registers
+
+#define outPointStep r3
+#define pointStep r4
+#define grpCount r5
+#define setCount r8
+@//const RN 9
+#define step r10
+#define dstStep r11
+#define pTable r9
+#define pTmp r9
+
+@// Neon Registers
+
+#define dW D0.F32
+#define dX0 D2.F32
+#define dX1 D3.F32
+#define dX2 D4.F32
+#define dX3 D5.F32
+#define dY0 D6.F32
+#define dY1 D7.F32
+#define dY2 D8.F32
+#define dY3 D9.F32
+#define qT0 D10.F32
+#define qT1 D11.F32
+
+
+ .MACRO FFTSTAGE scaled, inverse, name
+
+ @// Define stack arguments
+
+
+ @// Update grpCount and grpSize rightaway inorder to reuse pGrpCount
+ @// and pGrpSize regs
+
+ LSR subFFTNum,subFFTNum,#1 @//grpSize
+ LSL grpCount,subFFTSize,#1
+
+
+ @// pT0+1 increments pT0 by 8 bytes
+ @// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
+ MOV pointStep,subFFTNum,LSL #2
+
+ @// update subFFTSize for the next stage
+ MOV subFFTSize,grpCount
+
+ @// pOut0+1 increments pOut0 by 8 bytes
+ @// pOut0+outPointStep == increment of 8*outPointStep bytes =
+ @// 4*size bytes
+ SMULBB outPointStep,grpCount,pointStep
+ LSL pointStep,pointStep,#1
+
+
+ RSB step,pointStep,#16
+ RSB dstStep,outPointStep,#16
+
+ @// Loop on the groups
+
+radix2GrpLoop\name :
+ MOV setCount,pointStep,LSR #3
+ VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
+
+
+ @// Loop on the sets
+
+
+radix2SetLoop\name :
+
+
+ @// point0: dX0-real part dX1-img part
+ VLD2 {dX0,dX1},[pSrc],pointStep
+ @// point1: dX2-real part dX3-img part
+ VLD2 {dX2,dX3},[pSrc],step
+
+ SUBS setCount,setCount,#2
+
+ .ifeqs "\inverse", "TRUE"
+ VMUL qT0,dX2,dW[0]
+ VMLA qT0,dX3,dW[1] @// real part
+ VMUL qT1,dX3,dW[0]
+ VMLS qT1,dX2,dW[1] @// imag part
+
+ .else
+
+ VMUL qT0,dX2,dW[0]
+ VMLS qT0,dX3,dW[1] @// real part
+ VMUL qT1,dX3,dW[0]
+ VMLA qT1,dX2,dW[1] @// imag part
+
+ .endif
+
+ VSUB dY0,dX0,qT0
+ VSUB dY1,dX1,qT1
+ VADD dY2,dX0,qT0
+ VADD dY3,dX1,qT1
+
+ VST2 {dY0,dY1},[pDst],outPointStep
+ @// dstStep = -outPointStep + 16
+ VST2 {dY2,dY3},[pDst],dstStep
+
+ BGT radix2SetLoop\name
+
+ SUBS grpCount,grpCount,#2
+ ADD pSrc,pSrc,pointStep
+ BGT radix2GrpLoop\name
+
+
+ @// Reset and Swap pSrc and pDst for the next stage
+ MOV pTmp,pDst
+ @// pDst -= 4*size; pSrc -= 8*size bytes
+ SUB pDst,pSrc,outPointStep,LSL #1
+ SUB pSrc,pTmp,outPointStep
+
+ @// Reset pTwiddle for the next stage
+ @// pTwiddle -= 4*size bytes
+ SUB pTwiddle,pTwiddle,outPointStep
+
+
+ .endm
+
+
+
+ M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
+ FFTSTAGE "FALSE","FALSE",FWD
+ M_END
+
+
+
+ M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
+ FFTSTAGE "FALSE","TRUE",INV
+ M_END
+
+
+ .end