From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 .../src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S   | 284 +++++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S

(limited to 'media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S')

diff --git a/media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S b/media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S
new file mode 100644
index 0000000000..0a8cb4dccc
--- /dev/null
+++ b/media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_unscaled_s.S
@@ -0,0 +1,284 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Copyright 2016, Mozilla Foundation and contributors
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
+@//  to support float instead of SC32.
+@//
+@//  It is further modified to produce an "unscaled" version, which
+@//  actually multiplies by two for consistency with the other FFT functions
+@//  in use.
+@//
+
+@//
+@// Description:
+@// Compute an inverse FFT from a complex (CCS-format) input to a real
+@// F32 output; the result is multiplied by two (see the header above).
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/armCOMM_s.h"
+#include "dl/api/omxtypes_s.h"
+
+
+@// Import symbols required from other files
+@// (For example tables)
+
+        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+
+
+      @// Guarding implementation by the processor name
+
+@// Import symbols required from other files
+@// (For example tables)
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
+        .extern  armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
+
+
+@// Input registers: names for r0-r3 as used by the function below
+
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+#define scale           r3
+@// (scale is defined here but is not referenced by the code in this file)
+
+@// Output register: result shares r0 with pSrc
+#define result          r0
+
+@// Local scratch registers. Many names alias one register: argTwiddle is
+@// pDst (r1), argDst is pFFTSpec (r2), r4 is argScale/tmpOrder/pTwiddle/x0r.
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define tmpOrder        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+@// Total number of radix stages required to complete the FFT
+#define count           r8
+#define x0r             r4
+#define x0i             r5
+#define diffMinusOne    r2
+#define round           r3
+@// Note: order (above) and zero (below) both map to r14, the link register.
+#define pOut1           r2
+#define size            r7
+#define step            r8
+#define step1           r9
+#define twStep          r10
+#define pTwiddleTmp     r11
+#define argTwiddle1     r12
+#define zero            r14
+
+@// NEON registers (several names share a D register, e.g. dShift/dX1/dX0i)
+
+#define dX0     D0.F32
+#define dShift  D1.F32
+#define dX1     D1.F32
+#define dY0     D2.F32
+#define dY1     D3.F32
+#define dX0r    D0.F32
+#define dX0i    D1.F32
+#define dX1r    D2.F32
+#define dX1i    D3.F32
+#define dW0r    D4.F32
+#define dW0i    D5.F32
+#define dW1r    D6.F32
+#define dW1i    D7.F32
+#define dT0     D8.F32
+#define dT1     D9.F32
+#define dT2     D10.F32
+#define dT3     D11.F32
+#define qT0     d12.F32
+#define qT1     d14.F32
+#define qT2     d16.F32
+#define qT3     d18.F32
+#define dY0r    D4.F32
+#define dY0i    D5.F32
+#define dY1r    D6.F32
+#define dY1i    D7.F32
+#define dzero   D20.F32
+@// Note: qT0-qT3 above are spelled with a q prefix but name d registers.
+#define dY2     D4.F32
+#define dY3     D5.F32
+#define dW0     D6.F32
+#define dW1     D7.F32
+#define dW0Tmp  D10.F32
+#define dW1Neg  D11.F32
+
+#define sN      S0.S32
+#define fN      S1.F32
+@// two (S4) must be lane 0 of dScale (D2): S4 is the low half of D2.
+#define dScale  D2.F32
+#define two S4.F32
+
+
+    @// Allocate the stack slot complexFFTSize (written once, at complexIFFT below)
+        M_ALLOC4        complexFFTSize, 4
+
+    @// Write function header; entry registers are pSrc=r0, pDst=r1, pFFTSpec=r2
+        M_START     omxSP_FFTInv_CCSToR_F32_Sfs_unscaled,r11,d15
+    @// Flow: N<=1 copy; else pre-twiddle, N/2-point complex IFFT stages, scale by 2
+@// Byte offsets into the FFTSpec structure (pBitRev is not read in this function)
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @// Define stack arguments (none are declared in this file)
+
+        @// Read the FFT size N from the spec; its log2 is taken later, at complexIFFT
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @// Read the twiddle table pointer and the pBuf pointer (used as pOut)
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        @// N<=1: treat separately by copying one 32-bit element from pSrc to pDst
+        CMP     N,#1
+        BGT     sizeGreaterThanOne
+        VLD1    dX0[0],[pSrc]
+        VST1    dX0[0],[pDst]
+        @// NOTE(review): this path skips the x2 scaling done at FFTEnd - confirm intended
+        B       End
+
+sizeGreaterThanOne:
+
+        @// Call the preTwiddle Radix2 stage before doing the complex IFFT
+        @// (external helper; its register contract is not visible in this file)
+
+        BL    armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
+
+
+complexIFFT:
+
+        ASR     N,N,#1                             @// N/2 point complex IFFT
+        M_STR   N, complexFFTSize                  @// Save halved N (never reloaded in this file)
+        ADD     pSrc,pOut,N,LSL #3                 @// pSrc = pOut + 8*N bytes (pOut1)
+
+        CLZ     order,N                             @// N = 2^order
+        RSB     order,order,#31                     @// order = 31 - clz(N)
+        MOV     subFFTSize,#1
+        @//MOV     subFFTNum,N
+        @// subFFTNum needs no copy: it is the same register as N (r6)
+        CMP     order,#3
+        BGT     orderGreaterthan3                   @// order > 3
+
+        CMP     order,#1
+        BGE     orderGreaterthan0                   @// order > 0
+        @// order == 0: copy one 64-bit D register from pSrc to pDst, then scale it there
+        VLD1    dX0,[pSrc]
+        VST1    dX0,[pDst]
+        MOV     pSrc,pDst
+        BLT     FFTEnd                              @// always taken: flags still from CMP order,#1
+
+orderGreaterthan0:
+        @// Orders 1-3: order 2 sets argDst=pOut and pOut=pDst; otherwise argDst=pDst
+        CMP     order,#2
+        MOVNE   argDst,pDst
+        MOVEQ   argDst,pOut
+        @// Pass the first stage destination in r5 (pOut)
+        MOVEQ   pOut,pDst
+        MOV     argTwiddle,pTwiddle                 @// r1 stops being pDst here
+
+        BGE     orderGreaterthan1                   @// order is 2 or 3
+        BLLT    armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe  @// order = 1
+        B       FFTEnd
+
+orderGreaterthan1:
+        MOV     tmpOrder,order                          @// copy order out of r14 (LR) before BL
+        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
+        CMP     tmpOrder,#2                             @// presumes the callee preserved r4 - confirm
+        BLGT    armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe     @// middle stage only for order 3
+        BL      armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+
+orderGreaterthan3:
+specialScaleCase:
+
+        @// Set input args to fft stages (bit 1 of order selects the buffer assignment)
+        TST     order, #2
+        MOVNE   argDst,pDst
+        MOVEQ   argDst,pOut
+        @// Pass the first stage destination in r5 (pOut)
+        MOVEQ   pOut,pDst
+        MOV     argTwiddle,pTwiddle                 @// r1 stops being pDst here
+
+        @// Check for even or odd order: even starts with radix 4, odd with radix 8
+        @// NOTE: The following combination of BL's would work fine even though
+        @// the first BL would corrupt the flags. This is because the end of
+        @// the "grpZeroSetLoop" loop inside
+        @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
+        @// to EQ
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
+        BLNE    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
+
+        CMP        subFFTNum,#4
+        BLT     FFTEnd
+        @// subFFTNum (r6) is presumably updated by each stage helper - confirm
+
+unscaledRadix4Loop:
+        BEQ        lastStageUnscaledRadix4          @// subFFTNum == 4: one stage left
+         BL        armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
+         CMP        subFFTNum,#4
+         B        unscaledRadix4Loop
+
+lastStageUnscaledRadix4:
+        BL      armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
+        B        FFTEnd                             @// FFTEnd is the next label
+
+FFTEnd:                                               @// Does only the scaling
+        @ Scale inverse FFT result by 2 for consistency with other FFTs
+        VMOV    two, 2.0                   @// S4 = 2.0, i.e. lane 0 of dScale (D2)
+
+        @// Each pass scales one 64-bit D register (two F32 values) in place at pSrc
+scaleFFTData:
+        VLD1    {dX0},[pSrc]            @// pSrc contains pDst pointer
+        SUBS    subFFTSize,subFFTSize,#1
+        VMUL    dX0, dX0, dScale[0]
+        VST1    {dX0},[pSrc]!
+        @// Loop while the SUBS result is > 0 (VMUL and VST1 leave the flags unchanged)
+        BGT     scaleFFTData
+
+
+End:
+        @// Set return value
+        MOV     result, #OMX_Sts_NoErr
+
+        @// Write function tail
+        M_END
+
+
+
+        .end
-- 
cgit v1.2.3