summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/webrtc/modules/audio_processing/utility
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libwebrtc/webrtc/modules/audio_processing/utility')
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc53
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h52
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc58
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc703
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h251
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h48
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc618
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc486
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h244
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc543
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h60
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc1185
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc352
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc438
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h54
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h94
16 files changed, 5239 insertions, 0 deletions
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc
new file mode 100644
index 0000000000..3d766929c6
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/block_mean_calculator.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+BlockMeanCalculator::BlockMeanCalculator(size_t block_length)
+ : block_length_(block_length),
+ count_(0),
+ sum_(0.0),
+ mean_(0.0) {
+ RTC_DCHECK(block_length_ != 0);
+}
+
+void BlockMeanCalculator::Reset() {
+ Clear();
+ mean_ = 0.0;
+}
+
+void BlockMeanCalculator::AddValue(float value) {
+ sum_ += value;
+ ++count_;
+ if (count_ == block_length_) {
+ mean_ = sum_ / block_length_;
+ Clear();
+ }
+}
+
+bool BlockMeanCalculator::EndOfBlock() const {
+ return count_ == 0;
+}
+
+float BlockMeanCalculator::GetLatestMean() const {
+ return mean_;
+}
+
+// Flush all samples added.
+void BlockMeanCalculator::Clear() {
+ count_ = 0;
+ sum_ = 0.0;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h
new file mode 100644
index 0000000000..cfa7cfbeba
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_
+
+#include <stddef.h>
+
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// BlockMeanCalculator calculates the mean of a block of values. Values are
+// added one after another, and the mean is updated at the end of every block.
+class BlockMeanCalculator {
+ public:
+ explicit BlockMeanCalculator(size_t block_length);
+
+ // Reset.
+ void Reset();
+
+ // Add one value to the sequence.
+ void AddValue(float value);
+
+ // Return whether the latest added value was at the end of a block.
+ bool EndOfBlock() const;
+
+ // Return the latest mean.
+ float GetLatestMean() const;
+
+ private:
+ // Clear all values added.
+ void Clear();
+
+ const size_t block_length_;
+ size_t count_;
+ float sum_;
+ float mean_;
+
+ RTC_DISALLOW_COPY_AND_ASSIGN(BlockMeanCalculator);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc
new file mode 100644
index 0000000000..1f4ebf1b67
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/block_mean_calculator.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(MeanCalculatorTest, Correctness) {
+ const size_t kBlockLength = 10;
+ BlockMeanCalculator mean_calculator(kBlockLength);
+ size_t i = 0;
+ float reference = 0.0;
+
+ for (; i < kBlockLength - 1; ++i) {
+ mean_calculator.AddValue(static_cast<float>(i));
+ EXPECT_FALSE(mean_calculator.EndOfBlock());
+ }
+ mean_calculator.AddValue(static_cast<float>(i++));
+ EXPECT_TRUE(mean_calculator.EndOfBlock());
+
+ for (; i < 3 * kBlockLength; ++i) {
+ const bool end_of_block = i % kBlockLength == 0;
+ if (end_of_block) {
+ // Sum of (i - kBlockLength) ... (i - 1)
+ reference = i - 0.5 * (1 + kBlockLength);
+ }
+ EXPECT_EQ(mean_calculator.EndOfBlock(), end_of_block);
+ EXPECT_EQ(reference, mean_calculator.GetLatestMean());
+ mean_calculator.AddValue(static_cast<float>(i));
+ }
+}
+
+TEST(MeanCalculatorTest, Reset) {
+ const size_t kBlockLength = 10;
+ BlockMeanCalculator mean_calculator(kBlockLength);
+ for (size_t i = 0; i < kBlockLength - 1; ++i) {
+ mean_calculator.AddValue(static_cast<float>(i));
+ }
+ mean_calculator.Reset();
+ size_t i = 0;
+ for (; i < kBlockLength - 1; ++i) {
+ mean_calculator.AddValue(static_cast<float>(i));
+ EXPECT_FALSE(mean_calculator.EndOfBlock());
+ }
+ mean_calculator.AddValue(static_cast<float>(i));
+ EXPECT_TRUE(mean_calculator.EndOfBlock());
+ EXPECT_EQ(mean_calculator.GetLatestMean(), 0.5 * (kBlockLength - 1));
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc
new file mode 100644
index 0000000000..871b541651
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc
@@ -0,0 +1,703 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+// Number of right shifts for scaling is linearly depending on number of bits in
+// the far-end binary spectrum.
+static const int kShiftsAtZero = 13; // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
+
+static const int32_t kProbabilityOffset = 1024; // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704; // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816; // 5.5 in Q9.
+
+// Robust validation settings
+static const float kHistogramMax = 3000.f;
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f;
+static const int kMinRequiredHits = 10;
+static const int kMaxHitsWhenPossiblyNonCausal = 10;
+static const int kMaxHitsWhenPossiblyCausal = 1000;
+static const float kQ14Scaling = 1.f / (1 << 14); // Scaling by 2^14 to get Q0.
+static const float kFractionSlope = 0.05f;
+static const float kMinFractionWhenPossiblyCausal = 0.5f;
+static const float kMinFractionWhenPossiblyNonCausal = 0.25f;
+
+// Counts and returns number of bits of a 32-bit word.
+static int BitCount(uint32_t u32) {
+ uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) -
+ ((u32 >> 2) & 011111111111);
+ tmp = ((tmp + (tmp >> 3)) & 030707070707);
+ tmp = (tmp + (tmp >> 6));
+ tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
+
+ return ((int) tmp);
+}
+
+// Compares the |binary_vector| with all rows of the |binary_matrix| and counts
+// per row the number of times they have the same value.
+//
+// Inputs:
+// - binary_vector : binary "vector" stored in a long
+// - binary_matrix : binary "matrix" stored as a vector of long
+// - matrix_size : size of binary "matrix"
+//
+// Output:
+// - bit_counts : "Vector" stored as a long, containing for each
+// row the number of times the matrix row and the
+// input vector have the same value
+//
+static void BitCountComparison(uint32_t binary_vector,
+ const uint32_t* binary_matrix,
+ int matrix_size,
+ int32_t* bit_counts) {
+ int n = 0;
+
+ // Compare |binary_vector| with all rows of the |binary_matrix|
+ for (; n < matrix_size; n++) {
+ bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]);
+ }
+}
+
+// Collects necessary statistics for the HistogramBasedValidation(). This
+// function has to be called prior to calling HistogramBasedValidation(). The
+// statistics updated and used by the HistogramBasedValidation() are:
+// 1. the number of |candidate_hits|, which states for how long we have had the
+// same |candidate_delay|
+// 2. the |histogram| of candidate delays over time. This histogram is
+// weighted with respect to a reliability measure and time-varying to cope
+// with possible delay shifts.
+// For further description see commented code.
+//
+// Inputs:
+// - candidate_delay : The delay to validate.
+// - valley_depth_q14 : The cost function has a valley/minimum at the
+// |candidate_delay| location. |valley_depth_q14| is the
+// cost function difference between the minimum and
+// maximum locations. The value is in the Q14 domain.
+// - valley_level_q14 : Is the cost function value at the minimum, in Q14.
+static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self,
+ int candidate_delay,
+ int32_t valley_depth_q14,
+ int32_t valley_level_q14) {
+ const float valley_depth = valley_depth_q14 * kQ14Scaling;
+ float decrease_in_last_set = valley_depth;
+ const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ?
+ kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal;
+ int i = 0;
+
+ RTC_DCHECK_EQ(self->history_size, self->farend->history_size);
+ // Reset |candidate_hits| if we have a new candidate.
+ if (candidate_delay != self->last_candidate_delay) {
+ self->candidate_hits = 0;
+ self->last_candidate_delay = candidate_delay;
+ }
+ self->candidate_hits++;
+
+ // The |histogram| is updated differently across the bins.
+ // 1. The |candidate_delay| histogram bin is increased with the
+ // |valley_depth|, which is a simple measure of how reliable the
+ // |candidate_delay| is. The histogram is not increased above
+ // |kHistogramMax|.
+ self->histogram[candidate_delay] += valley_depth;
+ if (self->histogram[candidate_delay] > kHistogramMax) {
+ self->histogram[candidate_delay] = kHistogramMax;
+ }
+ // 2. The histogram bins in the neighborhood of |candidate_delay| are
+ // unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}.
+ // 3. The histogram bins in the neighborhood of |last_delay| are decreased
+ // with |decrease_in_last_set|. This value equals the difference between
+ // the cost function values at the locations |candidate_delay| and
+ // |last_delay| until we reach |max_hits_for_slow_change| consecutive hits
+ // at the |candidate_delay|. If we exceed this amount of hits the
+ // |candidate_delay| is a "potential" candidate and we start decreasing
+ // these histogram bins more rapidly with |valley_depth|.
+ if (self->candidate_hits < max_hits_for_slow_change) {
+ decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] -
+ valley_level_q14) * kQ14Scaling;
+ }
+ // 4. All other bins are decreased with |valley_depth|.
+ // TODO(bjornv): Investigate how to make this loop more efficient. Split up
+ // the loop? Remove parts that doesn't add too much.
+ for (i = 0; i < self->history_size; ++i) {
+ int is_in_last_set = (i >= self->last_delay - 2) &&
+ (i <= self->last_delay + 1) && (i != candidate_delay);
+ int is_in_candidate_set = (i >= candidate_delay - 2) &&
+ (i <= candidate_delay + 1);
+ self->histogram[i] -= decrease_in_last_set * is_in_last_set +
+ valley_depth * (!is_in_last_set && !is_in_candidate_set);
+ // 5. No histogram bin can go below 0.
+ if (self->histogram[i] < 0) {
+ self->histogram[i] = 0;
+ }
+ }
+}
+
+// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(),
+// based on a mix of counting concurring hits with a modified histogram
+// of recent delay estimates. In brief a candidate is valid (returns 1) if it
+// is the most likely according to the histogram. There are a couple of
+// exceptions that are worth mentioning:
+// 1. If the |candidate_delay| < |last_delay| it can be that we are in a
+// non-causal state, breaking a possible echo control algorithm. Hence, we
+// open up for a quicker change by allowing the change even if the
+// |candidate_delay| is not the most likely one according to the histogram.
+// 2. There's a minimum number of hits (kMinRequiredHits) and the histogram
+// value has to reached a minimum (kMinHistogramThreshold) to be valid.
+// 3. The action is also depending on the filter length used for echo control.
+// If the delay difference is larger than what the filter can capture, we
+// also move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+// - candidate_delay : The delay to validate.
+//
+// Return value:
+// - is_histogram_valid : 1 - The |candidate_delay| is valid.
+// 0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+ int candidate_delay) {
+ float fraction = 1.f;
+ float histogram_threshold = self->histogram[self->compare_delay];
+ const int delay_difference = candidate_delay - self->last_delay;
+ int is_histogram_valid = 0;
+
+ // The histogram based validation of |candidate_delay| is done by comparing
+ // the |histogram| at bin |candidate_delay| with a |histogram_threshold|.
+ // This |histogram_threshold| equals a |fraction| of the |histogram| at bin
+ // |last_delay|. The |fraction| is a piecewise linear function of the
+ // |delay_difference| between the |candidate_delay| and the |last_delay|
+ // allowing for a quicker move if
+ // i) a potential echo control filter can not handle these large differences.
+ // ii) keeping |last_delay| instead of updating to |candidate_delay| could
+ // force an echo control into a non-causal state.
+ // We further require the histogram to have reached a minimum value of
+ // |kMinHistogramThreshold|. In addition, we also require the number of
+ // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious
+ // values.
+
+ // Calculate a comparison histogram value (|histogram_threshold|) that is
+ // depending on the distance between the |candidate_delay| and |last_delay|.
+ // TODO(bjornv): How much can we gain by turning the fraction calculation
+ // into tables?
+ if (delay_difference > self->allowed_offset) {
+ fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+ fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction :
+ kMinFractionWhenPossiblyCausal);
+ } else if (delay_difference < 0) {
+ fraction = kMinFractionWhenPossiblyNonCausal -
+ kFractionSlope * delay_difference;
+ fraction = (fraction > 1.f ? 1.f : fraction);
+ }
+ histogram_threshold *= fraction;
+ histogram_threshold = (histogram_threshold > kMinHistogramThreshold ?
+ histogram_threshold : kMinHistogramThreshold);
+
+ is_histogram_valid =
+ (self->histogram[candidate_delay] >= histogram_threshold) &&
+ (self->candidate_hits > kMinRequiredHits);
+
+ return is_histogram_valid;
+}
+
+// Performs a robust validation of the |candidate_delay| estimated in
+// WebRtc_ProcessBinarySpectrum(). The algorithm takes the
+// |is_instantaneous_valid| and the |is_histogram_valid| and combines them
+// into a robust validation. The HistogramBasedValidation() has to be called
+// prior to this call.
+// For further description on how the combination is done, see commented code.
+//
+// Inputs:
+// - candidate_delay : The delay to validate.
+// - is_instantaneous_valid : The instantaneous validation performed in
+// WebRtc_ProcessBinarySpectrum().
+// - is_histogram_valid : The histogram based validation.
+//
+// Return value:
+// - is_robust : 1 - The candidate_delay is valid according to a
+// combination of the two inputs.
+// : 0 - Otherwise.
+static int RobustValidation(const BinaryDelayEstimator* self,
+ int candidate_delay,
+ int is_instantaneous_valid,
+ int is_histogram_valid) {
+ int is_robust = 0;
+
+ // The final robust validation is based on the two algorithms; 1) the
+ // |is_instantaneous_valid| and 2) the histogram based with result stored in
+ // |is_histogram_valid|.
+ // i) Before we actually have a valid estimate (|last_delay| == -2), we say
+ // a candidate is valid if either algorithm states so
+ // (|is_instantaneous_valid| OR |is_histogram_valid|).
+ is_robust = (self->last_delay < 0) &&
+ (is_instantaneous_valid || is_histogram_valid);
+ // ii) Otherwise, we need both algorithms to be certain
+ // (|is_instantaneous_valid| AND |is_histogram_valid|)
+ is_robust |= is_instantaneous_valid && is_histogram_valid;
+ // iii) With one exception, i.e., the histogram based algorithm can overrule
+ // the instantaneous one if |is_histogram_valid| = 1 and the histogram
+ // is significantly strong.
+ is_robust |= is_histogram_valid &&
+ (self->histogram[candidate_delay] > self->last_delay_histogram);
+
+ return is_robust;
+}
+
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+
+ if (self == NULL) {
+ return;
+ }
+
+ free(self->binary_far_history);
+ self->binary_far_history = NULL;
+
+ free(self->far_bit_counts);
+ self->far_bit_counts = NULL;
+
+ free(self);
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+ int history_size) {
+ BinaryDelayEstimatorFarend* self = NULL;
+
+ if (history_size > 1) {
+ // Sanity conditions fulfilled.
+ self = static_cast<BinaryDelayEstimatorFarend*>(
+ malloc(sizeof(BinaryDelayEstimatorFarend)));
+ }
+ if (self == NULL) {
+ return NULL;
+ }
+
+ self->history_size = 0;
+ self->binary_far_history = NULL;
+ self->far_bit_counts = NULL;
+ if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) {
+ WebRtc_FreeBinaryDelayEstimatorFarend(self);
+ self = NULL;
+ }
+ return self;
+}
+
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+ int history_size) {
+ RTC_DCHECK(self);
+ // (Re-)Allocate memory for history buffers.
+ self->binary_far_history = static_cast<uint32_t*>(
+ realloc(self->binary_far_history,
+ history_size * sizeof(*self->binary_far_history)));
+ self->far_bit_counts = static_cast<int*>(
+ realloc(self->far_bit_counts,
+ history_size * sizeof(*self->far_bit_counts)));
+ if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) {
+ history_size = 0;
+ }
+ // Fill with zeros if we have expanded the buffers.
+ if (history_size > self->history_size) {
+ int size_diff = history_size - self->history_size;
+ memset(&self->binary_far_history[self->history_size],
+ 0,
+ sizeof(*self->binary_far_history) * size_diff);
+ memset(&self->far_bit_counts[self->history_size],
+ 0,
+ sizeof(*self->far_bit_counts) * size_diff);
+ }
+ self->history_size = history_size;
+
+ return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+ RTC_DCHECK(self);
+ memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
+ memset(self->far_bit_counts, 0, sizeof(int) * self->history_size);
+}
+
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+ BinaryDelayEstimatorFarend* self, int delay_shift) {
+ int abs_shift = abs(delay_shift);
+ int shift_size = 0;
+ int dest_index = 0;
+ int src_index = 0;
+ int padding_index = 0;
+
+ RTC_DCHECK(self);
+ shift_size = self->history_size - abs_shift;
+ RTC_DCHECK_GT(shift_size, 0);
+ if (delay_shift == 0) {
+ return;
+ } else if (delay_shift > 0) {
+ dest_index = abs_shift;
+ } else if (delay_shift < 0) {
+ src_index = abs_shift;
+ padding_index = shift_size;
+ }
+
+ // Shift and zero pad buffers.
+ memmove(&self->binary_far_history[dest_index],
+ &self->binary_far_history[src_index],
+ sizeof(*self->binary_far_history) * shift_size);
+ memset(&self->binary_far_history[padding_index], 0,
+ sizeof(*self->binary_far_history) * abs_shift);
+ memmove(&self->far_bit_counts[dest_index],
+ &self->far_bit_counts[src_index],
+ sizeof(*self->far_bit_counts) * shift_size);
+ memset(&self->far_bit_counts[padding_index], 0,
+ sizeof(*self->far_bit_counts) * abs_shift);
+}
+
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle,
+ uint32_t binary_far_spectrum) {
+ RTC_DCHECK(handle);
+ // Shift binary spectrum history and insert current |binary_far_spectrum|.
+ memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]),
+ (handle->history_size - 1) * sizeof(uint32_t));
+ handle->binary_far_history[0] = binary_far_spectrum;
+
+ // Shift history of far-end binary spectrum bit counts and insert bit count
+ // of current |binary_far_spectrum|.
+ memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]),
+ (handle->history_size - 1) * sizeof(int));
+ handle->far_bit_counts[0] = BitCount(binary_far_spectrum);
+}
+
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) {
+
+ if (self == NULL) {
+ return;
+ }
+
+ free(self->mean_bit_counts);
+ self->mean_bit_counts = NULL;
+
+ free(self->bit_counts);
+ self->bit_counts = NULL;
+
+ free(self->binary_near_history);
+ self->binary_near_history = NULL;
+
+ free(self->histogram);
+ self->histogram = NULL;
+
+ // BinaryDelayEstimator does not have ownership of |farend|, hence we do not
+ // free the memory here. That should be handled separately by the user.
+ self->farend = NULL;
+
+ free(self);
+}
+
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+ BinaryDelayEstimatorFarend* farend, int max_lookahead) {
+ BinaryDelayEstimator* self = NULL;
+
+ if ((farend != NULL) && (max_lookahead >= 0)) {
+ // Sanity conditions fulfilled.
+ self = static_cast<BinaryDelayEstimator*>(
+ malloc(sizeof(BinaryDelayEstimator)));
+ }
+ if (self == NULL) {
+ return NULL;
+ }
+
+ self->farend = farend;
+ self->near_history_size = max_lookahead + 1;
+ self->history_size = 0;
+ self->robust_validation_enabled = 0; // Disabled by default.
+ self->allowed_offset = 0;
+
+ self->lookahead = max_lookahead;
+
+ // Allocate memory for spectrum and history buffers.
+ self->mean_bit_counts = NULL;
+ self->bit_counts = NULL;
+ self->histogram = NULL;
+ self->binary_near_history = static_cast<uint32_t*>(
+ malloc((max_lookahead + 1) * sizeof(*self->binary_near_history)));
+ if (self->binary_near_history == NULL ||
+ WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) {
+ WebRtc_FreeBinaryDelayEstimator(self);
+ self = NULL;
+ }
+
+ return self;
+}
+
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+ int history_size) {
+ BinaryDelayEstimatorFarend* far = self->farend;
+ // (Re-)Allocate memory for spectrum and history buffers.
+ if (history_size != far->history_size) {
+ // Only update far-end buffers if we need.
+ history_size = WebRtc_AllocateFarendBufferMemory(far, history_size);
+ }
+ // The extra array element in |mean_bit_counts| and |histogram| is a dummy
+ // element only used while |last_delay| == -2, i.e., before we have a valid
+ // estimate.
+ self->mean_bit_counts = static_cast<int32_t*>(
+ realloc(self->mean_bit_counts,
+ (history_size + 1) * sizeof(*self->mean_bit_counts)));
+ self->bit_counts = static_cast<int32_t*>(
+ realloc(self->bit_counts, history_size * sizeof(*self->bit_counts)));
+ self->histogram = static_cast<float*>(
+ realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram)));
+
+ if ((self->mean_bit_counts == NULL) ||
+ (self->bit_counts == NULL) ||
+ (self->histogram == NULL)) {
+ history_size = 0;
+ }
+ // Fill with zeros if we have expanded the buffers.
+ if (history_size > self->history_size) {
+ int size_diff = history_size - self->history_size;
+ memset(&self->mean_bit_counts[self->history_size],
+ 0,
+ sizeof(*self->mean_bit_counts) * size_diff);
+ memset(&self->bit_counts[self->history_size],
+ 0,
+ sizeof(*self->bit_counts) * size_diff);
+ memset(&self->histogram[self->history_size],
+ 0,
+ sizeof(*self->histogram) * size_diff);
+ }
+ self->history_size = history_size;
+
+ return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) {
+ int i = 0;
+ RTC_DCHECK(self);
+
+ memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size);
+ memset(self->binary_near_history,
+ 0,
+ sizeof(uint32_t) * self->near_history_size);
+ for (i = 0; i <= self->history_size; ++i) {
+ self->mean_bit_counts[i] = (20 << 9); // 20 in Q9.
+ self->histogram[i] = 0.f;
+ }
+ self->minimum_probability = kMaxBitCountsQ9; // 32 in Q9.
+ self->last_delay_probability = (int) kMaxBitCountsQ9; // 32 in Q9.
+
+ // Default return value if we're unable to estimate. -1 is used for errors.
+ self->last_delay = -2;
+
+ self->last_candidate_delay = -2;
+ self->compare_delay = self->history_size;
+ self->candidate_hits = 0;
+ self->last_delay_histogram = 0.f;
+}
+
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+ int delay_shift) {
+ int lookahead = 0;
+ RTC_DCHECK(self);
+ lookahead = self->lookahead;
+ self->lookahead -= delay_shift;
+ if (self->lookahead < 0) {
+ self->lookahead = 0;
+ }
+ if (self->lookahead > self->near_history_size - 1) {
+ self->lookahead = self->near_history_size - 1;
+ }
+ return lookahead - self->lookahead;
+}
+
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+ uint32_t binary_near_spectrum) {
+ int i = 0;
+ int candidate_delay = -1;
+ int valid_candidate = 0;
+
+ int32_t value_best_candidate = kMaxBitCountsQ9;
+ int32_t value_worst_candidate = 0;
+ int32_t valley_depth = 0;
+
+ RTC_DCHECK(self);
+ if (self->farend->history_size != self->history_size) {
+ // Non matching history sizes.
+ return -1;
+ }
+ if (self->near_history_size > 1) {
+ // If we apply lookahead, shift near-end binary spectrum history. Insert
+ // current |binary_near_spectrum| and pull out the delayed one.
+ memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]),
+ (self->near_history_size - 1) * sizeof(uint32_t));
+ self->binary_near_history[0] = binary_near_spectrum;
+ binary_near_spectrum = self->binary_near_history[self->lookahead];
+ }
+
+ // Compare with delayed spectra and store the |bit_counts| for each delay.
+ BitCountComparison(binary_near_spectrum, self->farend->binary_far_history,
+ self->history_size, self->bit_counts);
+
+ // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|.
+ for (i = 0; i < self->history_size; i++) {
+ // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
+ // factor up to 2^26. We use Q9.
+ int32_t bit_count = (self->bit_counts[i] << 9); // Q9.
+
+ // Update |mean_bit_counts| only when far-end signal has something to
+ // contribute. If |far_bit_counts| is zero the far-end signal is weak and
+ // we likely have a poor echo condition, hence don't update.
+ if (self->farend->far_bit_counts[i] > 0) {
+ // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|.
+ int shifts = kShiftsAtZero;
+ shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4;
+ WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i]));
+ }
+ }
+
+ // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate|
+ // of |mean_bit_counts|.
+ for (i = 0; i < self->history_size; i++) {
+ if (self->mean_bit_counts[i] < value_best_candidate) {
+ value_best_candidate = self->mean_bit_counts[i];
+ candidate_delay = i;
+ }
+ if (self->mean_bit_counts[i] > value_worst_candidate) {
+ value_worst_candidate = self->mean_bit_counts[i];
+ }
+ }
+ valley_depth = value_worst_candidate - value_best_candidate;
+
+ // The |value_best_candidate| is a good indicator on the probability of
+ // |candidate_delay| being an accurate delay (a small |value_best_candidate|
+ // means a good binary match). In the following sections we make a decision
+ // whether to update |last_delay| or not.
+ // 1) If the difference bit counts between the best and the worst delay
+ // candidates is too small we consider the situation to be unreliable and
+ // don't update |last_delay|.
+ // 2) If the situation is reliable we update |last_delay| if the value of the
+ // best candidate delay has a value less than
+ // i) an adaptive threshold |minimum_probability|, or
+ // ii) this corresponding value |last_delay_probability|, but updated at
+ // this time instant.
+
+ // Update |minimum_probability|.
+ if ((self->minimum_probability > kProbabilityLowerLimit) &&
+ (valley_depth > kProbabilityMinSpread)) {
+ // The "hard" threshold can't be lower than 17 (in Q9).
+ // The valley in the curve also has to be distinct, i.e., the
+ // difference between |value_worst_candidate| and |value_best_candidate| has
+ // to be large enough.
+ int32_t threshold = value_best_candidate + kProbabilityOffset;
+ if (threshold < kProbabilityLowerLimit) {
+ threshold = kProbabilityLowerLimit;
+ }
+ if (self->minimum_probability > threshold) {
+ self->minimum_probability = threshold;
+ }
+ }
+ // Update |last_delay_probability|.
+ // We use a Markov type model, i.e., a slowly increasing level over time.
+ self->last_delay_probability++;
+ // Validate |candidate_delay|. We have a reliable instantaneous delay
+ // estimate if
+ // 1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|)
+ // and
+ // 2) The depth of the valley is deep enough
+ // (|value_best_candidate| < |minimum_probability|)
+ // and deeper than the best estimate so far
+ // (|value_best_candidate| < |last_delay_probability|)
+ valid_candidate = ((valley_depth > kProbabilityOffset) &&
+ ((value_best_candidate < self->minimum_probability) ||
+ (value_best_candidate < self->last_delay_probability)));
+
+ // Check for nonstationary farend signal.
+ const bool non_stationary_farend =
+ std::any_of(self->farend->far_bit_counts,
+ self->farend->far_bit_counts + self->history_size,
+ [](int a) { return a > 0; });
+
+ if (non_stationary_farend) {
+ // Only update the validation statistics when the farend is nonstationary
+ // as the underlying estimates are otherwise frozen.
+ UpdateRobustValidationStatistics(self, candidate_delay, valley_depth,
+ value_best_candidate);
+ }
+
+ if (self->robust_validation_enabled) {
+ int is_histogram_valid = HistogramBasedValidation(self, candidate_delay);
+ valid_candidate = RobustValidation(self, candidate_delay, valid_candidate,
+ is_histogram_valid);
+
+ }
+
+ // Only update the delay estimate when the farend is nonstationary and when
+ // a valid delay candidate is available.
+ if (non_stationary_farend && valid_candidate) {
+ if (candidate_delay != self->last_delay) {
+ self->last_delay_histogram =
+ (self->histogram[candidate_delay] > kLastHistogramMax ?
+ kLastHistogramMax : self->histogram[candidate_delay]);
+ // Adjust the histogram if we made a change to |last_delay|, though it was
+ // not the most likely one according to the histogram.
+ if (self->histogram[candidate_delay] <
+ self->histogram[self->compare_delay]) {
+ self->histogram[self->compare_delay] = self->histogram[candidate_delay];
+ }
+ }
+ self->last_delay = candidate_delay;
+ if (value_best_candidate < self->last_delay_probability) {
+ self->last_delay_probability = value_best_candidate;
+ }
+ self->compare_delay = self->last_delay;
+ }
+
+ return self->last_delay;
+}
+
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self) {
+ RTC_DCHECK(self);
+ return self->last_delay;
+}
+
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) {
+ float quality = 0;
+ RTC_DCHECK(self);
+
+ if (self->robust_validation_enabled) {
+ // Simply a linear function of the histogram height at delay estimate.
+ quality = self->histogram[self->compare_delay] / kHistogramMax;
+ } else {
+ // Note that |last_delay_probability| states how deep the minimum of the
+ // cost function is, so it is rather an error probability.
+ quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) /
+ kMaxBitCountsQ9;
+ if (quality < 0) {
+ quality = 0;
+ }
+ }
+ return quality;
+}
+
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+ int factor,
+ int32_t* mean_value) {
+ int32_t diff = new_value - *mean_value;
+
+ // mean_new = mean_value + ((new_value - mean_value) >> factor);
+ if (diff < 0) {
+ diff = -((-diff) >> factor);
+ } else {
+ diff = (diff >> factor);
+ }
+ *mean_value += diff;
+}
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h
new file mode 100644
index 0000000000..cce6113a53
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on binary converted spectra.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+
+#include "typedefs.h" // NOLINT(build/include)
+
+static const int32_t kMaxBitCountsQ9 = (32 << 9); // 32 matching bits in Q9.
+
+typedef struct {
+ // Pointer to bit counts.
+ int* far_bit_counts;
+ // Binary history variables.
+ uint32_t* binary_far_history;
+ int history_size;
+} BinaryDelayEstimatorFarend;
+
+typedef struct {
+ // Pointer to bit counts.
+ int32_t* mean_bit_counts;
+ // Array only used locally in ProcessBinarySpectrum() but whose size is
+ // determined at run-time.
+ int32_t* bit_counts;
+
+ // Binary history variables.
+ uint32_t* binary_near_history;
+ int near_history_size;
+ int history_size;
+
+ // Delay estimation variables.
+ int32_t minimum_probability;
+ int last_delay_probability;
+
+ // Delay memory.
+ int last_delay;
+
+ // Robust validation
+ int robust_validation_enabled;
+ int allowed_offset;
+ int last_candidate_delay;
+ int compare_delay;
+ int candidate_hits;
+ float* histogram;
+ float last_delay_histogram;
+
+ // For dynamically changing the lookahead when using SoftReset...().
+ int lookahead;
+
+ // Far-end binary spectrum history buffer etc.
+ BinaryDelayEstimatorFarend* farend;
+} BinaryDelayEstimator;
+
+// Releases the memory allocated by
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+// Input:
+// - self : Pointer to the binary delay estimation far-end
+// instance which is the return value of
+// WebRtc_CreateBinaryDelayEstimatorFarend().
+//
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Allocates the memory needed by the far-end part of the binary delay
+// estimation. The memory needs to be initialized separately through
+// WebRtc_InitBinaryDelayEstimatorFarend(...).
+//
+// Inputs:
+// - history_size : Size of the far-end binary spectrum history.
+//
+// Return value:
+// - BinaryDelayEstimatorFarend*
+// : Created |handle|. If the memory can't be allocated
+// or if any of the input parameters are invalid NULL
+// is returned.
+//
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+ int history_size);
+
+// Re-allocates the buffers.
+//
+// Inputs:
+// - self : Pointer to the binary estimation far-end instance
+// which is the return value of
+// WebRtc_CreateBinaryDelayEstimatorFarend().
+// - history_size : Size of the far-end binary spectrum history.
+//
+// Return value:
+// - history_size : The history size allocated.
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+ int history_size);
+
+// Initializes the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+// - self : Pointer to the delay estimation far-end instance.
+//
+// Output:
+// - self : Initialized far-end instance.
+//
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Soft resets the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+// - delay_shift : The amount of blocks to shift history buffers.
+//
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+ BinaryDelayEstimatorFarend* self, int delay_shift);
+
+// Adds the binary far-end spectrum to the internal far-end history buffer. This
+// spectrum is used as reference when calculating the delay using
+// WebRtc_ProcessBinarySpectrum().
+//
+// Inputs:
+// - self : Pointer to the delay estimation far-end
+// instance.
+// - binary_far_spectrum : Far-end binary spectrum.
+//
+// Output:
+// - self : Updated far-end instance.
+//
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self,
+ uint32_t binary_far_spectrum);
+
+// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does
+// not take ownership of it, hence the BinaryDelayEstimator has to be torn down
+// before the far-end.
+//
+// Input:
+// - self : Pointer to the binary delay estimation instance
+// which is the return value of
+// WebRtc_CreateBinaryDelayEstimator().
+//
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Allocates the memory needed by the binary delay estimation. The memory needs
+// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...).
+//
+// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed
+// description.
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+ BinaryDelayEstimatorFarend* farend, int max_lookahead);
+
+// Re-allocates |history_size| dependent buffers. The far-end buffers will be
+// updated at the same time if needed.
+//
+// Input:
+// - self : Pointer to the binary estimation instance which is
+// the return value of
+// WebRtc_CreateBinaryDelayEstimator().
+// - history_size : Size of the history buffers.
+//
+// Return value:
+// - history_size : The history size allocated.
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+ int history_size);
+
+// Initializes the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+// - self : Pointer to the delay estimation instance.
+//
+// Output:
+// - self : Initialized instance.
+//
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Soft resets the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+// - delay_shift : The amount of blocks to shift history buffers.
+//
+// Return value:
+// - actual_shifts : The actual number of shifts performed.
+//
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+ int delay_shift);
+
+// Estimates and returns the delay between the binary far-end and binary near-
+// end spectra. It is assumed the binary far-end spectrum has been added using
+// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by
+// the lookahead (i.e. the lookahead should be subtracted from the returned
+// value).
+//
+// Inputs:
+// - self : Pointer to the delay estimation instance.
+// - binary_near_spectrum : Near-end binary spectrum of the current block.
+//
+// Output:
+// - self : Updated instance.
+//
+// Return value:
+// - delay : >= 0 - Calculated delay value.
+// -2 - Insufficient data for estimation.
+//
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+ uint32_t binary_near_spectrum);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_ProcessBinarySpectrum(...).
+//
+// Input:
+// - self : Pointer to the delay estimation instance.
+//
+// Return value:
+// - delay : >= 0 - Last calculated delay value
+// -2 - Insufficient data for estimation.
+//
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self);
+
+// Returns the estimation quality of the last calculated delay updated by the
+// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value
+// in the interval [0, 1]. The higher the value, the better the quality.
+//
+// Return value:
+// - delay_quality : >= 0 - Estimation quality of last calculated
+// delay value.
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self);
+
+// Updates the |mean_value| recursively with a step size of 2^-|factor|. This
+// function is used internally in the Binary Delay Estimator as well as the
+// Fixed point wrapper.
+//
+// Inputs:
+// - new_value : The new value the mean should be updated with.
+// - factor : The step size, in number of right shifts.
+//
+// Input/Output:
+// - mean_value : Pointer to the mean value.
+//
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+ int factor,
+ int32_t* mean_value);
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
new file mode 100644
index 0000000000..46eea3ec18
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Header file including the delay estimator handle used for testing.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+#include "typedefs.h" // NOLINT(build/include)
+
+typedef union {
+ float float_;
+ int32_t int32_;
+} SpectrumType;
+
+typedef struct {
+ // Pointers to mean values of spectrum.
+ SpectrumType* mean_far_spectrum;
+ // |mean_far_spectrum| initialization indicator.
+ int far_spectrum_initialized;
+
+ int spectrum_size;
+
+ // Far-end part of binary spectrum based delay estimation.
+ BinaryDelayEstimatorFarend* binary_farend;
+} DelayEstimatorFarend;
+
+typedef struct {
+ // Pointers to mean values of spectrum.
+ SpectrumType* mean_near_spectrum;
+ // |mean_near_spectrum| initialization indicator.
+ int near_spectrum_initialized;
+
+ int spectrum_size;
+
+ // Binary spectrum based delay estimator
+ BinaryDelayEstimator* binary_handle;
+} DelayEstimator;
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
new file mode 100644
index 0000000000..36700e5706
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+#include "modules/audio_processing/utility/delay_estimator_internal.h"
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "test/gtest.h"
+#include "typedefs.h" // NOLINT(build/include)
+
+namespace {
+
+enum { kSpectrumSize = 65 };
+// Delay history sizes.
+enum { kMaxDelay = 100 };
+enum { kLookahead = 10 };
+enum { kHistorySize = kMaxDelay + kLookahead };
+// Length of binary spectrum sequence.
+enum { kSequenceLength = 400 };
+
+const int kDifferentHistorySize = 3;
+const int kDifferentLookahead = 1;
+
+const int kEnable[] = { 0, 1 };
+const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable);
+
+class DelayEstimatorTest : public ::testing::Test {
+ protected:
+ DelayEstimatorTest();
+ virtual void SetUp();
+ virtual void TearDown();
+
+ void Init();
+ void InitBinary();
+ void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay);
+ void RunBinarySpectra(BinaryDelayEstimator* binary1,
+ BinaryDelayEstimator* binary2,
+ int near_offset, int lookahead_offset, int far_offset);
+ void RunBinarySpectraTest(int near_offset, int lookahead_offset,
+ int ref_robust_validation, int robust_validation);
+
+ void* handle_;
+ DelayEstimator* self_;
+ void* farend_handle_;
+ DelayEstimatorFarend* farend_self_;
+ BinaryDelayEstimator* binary_;
+ BinaryDelayEstimatorFarend* binary_farend_;
+ int spectrum_size_;
+ // Dummy input spectra.
+ float far_f_[kSpectrumSize];
+ float near_f_[kSpectrumSize];
+ uint16_t far_u16_[kSpectrumSize];
+ uint16_t near_u16_[kSpectrumSize];
+ uint32_t binary_spectrum_[kSequenceLength + kHistorySize];
+};
+
+DelayEstimatorTest::DelayEstimatorTest()
+ : handle_(NULL),
+ self_(NULL),
+ farend_handle_(NULL),
+ farend_self_(NULL),
+ binary_(NULL),
+ binary_farend_(NULL),
+ spectrum_size_(kSpectrumSize) {
+ // Dummy input data are set with more or less arbitrary non-zero values.
+ memset(far_f_, 1, sizeof(far_f_));
+ memset(near_f_, 2, sizeof(near_f_));
+ memset(far_u16_, 1, sizeof(far_u16_));
+ memset(near_u16_, 2, sizeof(near_u16_));
+ // Construct a sequence of binary spectra used to verify delay estimate. The
+ // |kSequenceLength| has to be long enough for the delay estimation to leave
+ // the initialized state.
+ binary_spectrum_[0] = 1;
+ for (int i = 1; i < (kSequenceLength + kHistorySize); i++) {
+ binary_spectrum_[i] = 3 * binary_spectrum_[i - 1];
+ }
+}
+
+void DelayEstimatorTest::SetUp() {
+ farend_handle_ = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+ kHistorySize);
+ ASSERT_TRUE(farend_handle_ != NULL);
+ farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_);
+ handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead);
+ ASSERT_TRUE(handle_ != NULL);
+ self_ = reinterpret_cast<DelayEstimator*>(handle_);
+ binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
+ ASSERT_TRUE(binary_farend_ != NULL);
+ binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead);
+ ASSERT_TRUE(binary_ != NULL);
+}
+
+void DelayEstimatorTest::TearDown() {
+ WebRtc_FreeDelayEstimator(handle_);
+ handle_ = NULL;
+ self_ = NULL;
+ WebRtc_FreeDelayEstimatorFarend(farend_handle_);
+ farend_handle_ = NULL;
+ farend_self_ = NULL;
+ WebRtc_FreeBinaryDelayEstimator(binary_);
+ binary_ = NULL;
+ WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_);
+ binary_farend_ = NULL;
+}
+
+void DelayEstimatorTest::Init() {
+ // Initialize Delay Estimator
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ // Verify initialization.
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_last_delay(handle_)); // Delay in initial state.
+ EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_)); // Zero quality.
+}
+
+void DelayEstimatorTest::InitBinary() {
+ // Initialize Binary Delay Estimator (far-end part).
+ WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+ // Initialize Binary Delay Estimator
+ WebRtc_InitBinaryDelayEstimator(binary_);
+ // Verify initialization. This does not guarantee a complete check, since
+ // |last_delay| may be equal to -2 before initialization if done on the fly.
+ EXPECT_EQ(-2, binary_->last_delay);
+}
+
+void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle,
+ int offset, int delay) {
+ // Verify that we WebRtc_binary_last_delay() returns correct delay.
+ EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle));
+
+ if (delay != -2) {
+ // Verify correct delay estimate. In the non-causal case the true delay
+ // is equivalent with the |offset|.
+ EXPECT_EQ(offset, delay);
+ }
+}
+
+void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1,
+ BinaryDelayEstimator* binary2,
+ int near_offset,
+ int lookahead_offset,
+ int far_offset) {
+ int different_validations = binary1->robust_validation_enabled ^
+ binary2->robust_validation_enabled;
+ WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+ WebRtc_InitBinaryDelayEstimator(binary1);
+ WebRtc_InitBinaryDelayEstimator(binary2);
+ // Verify initialization. This does not guarantee a complete check, since
+ // |last_delay| may be equal to -2 before initialization if done on the fly.
+ EXPECT_EQ(-2, binary1->last_delay);
+ EXPECT_EQ(-2, binary2->last_delay);
+ for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) {
+ WebRtc_AddBinaryFarSpectrum(binary_farend_,
+ binary_spectrum_[i + far_offset]);
+ int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]);
+ int delay_2 =
+ WebRtc_ProcessBinarySpectrum(binary2,
+ binary_spectrum_[i - near_offset]);
+
+ VerifyDelay(binary1, far_offset + kLookahead, delay_1);
+ VerifyDelay(binary2,
+ far_offset + kLookahead + lookahead_offset + near_offset,
+ delay_2);
+ // Expect the two delay estimates to be offset by |lookahead_offset| +
+ // |near_offset| when we have left the initial state.
+ if ((delay_1 != -2) && (delay_2 != -2)) {
+ EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset);
+ }
+ // For the case of identical signals |delay_1| and |delay_2| should match
+ // all the time, unless one of them has robust validation turned on. In
+ // that case the robust validation leaves the initial state faster.
+ if ((near_offset == 0) && (lookahead_offset == 0)) {
+ if (!different_validations) {
+ EXPECT_EQ(delay_1, delay_2);
+ } else {
+ if (binary1->robust_validation_enabled) {
+ EXPECT_GE(delay_1, delay_2);
+ } else {
+ EXPECT_GE(delay_2, delay_1);
+ }
+ }
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_binary_last_delay(binary1));
+ EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1));
+ EXPECT_NE(-2, WebRtc_binary_last_delay(binary2));
+ EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2));
+}
+
+void DelayEstimatorTest::RunBinarySpectraTest(int near_offset,
+ int lookahead_offset,
+ int ref_robust_validation,
+ int robust_validation) {
+ BinaryDelayEstimator* binary2 =
+ WebRtc_CreateBinaryDelayEstimator(binary_farend_,
+ kLookahead + lookahead_offset);
+ // Verify the delay for both causal and non-causal systems. For causal systems
+ // the delay is equivalent with a positive |offset| of the far-end sequence.
+ // For non-causal systems the delay is equivalent with a negative |offset| of
+ // the far-end sequence.
+ binary_->robust_validation_enabled = ref_robust_validation;
+ binary2->robust_validation_enabled = robust_validation;
+ for (int offset = -kLookahead;
+ offset < kMaxDelay - lookahead_offset - near_offset;
+ offset++) {
+ RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset);
+ }
+ WebRtc_FreeBinaryDelayEstimator(binary2);
+ binary2 = NULL;
+ binary_->robust_validation_enabled = 0; // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) {
+ // In this test we verify correct error returns on invalid API calls.
+
+ // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator()
+ // should return a NULL pointer on invalid input values.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ void* handle = farend_handle_;
+ handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize);
+ EXPECT_TRUE(handle == NULL);
+ handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1);
+ EXPECT_TRUE(handle == NULL);
+
+ handle = handle_;
+ handle = WebRtc_CreateDelayEstimator(NULL, kLookahead);
+ EXPECT_TRUE(handle == NULL);
+ handle = WebRtc_CreateDelayEstimator(farend_handle_, -1);
+ EXPECT_TRUE(handle == NULL);
+
+ // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should
+ // return -1 if we have a NULL pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL));
+ EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL));
+
+ // WebRtc_AddFarSpectrumFloat() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as far-end spectrum.
+ // 3) Incorrect spectrum size.
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_));
+ // Use |farend_handle_| which is properly created at SetUp().
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, NULL,
+ spectrum_size_));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_ + 1));
+
+ // WebRtc_AddFarSpectrumFix() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as far-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Too high precision in far-end spectrum (Q-domain > 15).
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_ + 1, 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 16));
+
+ // WebRtc_set_history_size() should return -1 if:
+ // 1) |handle| is a NULL.
+ // 2) |history_size| <= 1.
+ EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1));
+ EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1));
+ // WebRtc_history_size() should return -1 if:
+ // 1) NULL pointer input.
+ EXPECT_EQ(-1, WebRtc_history_size(NULL));
+ // 2) there is a mismatch between history size.
+ void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize);
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle));
+ EXPECT_EQ(kDifferentHistorySize,
+ WebRtc_set_history_size(tmp_handle, kDifferentHistorySize));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle));
+ EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize));
+ EXPECT_EQ(-1, WebRtc_history_size(tmp_handle));
+
+ // WebRtc_set_lookahead() should return -1 if we try a value outside the
+ /// buffer.
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1));
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1));
+
+ // WebRtc_set_allowed_offset() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) |allowed_offset| < 0.
+ EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0));
+ EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1));
+
+ EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL));
+
+ // WebRtc_enable_robust_validation() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) Incorrect |enable| value (not 0 or 1).
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0]));
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1));
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2));
+
+ // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL
+ // pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL));
+
+ // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as near-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Non matching history sizes if multiple delay estimators using the same
+ // far-end reference.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(NULL, near_f_,
+ spectrum_size_));
+ // Use |handle_| which is properly created at SetUp().
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, NULL,
+ spectrum_size_));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_ + 1));
+ // |tmp_handle| is already in a non-matching state.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle,
+ near_f_,
+ spectrum_size_));
+
+ // WebRtc_DelayEstimatorProcessFix() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as near-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Too high precision in near-end spectrum (Q-domain > 15).
+ // 5) Non matching history sizes if multiple delay estimators using the same
+ // far-end reference.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_ + 1, 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 16));
+ // |tmp_handle| is already in a non-matching state.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle,
+ near_u16_,
+ spectrum_size_,
+ 0));
+ WebRtc_FreeDelayEstimator(tmp_handle);
+
+ // WebRtc_last_delay() should return -1 if we have a NULL pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_last_delay(NULL));
+
+ // Free any local memory if needed.
+ WebRtc_FreeDelayEstimator(handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyAllowedOffset) {
+ // Is set to zero by default.
+ EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_));
+ for (int i = 1; i >= 0; i--) {
+ EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i));
+ EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+ Init();
+ // Unaffected over a reset.
+ EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+ }
+}
+
+TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) {
+ // Disabled by default.
+ EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_));
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i]));
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+ Init();
+ // Unaffected over a reset.
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+ }
+}
+
+TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) {
+ // In this test we verify that the mean spectra are initialized after first
+ // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test
+ // also verifies the state is not left for zero spectra.
+ const float kZerosFloat[kSpectrumSize] = { 0.0 };
+ const uint16_t kZerosU16[kSpectrumSize] = { 0 };
+
+ // For floating point operations, process one frame and verify initialization
+ // flag.
+ Init();
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat,
+ spectrum_size_));
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_));
+ EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat,
+ spectrum_size_));
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_));
+ EXPECT_EQ(1, self_->near_spectrum_initialized);
+
+ // For fixed point operations, process one frame and verify initialization
+ // flag.
+ Init();
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16,
+ spectrum_size_, 0));
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 0));
+ EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16,
+ spectrum_size_, 0));
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 0));
+ EXPECT_EQ(1, self_->near_spectrum_initialized);
+}
+
+TEST_F(DelayEstimatorTest, CorrectLastDelay) {
+ // In this test we verify that we get the correct last delay upon valid call.
+ // We simply process the same data until we leave the initialized state
+ // (|last_delay| = -2). Then we compare the Process() output with the
+ // last_delay() call.
+
+ // TODO(bjornv): Update quality values for robust validation.
+ int last_delay = 0;
+ // Floating point operations.
+ Init();
+ for (int i = 0; i < 200; i++) {
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_));
+ last_delay = WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_);
+ if (last_delay != -2) {
+ EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+ if (!WebRtc_is_robust_validation_enabled(handle_)) {
+ EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+ WebRtc_last_delay_quality(handle_));
+ }
+ break;
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_last_delay(handle_));
+ EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+
+ // Fixed point operations.
+ Init();
+ for (int i = 0; i < 200; i++) {
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 0));
+ last_delay = WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 0);
+ if (last_delay != -2) {
+ EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+ if (!WebRtc_is_robust_validation_enabled(handle_)) {
+ EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+ WebRtc_last_delay_quality(handle_));
+ }
+ break;
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_last_delay(handle_));
+ EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+ // In this test we verify correct output on invalid API calls to the Binary
+ // Delay Estimator (far-end part).
+
+ BinaryDelayEstimatorFarend* binary = binary_farend_;
+ // WebRtc_CreateBinaryDelayEstimatorFarend() should return -1 if the input
+ // history size is less than 2. This is to make sure the buffer shifting
+ // applies properly.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+ EXPECT_TRUE(binary == NULL);
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+ // In this test we verify correct output on invalid API calls to the Binary
+ // Delay Estimator.
+
+ BinaryDelayEstimator* binary_handle = binary_;
+ // WebRtc_CreateBinaryDelayEstimator() should return -1 if we have a NULL
+ // pointer as |binary_farend| or invalid input values. Upon failure, the
+ // |binary_handle| should be NULL.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+ EXPECT_TRUE(binary_handle == NULL);
+ binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+ EXPECT_TRUE(binary_handle == NULL);
+}
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+ // In this test we verify that we update the mean value in correct direction
+ // only. With "direction" we mean increase or decrease.
+
+ int32_t mean_value = 4000;
+ int32_t mean_value_before = mean_value;
+ int32_t new_mean_value = mean_value * 2;
+
+ // Increasing |mean_value|.
+ WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+ EXPECT_LT(mean_value_before, mean_value);
+ EXPECT_GT(new_mean_value, mean_value);
+
+ // Decreasing |mean_value|.
+ new_mean_value = mean_value / 2;
+ mean_value_before = mean_value;
+ WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+ EXPECT_GT(mean_value_before, mean_value);
+ EXPECT_LT(new_mean_value, mean_value);
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+ // In this test we verify that we get the correct delay estimates if we shift
+ // the signal accordingly. We create two Binary Delay Estimators and feed them
+ // with the same signals, so they should output the same results.
+ // We verify both causal and non-causal delays.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+ // In this test we use the same setup as above, but we now feed the two Binary
+ // Delay Estimators with different signals, so they should output different
+ // results.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ const int kNearOffset = 1;
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) {
+ // In this test we use the same setup as above, feeding the two Binary
+ // Delay Estimators with the same signals. The difference is that we create
+ // them with different lookahead.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ const int kLookaheadOffset = 1;
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) {
+ // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the
+ // difference that |allowed_offset| is set for the reference binary delay
+ // estimator.
+
+ binary_->allowed_offset = 10;
+ RunBinarySpectraTest(0, 0, 0, 0);
+ binary_->allowed_offset = 0; // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) {
+ void* farend_handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+ kMaxDelay);
+ ASSERT_TRUE(farend_handle != NULL);
+ void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead);
+ ASSERT_TRUE(handle != NULL);
+ EXPECT_EQ(kLookahead, WebRtc_lookahead(handle));
+ WebRtc_FreeDelayEstimator(handle);
+ WebRtc_FreeDelayEstimatorFarend(farend_handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) {
+ EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(kDifferentLookahead,
+ WebRtc_set_lookahead(handle_, kDifferentLookahead));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) {
+ EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) {
+ EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(kDifferentHistorySize,
+ WebRtc_set_history_size(handle_, kDifferentHistorySize));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+}
+
+// TODO(bjornv): Add tests for SoftReset...(...).
+
+} // namespace
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc
new file mode 100644
index 0000000000..f907c80a35
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+#include "modules/audio_processing/utility/delay_estimator_internal.h"
+#include "rtc_base/checks.h"
+
+// Only bit |kBandFirst| through bit |kBandLast| are processed and
+// |kBandFirst| - |kBandLast| must be < 32.
+enum { kBandFirst = 12 };
+enum { kBandLast = 43 };
+
+static __inline uint32_t SetBit(uint32_t in, int pos) {
+ uint32_t mask = (1 << pos);
+ uint32_t out = (in | mask);
+
+ return out;
+}
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+// - new_value : New additional value.
+// - scale : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+// - mean_value : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+ float scale,
+ float* mean_value) {
+ RTC_DCHECK_LT(scale, 1.0f);
+ *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input |spectrum| with a
+// |threshold_spectrum|. Float and fixed point versions.
+//
+// Inputs:
+// - spectrum : Spectrum of which the binary spectrum should be
+// calculated.
+// - threshold_spectrum : Threshold spectrum with which the input
+// spectrum is compared.
+// Return:
+// - out : Binary spectrum.
+//
+static uint32_t BinarySpectrumFix(const uint16_t* spectrum,
+ SpectrumType* threshold_spectrum,
+ int q_domain,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+
+ RTC_DCHECK_LT(q_domain, 16);
+
+ if (!(*threshold_initialized)) {
+ // Set the |threshold_spectrum| to half the input |spectrum| as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0) {
+ // Convert input spectrum from Q(|q_domain|) to Q15.
+ int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+ threshold_spectrum[i].int32_ = (spectrum_q15 >> 1);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Convert input spectrum from Q(|q_domain|) to Q15.
+ int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+ // Update the |threshold_spectrum|.
+ WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_));
+ // Convert |spectrum| at current frequency bin to a binary value.
+ if (spectrum_q15 > threshold_spectrum[i].int32_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+static uint32_t BinarySpectrumFloat(const float* spectrum,
+ SpectrumType* threshold_spectrum,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+ const float kScale = 1 / 64.0;
+
+ if (!(*threshold_initialized)) {
+ // Set the |threshold_spectrum| to half the input |spectrum| as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0.0f) {
+ threshold_spectrum[i].float_ = (spectrum[i] / 2);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Update the |threshold_spectrum|.
+ MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_));
+ // Convert |spectrum| at current frequency bin to a binary value.
+ if (spectrum[i] > threshold_spectrum[i].float_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+void WebRtc_FreeDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_far_spectrum);
+ self->mean_far_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend);
+ self->binary_farend = NULL;
+
+ free(self);
+}
+
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) {
+ DelayEstimatorFarend* self = NULL;
+
+ // Check if the sub band used in the delay estimation is small enough to fit
+ // the binary spectra in a uint32_t.
+ static_assert(kBandLast - kBandFirst < 32, "");
+
+ if (spectrum_size >= kBandLast) {
+ self = static_cast<DelayEstimatorFarend*>(
+ malloc(sizeof(DelayEstimatorFarend)));
+ }
+
+ if (self != NULL) {
+ int memory_fail = 0;
+
+ // Allocate memory for the binary far-end spectrum handling.
+ self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size);
+ memory_fail |= (self->binary_farend == NULL);
+
+ // Allocate memory for spectrum buffers.
+ self->mean_far_spectrum =
+ static_cast<SpectrumType*>(malloc(spectrum_size * sizeof(SpectrumType)));
+ memory_fail |= (self->mean_far_spectrum == NULL);
+
+ self->spectrum_size = spectrum_size;
+
+ if (memory_fail) {
+ WebRtc_FreeDelayEstimatorFarend(self);
+ self = NULL;
+ }
+ }
+
+ return self;
+}
+
+int WebRtc_InitDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize far-end part of binary delay estimator.
+ WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend);
+
+ // Set averaged far and near end spectra to zero.
+ memset(self->mean_far_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicators.
+ self->far_spectrum_initialized = 0;
+
+ return 0;
+}
+
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ RTC_DCHECK(self);
+ WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift);
+}
+
+int WebRtc_AddFarSpectrumFix(void* handle,
+ const uint16_t* far_spectrum,
+ int spectrum_size,
+ int far_q) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+ if (far_q > 15) {
+ // If |far_q| is larger than 15 we cannot guarantee no wrap around.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum,
+ far_q, &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+int WebRtc_AddFarSpectrumFloat(void* handle,
+ const float* far_spectrum,
+ int spectrum_size) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum,
+ &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+void WebRtc_FreeDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_near_spectrum);
+ self->mean_near_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimator(self->binary_handle);
+ self->binary_handle = NULL;
+
+ free(self);
+}
+
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) {
+ DelayEstimator* self = NULL;
+ DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle;
+
+ if (farend_handle != NULL) {
+ self = static_cast<DelayEstimator*>(malloc(sizeof(DelayEstimator)));
+ }
+
+ if (self != NULL) {
+ int memory_fail = 0;
+
+ // Allocate memory for the farend spectrum handling.
+ self->binary_handle =
+ WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead);
+ memory_fail |= (self->binary_handle == NULL);
+
+ // Allocate memory for spectrum buffers.
+ self->mean_near_spectrum = static_cast<SpectrumType*>(
+ malloc(farend->spectrum_size * sizeof(SpectrumType)));
+ memory_fail |= (self->mean_near_spectrum == NULL);
+
+ self->spectrum_size = farend->spectrum_size;
+
+ if (memory_fail) {
+ WebRtc_FreeDelayEstimator(self);
+ self = NULL;
+ }
+ }
+
+ return self;
+}
+
+int WebRtc_InitDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize binary delay estimator.
+ WebRtc_InitBinaryDelayEstimator(self->binary_handle);
+
+ // Set averaged far and near end spectra to zero.
+ memset(self->mean_near_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicators.
+ self->near_spectrum_initialized = 0;
+
+ return 0;
+}
+
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ RTC_DCHECK(self);
+ return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift);
+}
+
+int WebRtc_set_history_size(void* handle, int history_size) {
+ DelayEstimator* self = static_cast<DelayEstimator*>(handle);
+
+ if ((self == NULL) || (history_size <= 1)) {
+ return -1;
+ }
+ return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size);
+}
+
+int WebRtc_history_size(const void* handle) {
+ const DelayEstimator* self = static_cast<const DelayEstimator*>(handle);
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (self->binary_handle->farend->history_size !=
+ self->binary_handle->history_size) {
+ // Non matching history sizes.
+ return -1;
+ }
+ return self->binary_handle->history_size;
+}
+
+int WebRtc_set_lookahead(void* handle, int lookahead) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ RTC_DCHECK(self);
+ RTC_DCHECK(self->binary_handle);
+ if ((lookahead > self->binary_handle->near_history_size - 1) ||
+ (lookahead < 0)) {
+ return -1;
+ }
+ self->binary_handle->lookahead = lookahead;
+ return self->binary_handle->lookahead;
+}
+
+int WebRtc_lookahead(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ RTC_DCHECK(self);
+ RTC_DCHECK(self->binary_handle);
+ return self->binary_handle->lookahead;
+}
+
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if ((self == NULL) || (allowed_offset < 0)) {
+ return -1;
+ }
+ self->binary_handle->allowed_offset = allowed_offset;
+ return 0;
+}
+
+int WebRtc_get_allowed_offset(const void* handle) {
+ const DelayEstimator* self = (const DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ return self->binary_handle->allowed_offset;
+}
+
+int WebRtc_enable_robust_validation(void* handle, int enable) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if ((enable < 0) || (enable > 1)) {
+ return -1;
+ }
+ RTC_DCHECK(self->binary_handle);
+ self->binary_handle->robust_validation_enabled = enable;
+ return 0;
+}
+
+int WebRtc_is_robust_validation_enabled(const void* handle) {
+ const DelayEstimator* self = (const DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ return self->binary_handle->robust_validation_enabled;
+}
+
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+ const uint16_t* near_spectrum,
+ int spectrum_size,
+ int near_q) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (near_spectrum == NULL) {
+ // Empty near end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+ if (near_q > 15) {
+ // If |near_q| is larger than 15 we cannot guarantee no wrap around.
+ return -1;
+ }
+
+ // Get binary spectra.
+ binary_spectrum = BinarySpectrumFix(near_spectrum,
+ self->mean_near_spectrum,
+ near_q,
+ &(self->near_spectrum_initialized));
+
+ return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+ const float* near_spectrum,
+ int spectrum_size) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (near_spectrum == NULL) {
+ // Empty near end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum,
+ &(self->near_spectrum_initialized));
+
+ return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_last_delay(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ return WebRtc_binary_last_delay(self->binary_handle);
+}
+
+float WebRtc_last_delay_quality(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ RTC_DCHECK(self);
+ return WebRtc_binary_last_delay_quality(self->binary_handle);
+}
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
new file mode 100644
index 0000000000..6b6e51f82c
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on block by block basis.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+
+#include "typedefs.h" // NOLINT(build/include)
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...)
+void WebRtc_FreeDelayEstimatorFarend(void* handle);
+
+// Allocates the memory needed by the far-end part of the delay estimation. The
+// memory needs to be initialized separately through
+// WebRtc_InitDelayEstimatorFarend(...).
+//
+// Inputs:
+// - spectrum_size : Size of the spectrum used both in far-end and
+// near-end. Used to allocate memory for spectrum
+// specific buffers.
+// - history_size : The far-end history buffer size. A change in buffer
+// size can be forced with WebRtc_set_history_size().
+// Note that the maximum delay which can be estimated is
+// determined together with WebRtc_set_lookahead().
+//
+// Return value:
+// - void* : Created |handle|. If the memory can't be allocated or
+// if any of the input parameters are invalid NULL is
+// returned.
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size);
+
+// Initializes the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...)
+int WebRtc_InitDelayEstimatorFarend(void* handle);
+
+// Soft resets the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...).
+// Input:
+// - delay_shift : The amount of blocks to shift history buffers.
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift);
+
+// Adds the far-end spectrum to the far-end history buffer. This spectrum is
+// used as reference when calculating the delay using
+// WebRtc_ProcessSpectrum().
+//
+// Inputs:
+// - far_spectrum : Far-end spectrum.
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end).
+// - far_q : The Q-domain of the far-end data.
+//
+// Output:
+// - handle : Updated far-end instance.
+//
+int WebRtc_AddFarSpectrumFix(void* handle,
+ const uint16_t* far_spectrum,
+ int spectrum_size,
+ int far_q);
+
+// See WebRtc_AddFarSpectrumFix() for description.
+int WebRtc_AddFarSpectrumFloat(void* handle,
+ const float* far_spectrum,
+ int spectrum_size);
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
+void WebRtc_FreeDelayEstimator(void* handle);
+
+// Allocates the memory needed by the delay estimation. The memory needs to be
+// initialized separately through WebRtc_InitDelayEstimator(...).
+//
+// Inputs:
+// - farend_handle : Pointer to the far-end part of the delay estimation
+// instance created prior to this call using
+// WebRtc_CreateDelayEstimatorFarend().
+//
+// Note that WebRtc_CreateDelayEstimator does not take
+// ownership of |farend_handle|, which has to be torn
+// down properly after this instance.
+//
+// - max_lookahead : Maximum amount of non-causal lookahead allowed. The
+// actual amount of lookahead used can be controlled by
+// WebRtc_set_lookahead(...). The default |lookahead| is
+// set to |max_lookahead| at create time. Use
+// WebRtc_set_lookahead(...) before start if a different
+// value is desired.
+//
+// Using lookahead can detect cases in which a near-end
+// signal occurs before the corresponding far-end signal.
+// It will delay the estimate for the current block by an
+// equal amount, and the returned values will be offset
+// by it.
+//
+// A value of zero is the typical no-lookahead case.
+// This also represents the minimum delay which can be
+// estimated.
+//
+// Note that the effective range of delay estimates is
+// [-|lookahead|,... ,|history_size|-|lookahead|)
+// where |history_size| is set through
+// WebRtc_set_history_size().
+//
+// Return value:
+// - void* : Created |handle|. If the memory can't be allocated or
+// if any of the input parameters are invalid NULL is
+// returned.
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead);
+
+// Initializes the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...)
+int WebRtc_InitDelayEstimator(void* handle);
+
+// Soft resets the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...)
+// Input:
+// - delay_shift : The amount of blocks to shift history buffers.
+//
+// Return value:
+// - actual_shifts : The actual number of shifts performed.
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift);
+
+// Sets the effective |history_size| used. Valid values from 2. We simply need
+// at least two delays to compare to perform an estimate. If |history_size| is
+// changed, buffers are reallocated filling in with zeros if necessary.
+// Note that changing the |history_size| affects both buffers in far-end and
+// near-end. Hence it is important to change all DelayEstimators that use the
+// same reference far-end, to the same |history_size| value.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - history_size : Effective history size to be used.
+// Return value:
+// - new_history_size : The new history size used. If the memory was not able
+// to be allocated 0 is returned.
+int WebRtc_set_history_size(void* handle, int history_size);
+
+// Returns the history_size currently used.
+// Input:
+// - handle : Pointer to the delay estimation instance.
+int WebRtc_history_size(const void* handle);
+
+// Sets the amount of |lookahead| to use. Valid values are [0, max_lookahead]
+// where |max_lookahead| was set at create time through
+// WebRtc_CreateDelayEstimator(...).
+//
+// Input:
+// - handle : Pointer to the delay estimation instance.
+// - lookahead : The amount of lookahead to be used.
+//
+// Return value:
+// - new_lookahead : The actual amount of lookahead set, unless |handle| is
+// a NULL pointer or |lookahead| is invalid, for which an
+// error is returned.
+int WebRtc_set_lookahead(void* handle, int lookahead);
+
+// Returns the amount of lookahead we currently use.
+// Input:
+// - handle : Pointer to the delay estimation instance.
+int WebRtc_lookahead(void* handle);
+
+// Sets the |allowed_offset| used in the robust validation scheme. If the
+// delay estimator is used in an echo control component, this parameter is
+// related to the filter length. In principle |allowed_offset| should be set to
+// the echo control filter length minus the expected echo duration, i.e., the
+// delay offset the echo control can handle without quality regression. The
+// default value, used if not set manually, is zero. Note that |allowed_offset|
+// has to be non-negative.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - allowed_offset : The amount of delay offset, measured in partitions,
+// the echo control filter can handle.
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset);
+
+// Returns the |allowed_offset| in number of partitions.
+int WebRtc_get_allowed_offset(const void* handle);
+
+// Enables/Disables a robust validation functionality in the delay estimation.
+// This is by default set to disabled at create time. The state is preserved
+// over a reset.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - enable : Enable (1) or disable (0) this feature.
+int WebRtc_enable_robust_validation(void* handle, int enable);
+
+// Returns 1 if robust validation is enabled and 0 if disabled.
+int WebRtc_is_robust_validation_enabled(const void* handle);
+
+// Estimates and returns the delay between the far-end and near-end blocks. The
+// value will be offset by the lookahead (i.e. the lookahead should be
+// subtracted from the returned value).
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - near_spectrum : Pointer to the near-end spectrum data of the current
+// block.
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end).
+// - near_q : The Q-domain of the near-end data.
+//
+// Output:
+// - handle : Updated instance.
+//
+// Return value:
+// - delay : >= 0 - Calculated delay value.
+// -1 - Error.
+// -2 - Insufficient data for estimation.
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+ const uint16_t* near_spectrum,
+ int spectrum_size,
+ int near_q);
+
+// See WebRtc_DelayEstimatorProcessFix() for description.
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+ const float* near_spectrum,
+ int spectrum_size);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_DelayEstimatorProcess(...).
+//
+// Input:
+// - handle : Pointer to the delay estimation instance.
+//
+// Return value:
+// - delay : >= 0 - Last calculated delay value.
+// -1 - Error.
+// -2 - Insufficient data for estimation.
+int WebRtc_last_delay(void* handle);
+
+// Returns the estimation quality/probability of the last calculated delay
+// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation
+// quality is a value in the interval [0, 1]. The higher the value, the better
+// the quality.
+//
+// Return value:
+// - delay_quality : >= 0 - Estimation quality of last calculated delay.
+float WebRtc_last_delay_quality(void* handle);
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc
new file mode 100644
index 0000000000..59631117a2
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc
@@ -0,0 +1,543 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ * - Trivial type modifications.
+ * - Minimal code subset to do rdft of length 128.
+ * - Optimizations because of known length.
+ * - Removed the global variables by moving the code in to a class in order
+ * to make it thread safe.
+ *
+ * All changes are covered by the WebRTC license and IP grant:
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing//utility/ooura_fft.h"
+
+#include <math.h>
+
+#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "typedefs.h" // NOLINT(build/include)
+
+namespace webrtc {
+
+namespace {
+
+#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
+static void cft1st_128_C(float* a) {
+ const int n = 128;
+ int j, k1, k2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ // The processing of the first set of elements was simplified in C to avoid
+ // some operations (multiplication by zero or one, addition of two elements
+ // multiplied by the same weight, ...).
+ x0r = a[0] + a[2];
+ x0i = a[1] + a[3];
+ x1r = a[0] - a[2];
+ x1i = a[1] - a[3];
+ x2r = a[4] + a[6];
+ x2i = a[5] + a[7];
+ x3r = a[4] - a[6];
+ x3i = a[5] - a[7];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[4] = x0r - x2r;
+ a[5] = x0i - x2i;
+ a[2] = x1r - x3i;
+ a[3] = x1i + x3r;
+ a[6] = x1r + x3i;
+ a[7] = x1i - x3r;
+ wk1r = rdft_w[2];
+ x0r = a[8] + a[10];
+ x0i = a[9] + a[11];
+ x1r = a[8] - a[10];
+ x1i = a[9] - a[11];
+ x2r = a[12] + a[14];
+ x2i = a[13] + a[15];
+ x3r = a[12] - a[14];
+ x3i = a[13] - a[15];
+ a[8] = x0r + x2r;
+ a[9] = x0i + x2i;
+ a[12] = x2i - x0i;
+ a[13] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[10] = wk1r * (x0r - x0i);
+ a[11] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[14] = wk1r * (x0i - x0r);
+ a[15] = wk1r * (x0i + x0r);
+ k1 = 0;
+ for (j = 16; j < n; j += 16) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = rdft_w[k1 + 0];
+ wk2i = rdft_w[k1 + 1];
+ wk1r = rdft_w[k2 + 0];
+ wk1i = rdft_w[k2 + 1];
+ wk3r = rdft_wk3ri_first[k1 + 0];
+ wk3i = rdft_wk3ri_first[k1 + 1];
+ x0r = a[j + 0] + a[j + 2];
+ x0i = a[j + 1] + a[j + 3];
+ x1r = a[j + 0] - a[j + 2];
+ x1i = a[j + 1] - a[j + 3];
+ x2r = a[j + 4] + a[j + 6];
+ x2i = a[j + 5] + a[j + 7];
+ x3r = a[j + 4] - a[j + 6];
+ x3i = a[j + 5] - a[j + 7];
+ a[j + 0] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 4] = wk2r * x0r - wk2i * x0i;
+ a[j + 5] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 2] = wk1r * x0r - wk1i * x0i;
+ a[j + 3] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 6] = wk3r * x0r - wk3i * x0i;
+ a[j + 7] = wk3r * x0i + wk3i * x0r;
+ wk1r = rdft_w[k2 + 2];
+ wk1i = rdft_w[k2 + 3];
+ wk3r = rdft_wk3ri_second[k1 + 0];
+ wk3i = rdft_wk3ri_second[k1 + 1];
+ x0r = a[j + 8] + a[j + 10];
+ x0i = a[j + 9] + a[j + 11];
+ x1r = a[j + 8] - a[j + 10];
+ x1i = a[j + 9] - a[j + 11];
+ x2r = a[j + 12] + a[j + 14];
+ x2i = a[j + 13] + a[j + 15];
+ x3r = a[j + 12] - a[j + 14];
+ x3i = a[j + 13] - a[j + 15];
+ a[j + 8] = x0r + x2r;
+ a[j + 9] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 12] = -wk2i * x0r - wk2r * x0i;
+ a[j + 13] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 10] = wk1r * x0r - wk1i * x0i;
+ a[j + 11] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 14] = wk3r * x0r - wk3i * x0i;
+ a[j + 15] = wk3r * x0i + wk3i * x0r;
+ }
+}
+
+static void cftmdl_128_C(float* a) {
+ const int l = 8;
+ const int n = 128;
+ const int m = 32;
+ int j0, j1, j2, j3, k, k1, k2, m2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ for (j0 = 0; j0 < l; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j2 + 0] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1 + 0] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3 + 0] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ wk1r = rdft_w[2];
+ for (j0 = m; j0 < l + m; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j2 + 0] = x2i - x0i;
+ a[j2 + 1] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * (x0r - x0i);
+ a[j1 + 1] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[j3 + 0] = wk1r * (x0i - x0r);
+ a[j3 + 1] = wk1r * (x0i + x0r);
+ }
+ k1 = 0;
+ m2 = 2 * m;
+ for (k = m2; k < n; k += m2) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = rdft_w[k1 + 0];
+ wk2i = rdft_w[k1 + 1];
+ wk1r = rdft_w[k2 + 0];
+ wk1i = rdft_w[k2 + 1];
+ wk3r = rdft_wk3ri_first[k1 + 0];
+ wk3i = rdft_wk3ri_first[k1 + 1];
+ for (j0 = k; j0 < l + k; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2 + 0] = wk2r * x0r - wk2i * x0i;
+ a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ wk1r = rdft_w[k2 + 2];
+ wk1i = rdft_w[k2 + 3];
+ wk3r = rdft_wk3ri_second[k1 + 0];
+ wk3i = rdft_wk3ri_second[k1 + 1];
+ for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
+ a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ }
+}
+
+static void rftfsub_128_C(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr - wki * xi;
+ yi = wkr * xi + wki * xr;
+ a[j2 + 0] -= yr;
+ a[j2 + 1] -= yi;
+ a[k2 + 0] += yr;
+ a[k2 + 1] -= yi;
+ }
+}
+
+static void rftbsub_128_C(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ a[1] = -a[1];
+ for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr + wki * xi;
+ yi = wkr * xi - wki * xr;
+ a[j2 + 0] = a[j2 + 0] - yr;
+ a[j2 + 1] = yi - a[j2 + 1];
+ a[k2 + 0] = yr + a[k2 + 0];
+ a[k2 + 1] = yi - a[k2 + 1];
+ }
+ a[65] = -a[65];
+}
+#endif
+
+
+} // namespace
+
+OouraFft::OouraFft() {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+ use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0);
+#else
+ use_sse2_ = false;
+#endif
+}
+
+OouraFft::~OouraFft() = default;
+
+void OouraFft::Fft(float* a) const {
+ float xi;
+ bitrv2_128(a);
+ cftfsub_128(a);
+ rftfsub_128(a);
+ xi = a[0] - a[1];
+ a[0] += a[1];
+ a[1] = xi;
+}
+void OouraFft::InverseFft(float* a) const {
+ a[1] = 0.5f * (a[0] - a[1]);
+ a[0] -= a[1];
+ rftbsub_128(a);
+ bitrv2_128(a);
+ cftbsub_128(a);
+}
+
+void OouraFft::cft1st_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+ cft1st_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+ cft1st_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+ if (use_sse2_) {
+ cft1st_128_SSE2(a);
+ } else {
+ cft1st_128_C(a);
+ }
+#else
+ cft1st_128_C(a);
+#endif
+}
+void OouraFft::cftmdl_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+ cftmdl_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+ cftmdl_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+ if (use_sse2_) {
+ cftmdl_128_SSE2(a);
+ } else {
+ cftmdl_128_C(a);
+ }
+#else
+ cftmdl_128_C(a);
+#endif
+}
+void OouraFft::rftfsub_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+ rftfsub_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+ rftfsub_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+ if (use_sse2_) {
+ rftfsub_128_SSE2(a);
+ } else {
+ rftfsub_128_C(a);
+ }
+#else
+ rftfsub_128_C(a);
+#endif
+}
+
+void OouraFft::rftbsub_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+ rftbsub_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+ rftbsub_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+ if (use_sse2_) {
+ rftbsub_128_SSE2(a);
+ } else {
+ rftbsub_128_C(a);
+ }
+#else
+ rftbsub_128_C(a);
+#endif
+}
+
+void OouraFft::cftbsub_128(float* a) const {
+ int j, j1, j2, j3, l;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ cft1st_128(a);
+ cftmdl_128(a);
+ l = 32;
+
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = -a[j + 1] - a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = -a[j + 1] + a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i - x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i + x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i - x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i + x3r;
+ }
+}
+
+void OouraFft::cftfsub_128(float* a) const {
+ int j, j1, j2, j3, l;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ cft1st_128(a);
+ cftmdl_128(a);
+ l = 32;
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+}
+
+void OouraFft::bitrv2_128(float* a) const {
+ /*
+ Following things have been attempted but are no faster:
+ (a) Storing the swap indexes in a LUT (index calculations are done
+ for 'free' while waiting on memory/L1).
+ (b) Consolidate the load/store of two consecutive floats by a 64 bit
+ integer (execution is memory/L1 bound).
+ (c) Do a mix of floats and 64 bit integer to maximize register
+ utilization (execution is memory/L1 bound).
+ (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+ (e) Hard-coding of the offsets to completely eliminates index
+ calculations.
+ */
+
+ unsigned int j, j1, k, k1;
+ float xr, xi, yr, yi;
+
+ const int ip[4] = {0, 64, 32, 96};
+ for (k = 0; k < 4; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1 + 0];
+ xi = a[j1 + 1];
+ yr = a[k1 + 0];
+ yi = a[k1 + 1];
+ a[j1 + 0] = yr;
+ a[j1 + 1] = yi;
+ a[k1 + 0] = xr;
+ a[k1 + 1] = xi;
+ j1 += 8;
+ k1 += 16;
+ xr = a[j1 + 0];
+ xi = a[j1 + 1];
+ yr = a[k1 + 0];
+ yi = a[k1 + 1];
+ a[j1 + 0] = yr;
+ a[j1 + 1] = yi;
+ a[k1 + 0] = xr;
+ a[k1 + 1] = xi;
+ j1 += 8;
+ k1 -= 8;
+ xr = a[j1 + 0];
+ xi = a[j1 + 1];
+ yr = a[k1 + 0];
+ yi = a[k1 + 1];
+ a[j1 + 0] = yr;
+ a[j1 + 1] = yi;
+ a[k1 + 0] = xr;
+ a[k1 + 1] = xi;
+ j1 += 8;
+ k1 += 16;
+ xr = a[j1 + 0];
+ xi = a[j1 + 1];
+ yr = a[k1 + 0];
+ yi = a[k1 + 1];
+ a[j1 + 0] = yr;
+ a[j1 + 1] = yi;
+ a[k1 + 0] = xr;
+ a[k1 + 1] = xi;
+ }
+ j1 = 2 * k + 8 + ip[k];
+ k1 = j1 + 8;
+ xr = a[j1 + 0];
+ xi = a[j1 + 1];
+ yr = a[k1 + 0];
+ yi = a[k1 + 1];
+ a[j1 + 0] = yr;
+ a[j1 + 1] = yi;
+ a[k1 + 0] = xr;
+ a[k1 + 1] = xi;
+ }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h
new file mode 100644
index 0000000000..96d57dc908
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
+
+#include "typedefs.h" // NOLINT(build/include)
+
+namespace webrtc {
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void cft1st_128_SSE2(float* a);
+void cftmdl_128_SSE2(float* a);
+void rftfsub_128_SSE2(float* a);
+void rftbsub_128_SSE2(float* a);
+#endif
+
+#if defined(MIPS_FPU_LE)
+void cft1st_128_mips(float* a);
+void cftmdl_128_mips(float* a);
+void rftfsub_128_mips(float* a);
+void rftbsub_128_mips(float* a);
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+void cft1st_128_neon(float* a);
+void cftmdl_128_neon(float* a);
+void rftfsub_128_neon(float* a);
+void rftbsub_128_neon(float* a);
+#endif
+
+class OouraFft {
+ public:
+ OouraFft();
+ ~OouraFft();
+ void Fft(float* a) const;
+ void InverseFft(float* a) const;
+
+ private:
+ void cft1st_128(float* a) const;
+ void cftmdl_128(float* a) const;
+ void rftfsub_128(float* a) const;
+ void rftbsub_128(float* a) const;
+
+ void cftfsub_128(float* a) const;
+ void cftbsub_128(float* a) const;
+ void bitrv2_128(float* a) const;
+ bool use_sse2_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc
new file mode 100644
index 0000000000..569e1d7e82
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc
@@ -0,0 +1,1185 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/ooura_fft.h"
+
+#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "typedefs.h" // NOLINT(build/include)
+
+namespace webrtc {
+
+#if defined(MIPS_FPU_LE)
+void bitrv2_128_mips(float* a) {
+ // n is 128
+ float xr, xi, yr, yi;
+
+ xr = a[8];
+ xi = a[9];
+ yr = a[16];
+ yi = a[17];
+ a[8] = yr;
+ a[9] = yi;
+ a[16] = xr;
+ a[17] = xi;
+
+ xr = a[64];
+ xi = a[65];
+ yr = a[2];
+ yi = a[3];
+ a[64] = yr;
+ a[65] = yi;
+ a[2] = xr;
+ a[3] = xi;
+
+ xr = a[72];
+ xi = a[73];
+ yr = a[18];
+ yi = a[19];
+ a[72] = yr;
+ a[73] = yi;
+ a[18] = xr;
+ a[19] = xi;
+
+ xr = a[80];
+ xi = a[81];
+ yr = a[10];
+ yi = a[11];
+ a[80] = yr;
+ a[81] = yi;
+ a[10] = xr;
+ a[11] = xi;
+
+ xr = a[88];
+ xi = a[89];
+ yr = a[26];
+ yi = a[27];
+ a[88] = yr;
+ a[89] = yi;
+ a[26] = xr;
+ a[27] = xi;
+
+ xr = a[74];
+ xi = a[75];
+ yr = a[82];
+ yi = a[83];
+ a[74] = yr;
+ a[75] = yi;
+ a[82] = xr;
+ a[83] = xi;
+
+ xr = a[32];
+ xi = a[33];
+ yr = a[4];
+ yi = a[5];
+ a[32] = yr;
+ a[33] = yi;
+ a[4] = xr;
+ a[5] = xi;
+
+ xr = a[40];
+ xi = a[41];
+ yr = a[20];
+ yi = a[21];
+ a[40] = yr;
+ a[41] = yi;
+ a[20] = xr;
+ a[21] = xi;
+
+ xr = a[48];
+ xi = a[49];
+ yr = a[12];
+ yi = a[13];
+ a[48] = yr;
+ a[49] = yi;
+ a[12] = xr;
+ a[13] = xi;
+
+ xr = a[56];
+ xi = a[57];
+ yr = a[28];
+ yi = a[29];
+ a[56] = yr;
+ a[57] = yi;
+ a[28] = xr;
+ a[29] = xi;
+
+ xr = a[34];
+ xi = a[35];
+ yr = a[68];
+ yi = a[69];
+ a[34] = yr;
+ a[35] = yi;
+ a[68] = xr;
+ a[69] = xi;
+
+ xr = a[42];
+ xi = a[43];
+ yr = a[84];
+ yi = a[85];
+ a[42] = yr;
+ a[43] = yi;
+ a[84] = xr;
+ a[85] = xi;
+
+ xr = a[50];
+ xi = a[51];
+ yr = a[76];
+ yi = a[77];
+ a[50] = yr;
+ a[51] = yi;
+ a[76] = xr;
+ a[77] = xi;
+
+ xr = a[58];
+ xi = a[59];
+ yr = a[92];
+ yi = a[93];
+ a[58] = yr;
+ a[59] = yi;
+ a[92] = xr;
+ a[93] = xi;
+
+ xr = a[44];
+ xi = a[45];
+ yr = a[52];
+ yi = a[53];
+ a[44] = yr;
+ a[45] = yi;
+ a[52] = xr;
+ a[53] = xi;
+
+ xr = a[96];
+ xi = a[97];
+ yr = a[6];
+ yi = a[7];
+ a[96] = yr;
+ a[97] = yi;
+ a[6] = xr;
+ a[7] = xi;
+
+ xr = a[104];
+ xi = a[105];
+ yr = a[22];
+ yi = a[23];
+ a[104] = yr;
+ a[105] = yi;
+ a[22] = xr;
+ a[23] = xi;
+
+ xr = a[112];
+ xi = a[113];
+ yr = a[14];
+ yi = a[15];
+ a[112] = yr;
+ a[113] = yi;
+ a[14] = xr;
+ a[15] = xi;
+
+ xr = a[120];
+ xi = a[121];
+ yr = a[30];
+ yi = a[31];
+ a[120] = yr;
+ a[121] = yi;
+ a[30] = xr;
+ a[31] = xi;
+
+ xr = a[98];
+ xi = a[99];
+ yr = a[70];
+ yi = a[71];
+ a[98] = yr;
+ a[99] = yi;
+ a[70] = xr;
+ a[71] = xi;
+
+ xr = a[106];
+ xi = a[107];
+ yr = a[86];
+ yi = a[87];
+ a[106] = yr;
+ a[107] = yi;
+ a[86] = xr;
+ a[87] = xi;
+
+ xr = a[114];
+ xi = a[115];
+ yr = a[78];
+ yi = a[79];
+ a[114] = yr;
+ a[115] = yi;
+ a[78] = xr;
+ a[79] = xi;
+
+ xr = a[122];
+ xi = a[123];
+ yr = a[94];
+ yi = a[95];
+ a[122] = yr;
+ a[123] = yi;
+ a[94] = xr;
+ a[95] = xi;
+
+ xr = a[100];
+ xi = a[101];
+ yr = a[38];
+ yi = a[39];
+ a[100] = yr;
+ a[101] = yi;
+ a[38] = xr;
+ a[39] = xi;
+
+ xr = a[108];
+ xi = a[109];
+ yr = a[54];
+ yi = a[55];
+ a[108] = yr;
+ a[109] = yi;
+ a[54] = xr;
+ a[55] = xi;
+
+ xr = a[116];
+ xi = a[117];
+ yr = a[46];
+ yi = a[47];
+ a[116] = yr;
+ a[117] = yi;
+ a[46] = xr;
+ a[47] = xi;
+
+ xr = a[124];
+ xi = a[125];
+ yr = a[62];
+ yi = a[63];
+ a[124] = yr;
+ a[125] = yi;
+ a[62] = xr;
+ a[63] = xi;
+
+ xr = a[110];
+ xi = a[111];
+ yr = a[118];
+ yi = a[119];
+ a[110] = yr;
+ a[111] = yi;
+ a[118] = xr;
+ a[119] = xi;
+}
+
+void cft1st_128_mips(float* a) {
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
+ int a_ptr, p1_rdft, p2_rdft, count;
+ const float* first = rdft_wk3ri_first;
+ const float* second = rdft_wk3ri_second;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ // first 8
+ "lwc1 %[f0], 0(%[a]) \n\t"
+ "lwc1 %[f1], 4(%[a]) \n\t"
+ "lwc1 %[f2], 8(%[a]) \n\t"
+ "lwc1 %[f3], 12(%[a]) \n\t"
+ "lwc1 %[f4], 16(%[a]) \n\t"
+ "lwc1 %[f5], 20(%[a]) \n\t"
+ "lwc1 %[f6], 24(%[a]) \n\t"
+ "lwc1 %[f7], 28(%[a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "sub.s %[f2], %[f1], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[a]) \n\t"
+ "swc1 %[f8], 16(%[a]) \n\t"
+ "swc1 %[f2], 28(%[a]) \n\t"
+ "swc1 %[f1], 12(%[a]) \n\t"
+ "swc1 %[f4], 4(%[a]) \n\t"
+ "swc1 %[f6], 20(%[a]) \n\t"
+ "swc1 %[f3], 8(%[a]) \n\t"
+ "swc1 %[f0], 24(%[a]) \n\t"
+ // second 8
+ "lwc1 %[f0], 32(%[a]) \n\t"
+ "lwc1 %[f1], 36(%[a]) \n\t"
+ "lwc1 %[f2], 40(%[a]) \n\t"
+ "lwc1 %[f3], 44(%[a]) \n\t"
+ "lwc1 %[f4], 48(%[a]) \n\t"
+ "lwc1 %[f5], 52(%[a]) \n\t"
+ "lwc1 %[f6], 56(%[a]) \n\t"
+ "lwc1 %[f7], 60(%[a]) \n\t"
+ "add.s %[f8], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f4], %[f1] \n\t"
+ "sub.s %[f4], %[f4], %[f1] \n\t"
+ "add.s %[f1], %[f3], %[f8] \n\t"
+ "sub.s %[f3], %[f3], %[f8] \n\t"
+ "sub.s %[f8], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f6], %[f2] \n\t"
+ "sub.s %[f6], %[f2], %[f6] \n\t"
+ "lwc1 %[f9], 8(%[rdft_w]) \n\t"
+ "sub.s %[f2], %[f8], %[f7] \n\t"
+ "add.s %[f8], %[f8], %[f7] \n\t"
+ "sub.s %[f7], %[f4], %[f0] \n\t"
+ "add.s %[f4], %[f4], %[f0] \n\t"
+ // prepare for loop
+ "addiu %[a_ptr], %[a], 64 \n\t"
+ "addiu %[p1_rdft], %[rdft_w], 8 \n\t"
+ "addiu %[p2_rdft], %[rdft_w], 16 \n\t"
+ "addiu %[count], $zero, 7 \n\t"
+ // finish second 8
+ "mul.s %[f2], %[f9], %[f2] \n\t"
+ "mul.s %[f8], %[f9], %[f8] \n\t"
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f4], %[f9], %[f4] \n\t"
+ "swc1 %[f1], 32(%[a]) \n\t"
+ "swc1 %[f3], 52(%[a]) \n\t"
+ "swc1 %[f5], 36(%[a]) \n\t"
+ "swc1 %[f6], 48(%[a]) \n\t"
+ "swc1 %[f2], 40(%[a]) \n\t"
+ "swc1 %[f8], 44(%[a]) \n\t"
+ "swc1 %[f7], 56(%[a]) \n\t"
+ "swc1 %[f4], 60(%[a]) \n\t"
+ // loop
+ "1: \n\t"
+ "lwc1 %[f0], 0(%[a_ptr]) \n\t"
+ "lwc1 %[f1], 4(%[a_ptr]) \n\t"
+ "lwc1 %[f2], 8(%[a_ptr]) \n\t"
+ "lwc1 %[f3], 12(%[a_ptr]) \n\t"
+ "lwc1 %[f4], 16(%[a_ptr]) \n\t"
+ "lwc1 %[f5], 20(%[a_ptr]) \n\t"
+ "lwc1 %[f6], 24(%[a_ptr]) \n\t"
+ "lwc1 %[f7], 28(%[a_ptr]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f10], 4(%[p1_rdft]) \n\t"
+ "lwc1 %[f11], 0(%[p2_rdft]) \n\t"
+ "lwc1 %[f12], 4(%[p2_rdft]) \n\t"
+ "lwc1 %[f13], 8(%[first]) \n\t"
+ "lwc1 %[f14], 12(%[first]) \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "add.s %[f3], %[f0], %[f5] \n\t"
+ "sub.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "swc1 %[f7], 0(%[a_ptr]) \n\t"
+ "swc1 %[f2], 4(%[a_ptr]) \n\t"
+ "mul.s %[f4], %[f9], %[f8] \n\t"
+#if defined(MIPS32_R2_LE)
+ "mul.s %[f8], %[f10], %[f8] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f0], %[f12], %[f0] \n\t"
+ "mul.s %[f2], %[f13], %[f3] \n\t"
+ "mul.s %[f3], %[f14], %[f3] \n\t"
+ "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t"
+ "madd.s %[f8], %[f8], %[f9], %[f6] \n\t"
+ "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
+ "madd.s %[f0], %[f0], %[f11], %[f5] \n\t"
+ "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
+ "madd.s %[f3], %[f3], %[f13], %[f1] \n\t"
+#else
+ "mul.s %[f7], %[f10], %[f6] \n\t"
+ "mul.s %[f6], %[f9], %[f6] \n\t"
+ "mul.s %[f8], %[f10], %[f8] \n\t"
+ "mul.s %[f2], %[f11], %[f0] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f5], %[f12], %[f5] \n\t"
+ "mul.s %[f0], %[f12], %[f0] \n\t"
+ "mul.s %[f12], %[f13], %[f3] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "mul.s %[f1], %[f14], %[f1] \n\t"
+ "mul.s %[f3], %[f14], %[f3] \n\t"
+ "sub.s %[f4], %[f4], %[f7] \n\t"
+ "add.s %[f8], %[f6], %[f8] \n\t"
+ "sub.s %[f7], %[f2], %[f5] \n\t"
+ "add.s %[f0], %[f11], %[f0] \n\t"
+ "sub.s %[f2], %[f12], %[f1] \n\t"
+ "add.s %[f3], %[f13], %[f3] \n\t"
+#endif
+ "swc1 %[f4], 16(%[a_ptr]) \n\t"
+ "swc1 %[f8], 20(%[a_ptr]) \n\t"
+ "swc1 %[f7], 8(%[a_ptr]) \n\t"
+ "swc1 %[f0], 12(%[a_ptr]) \n\t"
+ "swc1 %[f2], 24(%[a_ptr]) \n\t"
+ "swc1 %[f3], 28(%[a_ptr]) \n\t"
+ "lwc1 %[f0], 32(%[a_ptr]) \n\t"
+ "lwc1 %[f1], 36(%[a_ptr]) \n\t"
+ "lwc1 %[f2], 40(%[a_ptr]) \n\t"
+ "lwc1 %[f3], 44(%[a_ptr]) \n\t"
+ "lwc1 %[f4], 48(%[a_ptr]) \n\t"
+ "lwc1 %[f5], 52(%[a_ptr]) \n\t"
+ "lwc1 %[f6], 56(%[a_ptr]) \n\t"
+ "lwc1 %[f7], 60(%[a_ptr]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f11], 8(%[p2_rdft]) \n\t"
+ "lwc1 %[f12], 12(%[p2_rdft]) \n\t"
+ "lwc1 %[f13], 8(%[second]) \n\t"
+ "lwc1 %[f14], 12(%[second]) \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f2], %[f8] \n\t"
+ "add.s %[f2], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f3], %[f6] \n\t"
+ "add.s %[f3], %[f0], %[f5] \n\t"
+ "sub.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "swc1 %[f7], 32(%[a_ptr]) \n\t"
+ "swc1 %[f2], 36(%[a_ptr]) \n\t"
+ "mul.s %[f4], %[f10], %[f8] \n\t"
+#if defined(MIPS32_R2_LE)
+ "mul.s %[f10], %[f10], %[f6] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f2], %[f13], %[f3] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "madd.s %[f4], %[f4], %[f9], %[f6] \n\t"
+ "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t"
+ "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
+ "madd.s %[f11], %[f11], %[f12], %[f0] \n\t"
+ "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
+ "madd.s %[f13], %[f13], %[f14], %[f3] \n\t"
+#else
+ "mul.s %[f2], %[f9], %[f6] \n\t"
+ "mul.s %[f10], %[f10], %[f6] \n\t"
+ "mul.s %[f9], %[f9], %[f8] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f8], %[f12], %[f5] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f12], %[f12], %[f0] \n\t"
+ "mul.s %[f5], %[f13], %[f3] \n\t"
+ "mul.s %[f0], %[f14], %[f1] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "mul.s %[f14], %[f14], %[f3] \n\t"
+ "add.s %[f4], %[f4], %[f2] \n\t"
+ "sub.s %[f10], %[f10], %[f9] \n\t"
+ "sub.s %[f7], %[f7], %[f8] \n\t"
+ "add.s %[f11], %[f11], %[f12] \n\t"
+ "sub.s %[f2], %[f5], %[f0] \n\t"
+ "add.s %[f13], %[f13], %[f14] \n\t"
+#endif
+ "swc1 %[f4], 48(%[a_ptr]) \n\t"
+ "swc1 %[f10], 52(%[a_ptr]) \n\t"
+ "swc1 %[f7], 40(%[a_ptr]) \n\t"
+ "swc1 %[f11], 44(%[a_ptr]) \n\t"
+ "swc1 %[f2], 56(%[a_ptr]) \n\t"
+ "swc1 %[f13], 60(%[a_ptr]) \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f9], 8(%[p1_rdft]) \n\t"
+ "addiu %[a_ptr], %[a_ptr], 64 \n\t"
+ "addiu %[p1_rdft], %[p1_rdft], 8 \n\t"
+ "addiu %[p2_rdft], %[p2_rdft], 16 \n\t"
+ "addiu %[first], %[first], 8 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[second], %[second], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+ [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
+ [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first),
+ [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
+ : [a] "r" (a), [rdft_w] "r" (rdft_w)
+ : "memory"
+ );
+}
+
+void cftmdl_128_mips(float* a) {
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
+ int tmp_a, count;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 0 \n\t"
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f8], 64(%[tmp_a]) \n\t"
+ "swc1 %[f2], 36(%[tmp_a]) \n\t"
+ "swc1 %[f1], 100(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "swc1 %[f6], 68(%[tmp_a]) \n\t"
+ "swc1 %[f3], 32(%[tmp_a]) \n\t"
+ "swc1 %[f0], 96(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory"
+ );
+ f9 = rdft_w[2];
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 128 \n\t"
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "sub.s %[f8], %[f0], %[f2] \n\t"
+ "add.s %[f0], %[f0], %[f2] \n\t"
+ "sub.s %[f2], %[f5], %[f7] \n\t"
+ "add.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f1], %[f3] \n\t"
+ "add.s %[f1], %[f1], %[f3] \n\t"
+ "sub.s %[f3], %[f4], %[f6] \n\t"
+ "add.s %[f4], %[f4], %[f6] \n\t"
+ "sub.s %[f6], %[f8], %[f2] \n\t"
+ "add.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f5], %[f1] \n\t"
+ "sub.s %[f5], %[f5], %[f1] \n\t"
+ "add.s %[f1], %[f3], %[f7] \n\t"
+ "sub.s %[f3], %[f3], %[f7] \n\t"
+ "add.s %[f7], %[f0], %[f4] \n\t"
+ "sub.s %[f0], %[f0], %[f4] \n\t"
+ "sub.s %[f4], %[f6], %[f1] \n\t"
+ "add.s %[f6], %[f6], %[f1] \n\t"
+ "sub.s %[f1], %[f3], %[f8] \n\t"
+ "add.s %[f3], %[f3], %[f8] \n\t"
+ "mul.s %[f4], %[f4], %[f9] \n\t"
+ "mul.s %[f6], %[f6], %[f9] \n\t"
+ "mul.s %[f1], %[f1], %[f9] \n\t"
+ "mul.s %[f3], %[f3], %[f9] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f2], 4(%[tmp_a]) \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f0], 68(%[tmp_a]) \n\t"
+ "swc1 %[f4], 32(%[tmp_a]) \n\t"
+ "swc1 %[f6], 36(%[tmp_a]) \n\t"
+ "swc1 %[f1], 96(%[tmp_a]) \n\t"
+ "swc1 %[f3], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9)
+ : "memory"
+ );
+ f10 = rdft_w[3];
+ f11 = rdft_w[4];
+ f12 = rdft_w[5];
+ f13 = rdft_wk3ri_first[2];
+ f14 = rdft_wk3ri_first[3];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 256 \n\t"
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f8], %[f2] \n\t"
+ "add.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "sub.s %[f4], %[f6], %[f3] \n\t"
+ "add.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f8], 0(%[tmp_a]) \n\t"
+ "swc1 %[f6], 4(%[tmp_a]) \n\t"
+ "mul.s %[f5], %[f9], %[f7] \n\t"
+#if defined(MIPS32_R2_LE)
+ "mul.s %[f7], %[f10], %[f7] \n\t"
+ "mul.s %[f8], %[f11], %[f3] \n\t"
+ "mul.s %[f3], %[f12], %[f3] \n\t"
+ "mul.s %[f6], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t"
+ "madd.s %[f7], %[f7], %[f9], %[f4] \n\t"
+ "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t"
+ "madd.s %[f3], %[f3], %[f11], %[f2] \n\t"
+ "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t"
+ "madd.s %[f0], %[f0], %[f13], %[f1] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+#else
+ "mul.s %[f8], %[f10], %[f4] \n\t"
+ "mul.s %[f4], %[f9], %[f4] \n\t"
+ "mul.s %[f7], %[f10], %[f7] \n\t"
+ "mul.s %[f6], %[f11], %[f3] \n\t"
+ "mul.s %[f3], %[f12], %[f3] \n\t"
+ "sub.s %[f5], %[f5], %[f8] \n\t"
+ "mul.s %[f8], %[f12], %[f2] \n\t"
+ "mul.s %[f2], %[f11], %[f2] \n\t"
+ "add.s %[f7], %[f4], %[f7] \n\t"
+ "mul.s %[f4], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "sub.s %[f8], %[f6], %[f8] \n\t"
+ "mul.s %[f6], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "add.s %[f3], %[f2], %[f3] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+ "sub.s %[f6], %[f4], %[f6] \n\t"
+ "add.s %[f0], %[f1], %[f0] \n\t"
+#endif
+ "swc1 %[f8], 32(%[tmp_a]) \n\t"
+ "swc1 %[f3], 36(%[tmp_a]) \n\t"
+ "swc1 %[f6], 96(%[tmp_a]) \n\t"
+ "swc1 %[f0], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
+ [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+ : "memory"
+ );
+ f11 = rdft_w[6];
+ f12 = rdft_w[7];
+ f13 = rdft_wk3ri_second[2];
+ f14 = rdft_wk3ri_second[3];
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 384 \n\t"
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f2], %[f8] \n\t"
+ "add.s %[f2], %[f2], %[f8] \n\t"
+ "add.s %[f8], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "sub.s %[f4], %[f3], %[f6] \n\t"
+ "add.s %[f3], %[f3], %[f6] \n\t"
+ "sub.s %[f6], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f2], 0(%[tmp_a]) \n\t"
+ "swc1 %[f3], 4(%[tmp_a]) \n\t"
+ "mul.s %[f5], %[f10], %[f7] \n\t"
+#if defined(MIPS32_R2_LE)
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f2], %[f12], %[f8] \n\t"
+ "mul.s %[f8], %[f11], %[f8] \n\t"
+ "mul.s %[f3], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "madd.s %[f5], %[f5], %[f9], %[f4] \n\t"
+ "msub.s %[f7], %[f7], %[f10], %[f4] \n\t"
+ "msub.s %[f2], %[f2], %[f11], %[f6] \n\t"
+ "madd.s %[f8], %[f8], %[f12], %[f6] \n\t"
+ "msub.s %[f3], %[f3], %[f13], %[f0] \n\t"
+ "madd.s %[f1], %[f1], %[f14], %[f0] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+#else
+ "mul.s %[f2], %[f9], %[f4] \n\t"
+ "mul.s %[f4], %[f10], %[f4] \n\t"
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f3], %[f11], %[f6] \n\t"
+ "mul.s %[f6], %[f12], %[f6] \n\t"
+ "add.s %[f5], %[f5], %[f2] \n\t"
+ "sub.s %[f7], %[f4], %[f7] \n\t"
+ "mul.s %[f2], %[f12], %[f8] \n\t"
+ "mul.s %[f8], %[f11], %[f8] \n\t"
+ "mul.s %[f4], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "sub.s %[f2], %[f3], %[f2] \n\t"
+ "mul.s %[f3], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "add.s %[f8], %[f8], %[f6] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+ "sub.s %[f3], %[f3], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f0] \n\t"
+#endif
+ "swc1 %[f2], 32(%[tmp_a]) \n\t"
+ "swc1 %[f8], 36(%[tmp_a]) \n\t"
+ "swc1 %[f3], 96(%[tmp_a]) \n\t"
+ "swc1 %[f1], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
+ [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+ : "memory"
+ );
+}
+
+void cftfsub_128_mips(float* a) {
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8;
+ int tmp_a, count;
+
+ cft1st_128_mips(a);
+ cftmdl_128_mips(a);
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 0 \n\t"
+ "addiu %[count], $zero, 16 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 128(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 256(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 384(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 132(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 260(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 388(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f8], 256(%[tmp_a]) \n\t"
+ "swc1 %[f2], 132(%[tmp_a]) \n\t"
+ "swc1 %[f1], 388(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "swc1 %[f6], 260(%[tmp_a]) \n\t"
+ "swc1 %[f3], 128(%[tmp_a]) \n\t"
+ "swc1 %[f0], 384(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
+ [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory"
+ );
+}
+
+void cftbsub_128_mips(float* a) {
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8;
+ int tmp_a, count;
+
+ cft1st_128_mips(a);
+ cftmdl_128_mips(a);
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 0 \n\t"
+ "addiu %[count], $zero, 16 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 128(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 256(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 384(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 132(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 260(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 388(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f3], %[f1] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "sub.s %[f2], %[f1], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f3], %[f6] \n\t"
+ "sub.s %[f6], %[f3], %[f6] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "neg.s %[f4], %[f4] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f8], 256(%[tmp_a]) \n\t"
+ "swc1 %[f2], 132(%[tmp_a]) \n\t"
+ "swc1 %[f1], 388(%[tmp_a]) \n\t"
+ "swc1 %[f6], 260(%[tmp_a]) \n\t"
+ "swc1 %[f3], 128(%[tmp_a]) \n\t"
+ "swc1 %[f0], 384(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory"
+ );
+}
+
+void rftfsub_128_mips(float* a) {
+ const float* c = rdft_w + 32;
+ const float f0 = 0.5f;
+ float* a1 = &a[2];
+ float* a2 = &a[126];
+ const float* c1 = &c[1];
+ const float* c2 = &c[31];
+ float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
+ int count;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "addiu %[count], $zero, 15 \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f9], %[f9], %[f8] \n\t"
+ "add.s %[f6], %[f6], %[f5] \n\t"
+#else
+ "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f3], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 8 \n\t"
+ "addiu %[a2], %[a2], -8 \n\t"
+ "addiu %[c1], %[c1], 4 \n\t"
+ "addiu %[c2], %[c2], -4 \n\t"
+ "1: \n\t"
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "lwc1 %[f10], -4(%[c2]) \n\t"
+ "lwc1 %[f11], 8(%[a1]) \n\t"
+ "lwc1 %[f12], -8(%[a2]) \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "sub.s %[f9], %[f9], %[f8] \n\t"
+ "add.s %[f6], %[f6], %[f5] \n\t"
+#else
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f10], %[f0], %[f10] \n\t"
+ "sub.s %[f5], %[f11], %[f12] \n\t"
+ "add.s %[f7], %[f13], %[f14] \n\t"
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f3], %[f6] \n\t"
+ "mul.s %[f8], %[f10], %[f5] \n\t"
+ "mul.s %[f10], %[f10], %[f7] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f9], %[f15], %[f7] \n\t"
+ "mul.s %[f15], %[f15], %[f5] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f8], %[f8], %[f9] \n\t"
+ "add.s %[f10], %[f10], %[f15] \n\t"
+#else
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t"
+ "madd.s %[f10], %[f10], %[f15], %[f5] \n\t"
+#endif
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "sub.s %[f11], %[f11], %[f8] \n\t"
+ "add.s %[f12], %[f12], %[f8] \n\t"
+ "sub.s %[f13], %[f13], %[f10] \n\t"
+ "sub.s %[f14], %[f14], %[f10] \n\t"
+ "addiu %[c2], %[c2], -8 \n\t"
+ "addiu %[c1], %[c1], 8 \n\t"
+ "swc1 %[f11], 8(%[a1]) \n\t"
+ "swc1 %[f12], -8(%[a2]) \n\t"
+ "swc1 %[f13], 12(%[a1]) \n\t"
+ "swc1 %[f14], -4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 16 \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[a2], %[a2], -16 \n\t"
+ ".set pop \n\t"
+ : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
+ [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+ [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+ [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+ [count] "=&r" (count)
+ : [f0] "f" (f0)
+ : "memory"
+ );
+}
+
+void rftbsub_128_mips(float* a) {
+ const float *c = rdft_w + 32;
+ const float f0 = 0.5f;
+ float* a1 = &a[2];
+ float* a2 = &a[126];
+ const float* c1 = &c[1];
+ const float* c2 = &c[31];
+ float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
+ int count;
+
+ a[1] = -a[1];
+ a[65] = -a[65];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "addiu %[count], $zero, 15 \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f9], %[f9], %[f8] \n\t"
+ "sub.s %[f6], %[f6], %[f5] \n\t"
+#else
+ "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f6], %[f3] \n\t"
+ "sub.s %[f4], %[f6], %[f4] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 8 \n\t"
+ "addiu %[a2], %[a2], -8 \n\t"
+ "addiu %[c1], %[c1], 4 \n\t"
+ "addiu %[c2], %[c2], -4 \n\t"
+ "1: \n\t"
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "lwc1 %[f10], -4(%[c2]) \n\t"
+ "lwc1 %[f11], 8(%[a1]) \n\t"
+ "lwc1 %[f12], -8(%[a2]) \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "add.s %[f9], %[f9], %[f8] \n\t"
+ "sub.s %[f6], %[f6], %[f5] \n\t"
+#else
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f10], %[f0], %[f10] \n\t"
+ "sub.s %[f5], %[f11], %[f12] \n\t"
+ "add.s %[f7], %[f13], %[f14] \n\t"
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f6], %[f3] \n\t"
+ "mul.s %[f8], %[f10], %[f5] \n\t"
+ "mul.s %[f10], %[f10], %[f7] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f9], %[f15], %[f7] \n\t"
+ "mul.s %[f15], %[f15], %[f5] \n\t"
+ "sub.s %[f4], %[f6], %[f4] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "add.s %[f8], %[f8], %[f9] \n\t"
+ "sub.s %[f10], %[f10], %[f15] \n\t"
+#else
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f4], %[f6], %[f4] \n\t"
+ "madd.s %[f8], %[f8], %[f15], %[f7] \n\t"
+ "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t"
+#endif
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "sub.s %[f11], %[f11], %[f8] \n\t"
+ "add.s %[f12], %[f12], %[f8] \n\t"
+ "sub.s %[f13], %[f10], %[f13] \n\t"
+ "sub.s %[f14], %[f10], %[f14] \n\t"
+ "addiu %[c2], %[c2], -8 \n\t"
+ "addiu %[c1], %[c1], 8 \n\t"
+ "swc1 %[f11], 8(%[a1]) \n\t"
+ "swc1 %[f12], -8(%[a2]) \n\t"
+ "swc1 %[f13], 12(%[a1]) \n\t"
+ "swc1 %[f14], -4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 16 \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[a2], %[a2], -16 \n\t"
+ ".set pop \n\t"
+ : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
+ [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+ [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+ [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+ [count] "=&r" (count)
+ : [f0] "f" (f0)
+ : "memory"
+ );
+}
+#endif
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc
new file mode 100644
index 0000000000..401387a643
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The rdft AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on the sse2 version.
+ */
+
+#include "modules/audio_processing/utility/ooura_fft.h"
+
+#include <arm_neon.h>
+
+#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
+
+namespace webrtc {
+
+#if defined(WEBRTC_HAS_NEON)
+void cft1st_128_neon(float* a) {
+ const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+ int j, k2;
+
+ for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+ float32x4_t a00v = vld1q_f32(&a[j + 0]);
+ float32x4_t a04v = vld1q_f32(&a[j + 4]);
+ float32x4_t a08v = vld1q_f32(&a[j + 8]);
+ float32x4_t a12v = vld1q_f32(&a[j + 12]);
+ float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
+ float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
+ float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
+ float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
+ const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
+ const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
+ const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
+ const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
+ const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
+ const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
+ float32x4_t x0v = vaddq_f32(a01v, a23v);
+ const float32x4_t x1v = vsubq_f32(a01v, a23v);
+ const float32x4_t x2v = vaddq_f32(a45v, a67v);
+ const float32x4_t x3v = vsubq_f32(a45v, a67v);
+ const float32x4_t x3w = vrev64q_f32(x3v);
+ float32x4_t x0w;
+ a01v = vaddq_f32(x0v, x2v);
+ x0v = vsubq_f32(x0v, x2v);
+ x0w = vrev64q_f32(x0v);
+ a45v = vmulq_f32(wk2rv, x0v);
+ a45v = vmlaq_f32(a45v, wk2iv, x0w);
+ x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
+ x0w = vrev64q_f32(x0v);
+ a23v = vmulq_f32(wk1rv, x0v);
+ a23v = vmlaq_f32(a23v, wk1iv, x0w);
+ x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
+ x0w = vrev64q_f32(x0v);
+ a67v = vmulq_f32(wk3rv, x0v);
+ a67v = vmlaq_f32(a67v, wk3iv, x0w);
+ a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
+ a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
+ a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
+ a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
+ vst1q_f32(&a[j + 0], a00v);
+ vst1q_f32(&a[j + 4], a04v);
+ vst1q_f32(&a[j + 8], a08v);
+ vst1q_f32(&a[j + 12], a12v);
+ }
+}
+
+void cftmdl_128_neon(float* a) {
+ int j;
+ const int l = 8;
+ const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+ float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
+
+ for (j = 0; j < l; j += 2) {
+ const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+ const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+ const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+ const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+ const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+ const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+ const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+ const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+ const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+ const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+ const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+ const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+ const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+ const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+ const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+ const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+ const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+ const float32x4_t x1_x3_add =
+ vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ const float32x4_t x1_x3_sub =
+ vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
+ const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
+ const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
+ const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
+ const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
+ const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
+ const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
+ const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
+ const float32x4_t xx1_rev = vrev64q_f32(xx1);
+ const float32x4_t yy4_rev = vrev64q_f32(yy4);
+
+ vst1_f32(&a[j + 0], vget_low_f32(xx0));
+ vst1_f32(&a[j + 32], vget_high_f32(xx0));
+ vst1_f32(&a[j + 16], vget_low_f32(xx1));
+ vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
+
+ a[j + 48] = -a[j + 48];
+
+ vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
+ vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
+ vst1_f32(&a[j + 40], vget_low_f32(yy4));
+ vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
+ }
+
+ {
+ const int k = 64;
+ const int k1 = 2;
+ const int k2 = 2 * k1;
+ const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
+ const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
+ const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
+ const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
+ const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
+ wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
+ for (j = k; j < l + k; j += 2) {
+ const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+ const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+ const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+ const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+ const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+ const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+ const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+ const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+ const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+ const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+ const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+ const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+ const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+ const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+ const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+ const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+ const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+ const float32x4_t x1_x3_add =
+ vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ const float32x4_t x1_x3_sub =
+ vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
+ float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
+ float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
+ xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
+ xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
+ xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
+
+ vst1_f32(&a[j + 0], vget_low_f32(xx));
+ vst1_f32(&a[j + 32], vget_high_f32(xx));
+ vst1_f32(&a[j + 16], vget_low_f32(xx4));
+ vst1_f32(&a[j + 48], vget_high_f32(xx4));
+ vst1_f32(&a[j + 8], vget_low_f32(xx12));
+ vst1_f32(&a[j + 40], vget_high_f32(xx12));
+ vst1_f32(&a[j + 24], vget_low_f32(xx22));
+ vst1_f32(&a[j + 56], vget_high_f32(xx22));
+ }
+ }
+}
+
+__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
+ // A B C D -> C D A B
+ const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
+ // C D A B -> D C B A
+ return vrev64q_f32(rev);
+}
+
+void rftfsub_128_neon(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2;
+ const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+ // Vectorized code (four at once).
+ // Note: commented number are indexes for the first iteration of the loop.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+ // Load 'wk'.
+ const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
+ const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
+ const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
+ const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+ const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ // 2, 4, 6, 8, 3, 5, 7, 9
+ float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+ // 120, 122, 124, 126, 121, 123, 125, 127,
+ const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+ // 126, 124, 122, 120
+ const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+ // 127, 125, 123, 121
+ const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+ // Calculate 'x'.
+ const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+ // 3-127, 5-125, 7-123, 9-121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr - wki * xi;
+ // yi = wkr * xi + wki * xr;
+ const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+ const float32x4_t b_ = vmulq_f32(wki_, xi_);
+ const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+ const float32x4_t d_ = vmulq_f32(wki_, xr_);
+ const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
+ // Update 'a'.
+ // a[j2 + 0] -= yr;
+ // a[j2 + 1] -= yi;
+ // a[k2 + 0] += yr;
+ // a[k2 + 1] -= yi;
+ // 126, 124, 122, 120,
+ const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+ // 127, 125, 123, 121,
+ const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
+ // Shuffle in right order and store.
+ const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+ const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+ // 124, 125, 126, 127, 120, 121, 122, 123
+ const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+ // 2, 4, 6, 8,
+ a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+ // 3, 5, 7, 9,
+ a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
+ // 2, 3, 4, 5, 6, 7, 8, 9,
+ vst2q_f32(&a[0 + j2], a_j2_p);
+
+ vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+ vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+ }
+
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) {
+ const int k2 = 128 - j2;
+ const int k1 = 32 - j1;
+ const float wkr = 0.5f - c[k1];
+ const float wki = c[j1];
+ const float xr = a[j2 + 0] - a[k2 + 0];
+ const float xi = a[j2 + 1] + a[k2 + 1];
+ const float yr = wkr * xr - wki * xi;
+ const float yi = wkr * xi + wki * xr;
+ a[j2 + 0] -= yr;
+ a[j2 + 1] -= yi;
+ a[k2 + 0] += yr;
+ a[k2 + 1] -= yi;
+ }
+}
+
+void rftbsub_128_neon(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2;
+ const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+ a[1] = -a[1];
+ // Vectorized code (four at once).
+ // Note: commented number are indexes for the first iteration of the loop.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+ // Load 'wk'.
+ const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
+ const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
+ const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
+ const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+ const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ // 2, 4, 6, 8, 3, 5, 7, 9
+ float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+ // 120, 122, 124, 126, 121, 123, 125, 127,
+ const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+ // 126, 124, 122, 120
+ const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+ // 127, 125, 123, 121
+ const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+ // Calculate 'x'.
+ const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+ // 3-127, 5-125, 7-123, 9-121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr - wki * xi;
+ // yi = wkr * xi + wki * xr;
+ const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+ const float32x4_t b_ = vmulq_f32(wki_, xi_);
+ const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+ const float32x4_t d_ = vmulq_f32(wki_, xr_);
+ const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
+ // Update 'a'.
+ // a[j2 + 0] -= yr;
+ // a[j2 + 1] -= yi;
+ // a[k2 + 0] += yr;
+ // a[k2 + 1] -= yi;
+ // 126, 124, 122, 120,
+ const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+ // 127, 125, 123, 121,
+ const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
+ // Shuffle in right order and store.
+ // 2, 3, 4, 5, 6, 7, 8, 9,
+ const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+ const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+ // 124, 125, 126, 127, 120, 121, 122, 123
+ const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+ // 2, 4, 6, 8,
+ a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+ // 3, 5, 7, 9,
+ a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
+ // 2, 3, 4, 5, 6, 7, 8, 9,
+ vst2q_f32(&a[0 + j2], a_j2_p);
+
+ vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+ vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+ }
+
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) {
+ const int k2 = 128 - j2;
+ const int k1 = 32 - j1;
+ const float wkr = 0.5f - c[k1];
+ const float wki = c[j1];
+ const float xr = a[j2 + 0] - a[k2 + 0];
+ const float xi = a[j2 + 1] + a[k2 + 1];
+ const float yr = wkr * xr + wki * xi;
+ const float yi = wkr * xi - wki * xr;
+ a[j2 + 0] = a[j2 + 0] - yr;
+ a[j2 + 1] = yi - a[j2 + 1];
+ a[k2 + 0] = yr + a[k2 + 0];
+ a[k2 + 1] = yi - a[k2 + 1];
+ }
+ a[65] = -a[65];
+}
+#endif
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
new file mode 100644
index 0000000000..48a05c3bc2
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing//utility/ooura_fft.h"
+
+#include <emmintrin.h>
+
+#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
+
+namespace webrtc {
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+namespace {
+// These intrinsics were unavailable before VS 2008.
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+static __inline __m128 _mm_castsi128_ps(__m128i a) {
+ return *(__m128*)&a;
+}
+static __inline __m128i _mm_castps_si128(__m128 a) {
+ return *(__m128i*)&a;
+}
+#endif
+
+} // namespace
+
+void cft1st_128_SSE2(float* a) {
+ const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+ int j, k2;
+
+ for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+ __m128 a00v = _mm_loadu_ps(&a[j + 0]);
+ __m128 a04v = _mm_loadu_ps(&a[j + 4]);
+ __m128 a08v = _mm_loadu_ps(&a[j + 8]);
+ __m128 a12v = _mm_loadu_ps(&a[j + 12]);
+ __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
+ __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
+
+ const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
+ const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
+ const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
+ const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
+ const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
+ const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
+ __m128 x0v = _mm_add_ps(a01v, a23v);
+ const __m128 x1v = _mm_sub_ps(a01v, a23v);
+ const __m128 x2v = _mm_add_ps(a45v, a67v);
+ const __m128 x3v = _mm_sub_ps(a45v, a67v);
+ __m128 x0w;
+ a01v = _mm_add_ps(x0v, x2v);
+ x0v = _mm_sub_ps(x0v, x2v);
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+ {
+ const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
+ const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
+ a45v = _mm_add_ps(a45_0v, a45_1v);
+ }
+ {
+ __m128 a23_0v, a23_1v;
+ const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
+ const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
+ x0v = _mm_add_ps(x1v, x3s);
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+ a23_0v = _mm_mul_ps(wk1rv, x0v);
+ a23_1v = _mm_mul_ps(wk1iv, x0w);
+ a23v = _mm_add_ps(a23_0v, a23_1v);
+
+ x0v = _mm_sub_ps(x1v, x3s);
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+ }
+ {
+ const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
+ const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
+ a67v = _mm_add_ps(a67_0v, a67_1v);
+ }
+
+ a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
+ a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
+ a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
+ a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
+ _mm_storeu_ps(&a[j + 0], a00v);
+ _mm_storeu_ps(&a[j + 4], a04v);
+ _mm_storeu_ps(&a[j + 8], a08v);
+ _mm_storeu_ps(&a[j + 12], a12v);
+ }
+}
+
+void cftmdl_128_SSE2(float* a) {
+ const int l = 8;
+ const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+ int j0;
+
+ __m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
+ for (j0 = 0; j0 < l; j0 += 2) {
+ const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+ const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+ const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+ const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+ const __m128 a_00_32 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+ const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+ const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+ const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+ const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+ const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+ const __m128 a_16_48 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+ const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+ const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+
+ const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+ _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+ const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+ const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+ const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+ const __m128 yy0 =
+ _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
+ const __m128 yy1 =
+ _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
+ const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
+ const __m128 yy3 = _mm_add_ps(yy0, yy2);
+ const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 32],
+ _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 48],
+ _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
+ a[j0 + 48] = -a[j0 + 48];
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
+ _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 56],
+ _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
+ }
+
+ {
+ int k = 64;
+ int k1 = 2;
+ int k2 = 2 * k1;
+ const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
+ const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
+ const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
+ const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
+ const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
+ wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
+ for (j0 = k; j0 < l + k; j0 += 2) {
+ const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+ const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+ const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+ const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+ const __m128 a_00_32 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+ const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+ const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+ const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+ const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+ const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+ const __m128 a_16_48 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+ const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+ const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
+ const __m128 xx3 = _mm_mul_ps(
+ wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
+ _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx4 = _mm_add_ps(xx2, xx3);
+
+ const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+ _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+ const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+ const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+ const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+ const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
+ const __m128 xx11 = _mm_mul_ps(
+ wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+ _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx12 = _mm_add_ps(xx10, xx11);
+
+ const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
+ const __m128 xx21 = _mm_mul_ps(
+ wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+ _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx22 = _mm_add_ps(xx20, xx21);
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 32],
+ _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 48],
+ _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 40],
+ _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 56],
+ _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
+ }
+ }
+}
+
+void rftfsub_128_SSE2(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+ 0.5f};
+ const __m128 mm_half = _mm_load_ps(k_half);
+
+ // Vectorized code (four at once).
+ // Note: commented number are indexes for the first iteration of the loop.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+ // Load 'wk'.
+ const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
+ const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
+ const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
+ const __m128 wkr_ =
+ _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+ const __m128 wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
+ const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
+ const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
+ const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
+ const __m128 a_j2_p0 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
+ const __m128 a_j2_p1 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
+ const __m128 a_k2_p0 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
+ const __m128 a_k2_p1 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
+ // Calculate 'x'.
+ const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+ // 3-127, 5-125, 7-123, 9-121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr - wki * xi;
+ // yi = wkr * xi + wki * xr;
+ const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+ const __m128 b_ = _mm_mul_ps(wki_, xi_);
+ const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+ const __m128 d_ = _mm_mul_ps(wki_, xr_);
+ const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
+ // Update 'a'.
+ // a[j2 + 0] -= yr;
+ // a[j2 + 1] -= yi;
+ // a[k2 + 0] += yr;
+ // a[k2 + 1] -= yi;
+ const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
+ const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9,
+ const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
+ const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121,
+ // Shuffle in right order and store.
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+ // 2, 3, 4, 5,
+ const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+ // 6, 7, 8, 9,
+ const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+ // 122, 123, 120, 121,
+ const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+ // 126, 127, 124, 125,
+ const __m128 a_k2_0n = _mm_shuffle_ps(
+ a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
+ const __m128 a_k2_4n = _mm_shuffle_ps(
+ a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
+ _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+ _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+ _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+ _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+ }
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) {
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr - wki * xi;
+ yi = wkr * xi + wki * xr;
+ a[j2 + 0] -= yr;
+ a[j2 + 1] -= yi;
+ a[k2 + 0] += yr;
+ a[k2 + 1] -= yi;
+ }
+}
+
+void rftbsub_128_SSE2(float* a) {
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+ 0.5f};
+ const __m128 mm_half = _mm_load_ps(k_half);
+
+ a[1] = -a[1];
+ // Vectorized code (four at once).
+ // Note: commented number are indexes for the first iteration of the loop.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+ // Load 'wk'.
+ const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
+ const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
+ const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
+ const __m128 wkr_ =
+ _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+ const __m128 wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
+ const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
+ const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
+ const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
+ const __m128 a_j2_p0 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
+ const __m128 a_j2_p1 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
+ const __m128 a_k2_p0 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
+ const __m128 a_k2_p1 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
+ // Calculate 'x'.
+ const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+ // 3-127, 5-125, 7-123, 9-121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr + wki * xi;
+ // yi = wkr * xi - wki * xr;
+ const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+ const __m128 b_ = _mm_mul_ps(wki_, xi_);
+ const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+ const __m128 d_ = _mm_mul_ps(wki_, xr_);
+ const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
+ // Update 'a'.
+ // a[j2 + 0] = a[j2 + 0] - yr;
+ // a[j2 + 1] = yi - a[j2 + 1];
+ // a[k2 + 0] = yr + a[k2 + 0];
+ // a[k2 + 1] = yi - a[k2 + 1];
+ const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
+ const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9,
+ const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
+ const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121,
+ // Shuffle in right order and store.
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+ // 2, 3, 4, 5,
+ const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+ // 6, 7, 8, 9,
+ const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+ // 122, 123, 120, 121,
+ const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+ // 126, 127, 124, 125,
+ const __m128 a_k2_0n = _mm_shuffle_ps(
+ a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
+ const __m128 a_k2_4n = _mm_shuffle_ps(
+ a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
+ _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+ _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+ _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+ _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+ }
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) {
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr + wki * xi;
+ yi = wkr * xi - wki * xr;
+ a[j2 + 0] = a[j2 + 0] - yr;
+ a[j2 + 1] = yi - a[j2 + 1];
+ a[k2 + 0] = yr + a[k2 + 0];
+ a[k2 + 1] = yi - a[k2 + 1];
+ }
+ a[65] = -a[65];
+}
+#endif
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h
new file mode 100644
index 0000000000..47d076ea2a
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
+
+#include "modules/audio_processing/utility/ooura_fft.h"
+
+namespace webrtc {
+
+// This tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
+// to see the initialization code.
+// Constants shared by all paths (C, SSE2, NEON).
+const float rdft_w[64] = {
+ 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f,
+ 0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f,
+ 0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f,
+ 0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+ 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f,
+ 0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f,
+ 0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f,
+ 0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+ 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f,
+ 0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f,
+ 0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f,
+ 0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+ 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+
+// Constants used by the C and MIPS paths.
+const float rdft_wk3ri_first[16] = {
+ 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
+ 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+ 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
+ 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {
+ -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
+ -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
+ -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
+ -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h
new file mode 100644
index 0000000000..1c44ae7197
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_
+
+#include "modules/audio_processing/utility/ooura_fft.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+namespace webrtc {
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
+// to see the initialization code.
+#if defined(WEBRTC_ARCH_X86_FAMILY) || defined(WEBRTC_HAS_NEON)
+// Constants used by SSE2 and NEON but initialized in the C path.
+const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
+ 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, 0.923879564f,
+ 0.923879564f, 0.382683456f, 0.382683456f, 0.980785251f, 0.980785251f,
+ 0.555570245f, 0.555570245f, 0.831469595f, 0.831469595f, 0.195090324f,
+ 0.195090324f, 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+ 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, 0.956940353f,
+ 0.956940353f, 0.471396744f, 0.471396744f, 0.773010433f, 0.773010433f,
+ 0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+ 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, 0.707106769f,
+ 0.707106769f, -0.707106769f, -0.707106769f, 0.923879564f, 0.923879564f,
+ -0.382683456f, -0.382683456f, 0.382683456f, 0.382683456f, -0.923879564f,
+ -0.923879564f, 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
+ 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, 0.831469595f,
+ 0.831469595f, -0.555570245f, -0.555570245f, 0.195090324f, 0.195090324f,
+ -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+ 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, 0.382683456f,
+ 0.382683456f, -0.923879564f, -0.923879564f, 0.831469536f, 0.831469536f,
+ -0.980785251f, -0.980785251f, -0.195090353f, -0.195090353f, -0.555570245f,
+ -0.555570245f, 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
+ 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, 0.634393334f,
+ 0.634393334f, -0.995184720f, -0.995184720f, -0.471396863f, -0.471396863f,
+ -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
+ -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.382683456f,
+ 0.382683456f, -0.923879564f, 0.923879564f, -0.195090324f, 0.195090324f,
+ -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.980785251f,
+ 0.980785251f, -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
+ -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, -0.290284663f,
+ 0.290284663f, -0.881921291f, 0.881921291f, -0.634393334f, 0.634393334f,
+ -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+ -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, -0.707106769f,
+ 0.707106769f, -0.707106769f, 0.707106769f, -0.382683456f, 0.382683456f,
+ -0.923879564f, 0.923879564f, -0.923879564f, 0.923879564f, -0.382683456f,
+ 0.382683456f, -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+ -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.555570245f,
+ 0.555570245f, -0.831469595f, 0.831469595f, -0.980785251f, 0.980785251f,
+ -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+ -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.923879564f,
+ 0.923879564f, 0.382683456f, -0.382683456f, -0.555570245f, 0.555570245f,
+ -0.195090353f, 0.195090353f, -0.980785251f, 0.980785251f, 0.831469536f,
+ -0.831469536f, -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+ -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, -0.773010492f,
+ 0.773010492f, 0.098017156f, -0.098017156f, -0.881921172f, 0.881921172f,
+ 0.956940353f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+ 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
+#endif
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_