diff options
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc')
22 files changed, 7372 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn new file mode 100644 index 0000000000..75bef1450f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn @@ -0,0 +1,126 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_source_set("gain_control_interface") { + sources = [ "gain_control.h" ] +} + +rtc_library("agc") { + sources = [ + "agc_manager_direct.cc", + "agc_manager_direct.h", + ] + configs += [ "..:apm_debug_dump" ] + deps = [ + ":gain_control_interface", + ":level_estimation", + "..:api", + "..:apm_logging", + "..:audio_buffer", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../agc2:clipping_predictor", + "../agc2:gain_map", + "../agc2:input_volume_stats_reporter", + "../vad", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("level_estimation") { + sources = [ + "agc.cc", + "agc.h", + "loudness_histogram.cc", + "loudness_histogram.h", + "utility.cc", + "utility.h", + ] + deps = [ + "../../../api:array_view", + "../../../rtc_base:checks", + "../vad", + ] +} + +rtc_library("legacy_agc") { + visibility = [ + ":*", + "..:*", + ] # Only targets in this file and in + # audio_processing can depend on + # this. + + sources = [ + "legacy/analog_agc.cc", + "legacy/analog_agc.h", + "legacy/digital_agc.cc", + "legacy/digital_agc.h", + "legacy/gain_control.h", + ] + + deps = [ + "../../../common_audio", + "../../../common_audio:common_audio_c", + "../../../common_audio/third_party/ooura:fft_size_256", + "../../../rtc_base:checks", + "../../../system_wrappers", + ] + + if (rtc_build_with_neon) { + if (target_cpu != "arm64") { + # Enable compilation for the NEON instruction set. + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + } +} + +if (rtc_include_tests) { + rtc_library("agc_unittests") { + testonly = true + sources = [ + "agc_manager_direct_unittest.cc", + "loudness_histogram_unittest.cc", + "mock_agc.h", + ] + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":agc", + ":gain_control_interface", + ":level_estimation", + "..:mocks", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + "../../../test:field_trial", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc new file mode 100644 index 0000000000..a018ff9f93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/agc.h" + +#include <cmath> +#include <cstdlib> +#include <vector> + +#include "modules/audio_processing/agc/loudness_histogram.h" +#include "modules/audio_processing/agc/utility.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr int kDefaultLevelDbfs = -18; +constexpr int kNumAnalysisFrames = 100; +constexpr double kActivityThreshold = 0.3; +constexpr int kNum10msFramesInOneSecond = 100; +constexpr int kMaxSampleRateHz = 384000; + +} // namespace + +Agc::Agc() + : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)), + target_level_dbfs_(kDefaultLevelDbfs), + histogram_(LoudnessHistogram::Create(kNumAnalysisFrames)), + inactive_histogram_(LoudnessHistogram::Create()) {} + +Agc::~Agc() = default; + +void Agc::Process(rtc::ArrayView<const int16_t> audio) { + const int sample_rate_hz = audio.size() * kNum10msFramesInOneSecond; + RTC_DCHECK_LE(sample_rate_hz, kMaxSampleRateHz); + vad_.ProcessChunk(audio.data(), audio.size(), sample_rate_hz); + const std::vector<double>& rms = vad_.chunkwise_rms(); + const std::vector<double>& probabilities = + vad_.chunkwise_voice_probabilities(); + RTC_DCHECK_EQ(rms.size(), probabilities.size()); + for (size_t i = 0; i < rms.size(); ++i) { + histogram_->Update(rms[i], probabilities[i]); + } +} + +bool Agc::GetRmsErrorDb(int* error) { + if (!error) { + RTC_DCHECK_NOTREACHED(); + return false; + } + + if (histogram_->num_updates() < kNumAnalysisFrames) { + // We haven't yet received enough frames. + return false; + } + + if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) { + // We are likely in an inactive segment. + return false; + } + + double loudness = Linear2Loudness(histogram_->CurrentRms()); + *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5); + histogram_->Reset(); + return true; +} + +void Agc::Reset() { + histogram_->Reset(); +} + +int Agc::set_target_level_dbfs(int level) { + // TODO(turajs): just some arbitrary sanity check. We can come up with better + // limits. The upper limit should be chosen such that the risk of clipping is + // low. The lower limit should not result in a too quiet signal. + if (level >= 0 || level <= -100) + return -1; + target_level_dbfs_ = level; + target_level_loudness_ = Dbfs2Loudness(level); + return 0; +} + +int Agc::target_level_dbfs() const { + return target_level_dbfs_; +} + +float Agc::voice_probability() const { + return vad_.last_voice_probability(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.h b/third_party/libwebrtc/modules/audio_processing/agc/agc.h new file mode 100644 index 0000000000..da42808225 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_AGC_H_ + +#include <memory> + +#include "api/array_view.h" +#include "modules/audio_processing/vad/voice_activity_detector.h" + +namespace webrtc { + +class LoudnessHistogram; + +class Agc { + public: + Agc(); + virtual ~Agc(); + + // `audio` must be mono; in a multi-channel stream, provide the first (usually + // left) channel. + virtual void Process(rtc::ArrayView<const int16_t> audio); + + // Retrieves the difference between the target RMS level and the current + // signal RMS level in dB. Returns true if an update is available and false + // otherwise, in which case `error` should be ignored and no action taken. + virtual bool GetRmsErrorDb(int* error); + virtual void Reset(); + + virtual int set_target_level_dbfs(int level); + virtual int target_level_dbfs() const; + virtual float voice_probability() const; + + private: + double target_level_loudness_; + int target_level_dbfs_; + std::unique_ptr<LoudnessHistogram> histogram_; + std::unique_ptr<LoudnessHistogram> inactive_histogram_; + VoiceActivityDetector vad_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build new file mode 100644 index 0000000000..45e6cad306 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc new file mode 100644 index 0000000000..b8ad4a8bb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/agc_manager_direct.h" + +#include <algorithm> +#include <cmath> + +#include "api/array_view.h" +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc/gain_control.h" +#include "modules/audio_processing/agc2/gain_map_internal.h" +#include "modules/audio_processing/agc2/input_volume_stats_reporter.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +// Amount of error we tolerate in the microphone level (presumably due to OS +// quantization) before we assume the user has manually adjusted the microphone. +constexpr int kLevelQuantizationSlack = 25; + +constexpr int kDefaultCompressionGain = 7; +constexpr int kMaxCompressionGain = 12; +constexpr int kMinCompressionGain = 2; +// Controls the rate of compression changes towards the target. +constexpr float kCompressionGainStep = 0.05f; + +constexpr int kMaxMicLevel = 255; +static_assert(kGainMapSize > kMaxMicLevel, "gain map too small"); +constexpr int kMinMicLevel = 12; + +// Prevent very large microphone level changes. +constexpr int kMaxResidualGainChange = 15; + +// Maximum additional gain allowed to compensate for microphone level +// restrictions from clipping events. +constexpr int kSurplusCompressionGain = 6; + +// Target speech level (dBFs) and speech probability threshold used to compute +// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used for +// computing the error override and they are not passed to `agc_`. +// TODO(webrtc:7494): Move these to a config and pass in the ctor. +constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f; +constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f; +// The minimum number of frames between `UpdateGain()` calls. +// TODO(webrtc:7494): Move this to a config and pass in the ctor with +// kOverrideWaitFrames = 100. Default value zero needed for the unit tests. +constexpr int kOverrideWaitFrames = 0; + +using AnalogAgcConfig = + AudioProcessing::Config::GainController1::AnalogGainController; + +// If the "WebRTC-Audio-2ndAgcMinMicLevelExperiment" field trial is specified, +// parses it and returns a value between 0 and 255 depending on the field-trial +// string. Returns an unspecified value if the field trial is not specified, if +// disabled or if it cannot be parsed. Example: +// 'WebRTC-Audio-2ndAgcMinMicLevelExperiment/Enabled-80' => returns 80. +absl::optional<int> GetMinMicLevelOverride() { + constexpr char kMinMicLevelFieldTrial[] = + "WebRTC-Audio-2ndAgcMinMicLevelExperiment"; + if (!webrtc::field_trial::IsEnabled(kMinMicLevelFieldTrial)) { + return absl::nullopt; + } + const auto field_trial_string = + webrtc::field_trial::FindFullName(kMinMicLevelFieldTrial); + int min_mic_level = -1; + sscanf(field_trial_string.c_str(), "Enabled-%d", &min_mic_level); + if (min_mic_level >= 0 && min_mic_level <= 255) { + return min_mic_level; + } else { + RTC_LOG(LS_WARNING) << "[agc] Invalid parameter for " + << kMinMicLevelFieldTrial << ", ignored."; + return absl::nullopt; + } +} + +int LevelFromGainError(int gain_error, int level, int min_mic_level) { + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, kMaxMicLevel); + if (gain_error == 0) { + return level; + } + + int new_level = level; + if (gain_error > 0) { + while (kGainMap[new_level] - kGainMap[level] < gain_error && + new_level < kMaxMicLevel) { + ++new_level; + } + } else { + while (kGainMap[new_level] - kGainMap[level] > gain_error && + new_level > min_mic_level) { + --new_level; + } + } + return new_level; +} + +// Returns the proportion of samples in the buffer which are at full-scale +// (and presumably clipped). +float ComputeClippedRatio(const float* const* audio, + size_t num_channels, + size_t samples_per_channel) { + RTC_DCHECK_GT(samples_per_channel, 0); + int num_clipped = 0; + for (size_t ch = 0; ch < num_channels; ++ch) { + int num_clipped_in_ch = 0; + for (size_t i = 0; i < samples_per_channel; ++i) { + RTC_DCHECK(audio[ch]); + if (audio[ch][i] >= 32767.0f || audio[ch][i] <= -32768.0f) { + ++num_clipped_in_ch; + } + } + num_clipped = std::max(num_clipped, num_clipped_in_ch); + } + return static_cast<float>(num_clipped) / (samples_per_channel); +} + +void LogClippingMetrics(int clipping_rate) { + RTC_LOG(LS_INFO) << "Input clipping rate: " << clipping_rate << "%"; + RTC_HISTOGRAM_COUNTS_LINEAR(/*name=*/"WebRTC.Audio.Agc.InputClippingRate", + /*sample=*/clipping_rate, /*min=*/0, /*max=*/100, + /*bucket_count=*/50); +} + +// Computes the speech level error in dB. `speech_level_dbfs` is required to be +// in the range [-90.0f, 30.0f] and `speech_probability` in the range +// [0.0f, 1.0f]. +int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) { + constexpr float kMinSpeechLevelDbfs = -90.0f; + constexpr float kMaxSpeechLevelDbfs = 30.0f; + RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs); + RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs); + RTC_DCHECK_GE(speech_probability, 0.0f); + RTC_DCHECK_LE(speech_probability, 1.0f); + + if (speech_probability < kOverrideSpeechProbabilitySilenceThreshold) { + return 0; + } + + const float speech_level = rtc::SafeClamp<float>( + speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs); + + return std::round(kOverrideTargetSpeechLevelDbfs - speech_level); +} + +} // namespace + +MonoAgc::MonoAgc(ApmDataDumper* data_dumper, + int clipped_level_min, + bool disable_digital_adaptive, + int min_mic_level) + : min_mic_level_(min_mic_level), + disable_digital_adaptive_(disable_digital_adaptive), + agc_(std::make_unique<Agc>()), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + clipped_level_min_(clipped_level_min) {} + +MonoAgc::~MonoAgc() = default; + +void MonoAgc::Initialize() { + max_level_ = kMaxMicLevel; + max_compression_gain_ = kMaxCompressionGain; + target_compression_ = disable_digital_adaptive_ ? 0 : kDefaultCompressionGain; + compression_ = disable_digital_adaptive_ ? 0 : target_compression_; + compression_accumulator_ = compression_; + capture_output_used_ = true; + check_volume_on_next_process_ = true; + frames_since_update_gain_ = 0; + is_first_frame_ = true; +} + +void MonoAgc::Process(rtc::ArrayView<const int16_t> audio, + absl::optional<int> rms_error_override) { + new_compression_to_set_ = absl::nullopt; + + if (check_volume_on_next_process_) { + check_volume_on_next_process_ = false; + // We have to wait until the first process call to check the volume, + // because Chromium doesn't guarantee it to be valid any earlier. + CheckVolumeAndReset(); + } + + agc_->Process(audio); + + // Always check if `agc_` has a new error available. If yes, `agc_` gets + // reset. + // TODO(webrtc:7494) Replace the `agc_` call `GetRmsErrorDb()` with `Reset()` + // if an error override is used. + int rms_error = 0; + bool update_gain = agc_->GetRmsErrorDb(&rms_error); + if (rms_error_override.has_value()) { + if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) { + update_gain = false; + } else { + rms_error = *rms_error_override; + update_gain = true; + } + } + + if (update_gain) { + UpdateGain(rms_error); + } + + if (!disable_digital_adaptive_) { + UpdateCompressor(); + } + + is_first_frame_ = false; + if (frames_since_update_gain_ < kOverrideWaitFrames) { + ++frames_since_update_gain_; + } +} + +void MonoAgc::HandleClipping(int clipped_level_step) { + RTC_DCHECK_GT(clipped_level_step, 0); + // Always decrease the maximum level, even if the current level is below + // threshold. + SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step)); + if (log_to_histograms_) { + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", + level_ - clipped_level_step >= clipped_level_min_); + } + if (level_ > clipped_level_min_) { + // Don't try to adjust the level if we're already below the limit. As + // a consequence, if the user has brought the level above the limit, we + // will still not react until the postproc updates the level. + SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); + // Reset the AGCs for all channels since the level has changed. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + } +} + +void MonoAgc::SetLevel(int new_level) { + int voe_level = recommended_input_volume_; + if (voe_level == 0) { + RTC_DLOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level < 0 || voe_level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" + << voe_level; + return; + } + + // Detect manual input volume adjustments by checking if the current level + // `voe_level` is outside of the `[level_ - kLevelQuantizationSlack, level_ + + // kLevelQuantizationSlack]` range where `level_` is the last input volume + // known by this gain controller. + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + RTC_DLOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + "stored level from " + << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + recommended_input_volume_ = new_level; + RTC_DLOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", level_=" << level_ + << ", new_level=" << new_level; + level_ = new_level; +} + +void MonoAgc::SetMaxLevel(int level) { + RTC_DCHECK_GE(level, clipped_level_min_); + max_level_ = level; + // Scale the `kSurplusCompressionGain` linearly across the restricted + // level range. + max_compression_gain_ = + kMaxCompressionGain + std::floor((1.f * kMaxMicLevel - max_level_) / + (kMaxMicLevel - clipped_level_min_) * + kSurplusCompressionGain + + 0.5f); + RTC_DLOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void MonoAgc::HandleCaptureOutputUsedChange(bool capture_output_used) { + if (capture_output_used_ == capture_output_used) { + return; + } + capture_output_used_ = capture_output_used; + + if (capture_output_used) { + // When we start using the output, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +int MonoAgc::CheckVolumeAndReset() { + int level = recommended_input_volume_; + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of `level` == 0 we should raise it so the + // AGC can do its job properly. + if (level == 0 && !startup_) { + RTC_DLOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return 0; + } + if (level < 0 || level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "[agc] VolumeCallbacks returned an invalid level=" + << level; + return -1; + } + RTC_DLOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level; + + if (level < min_mic_level_) { + level = min_mic_level_; + RTC_DLOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level; + recommended_input_volume_ = level; + } + agc_->Reset(); + level_ = level; + startup_ = false; + frames_since_update_gain_ = 0; + is_first_frame_ = true; + return 0; +} + +// Distributes the required gain change between the digital compression stage +// and volume slider. We use the compressor first, providing a slack region +// around the current slider position to reduce movement. +// +// If the slider needs to be moved, we check first if the user has adjusted +// it, in which case we take no action and cache the updated level. +void MonoAgc::UpdateGain(int rms_error_db) { + int rms_error = rms_error_db; + + // Always reset the counter regardless of whether the gain is changed + // or not. This matches with the bahvior of `agc_` where the histogram is + // reset every time an RMS error is successfully read. + frames_since_update_gain_ = 0; + + // The compressor will always add at least kMinCompressionGain. In effect, + // this adjusts our target gain upward by the same amount and rms_error + // needs to reflect that. + rms_error += kMinCompressionGain; + + // Handle as much error as possible with the compressor first. + int raw_compression = + rtc::SafeClamp(rms_error, kMinCompressionGain, max_compression_gain_); + + // Deemphasize the compression gain error. Move halfway between the current + // target and the newly received target. This serves to soften perceptible + // intra-talkspurt adjustments, at the cost of some adaptation speed. + if ((raw_compression == max_compression_gain_ && + target_compression_ == max_compression_gain_ - 1) || + (raw_compression == kMinCompressionGain && + target_compression_ == kMinCompressionGain + 1)) { + // Special case to allow the target to reach the endpoints of the + // compression range. The deemphasis would otherwise halt it at 1 dB shy. + target_compression_ = raw_compression; + } else { + target_compression_ = + (raw_compression - target_compression_) / 2 + target_compression_; + } + + // Residual error will be handled by adjusting the volume slider. Use the + // raw rather than deemphasized compression here as we would otherwise + // shrink the amount of slack the compressor provides. + const int residual_gain = + rtc::SafeClamp(rms_error - raw_compression, -kMaxResidualGainChange, + kMaxResidualGainChange); + RTC_DLOG(LS_INFO) << "[agc] rms_error=" << rms_error + << ", target_compression=" << target_compression_ + << ", residual_gain=" << residual_gain; + if (residual_gain == 0) + return; + + int old_level = level_; + SetLevel(LevelFromGainError(residual_gain, level_, min_mic_level_)); + if (old_level != level_) { + // Reset the AGC since the level has changed. + agc_->Reset(); + } +} + +void MonoAgc::UpdateCompressor() { + if (compression_ == target_compression_) { + return; + } + + // Adapt the compression gain slowly towards the target, in order to avoid + // highly perceptible changes. + if (target_compression_ > compression_) { + compression_accumulator_ += kCompressionGainStep; + } else { + compression_accumulator_ -= kCompressionGainStep; + } + + // The compressor accepts integer gains in dB. Adjust the gain when + // we've come within half a stepsize of the nearest integer. (We don't + // check for equality due to potential floating point imprecision). + int new_compression = compression_; + int nearest_neighbor = std::floor(compression_accumulator_ + 0.5); + if (std::fabs(compression_accumulator_ - nearest_neighbor) < + kCompressionGainStep / 2) { + new_compression = nearest_neighbor; + } + + // Set the new compression gain. + if (new_compression != compression_) { + compression_ = new_compression; + compression_accumulator_ = new_compression; + new_compression_to_set_ = compression_; + } +} + +std::atomic<int> AgcManagerDirect::instance_counter_(0); + +AgcManagerDirect::AgcManagerDirect( + const AudioProcessing::Config::GainController1::AnalogGainController& + analog_config, + Agc* agc) + : AgcManagerDirect(/*num_capture_channels=*/1, analog_config) { + RTC_DCHECK(channel_agcs_[0]); + RTC_DCHECK(agc); + channel_agcs_[0]->set_agc(agc); +} + +AgcManagerDirect::AgcManagerDirect(int num_capture_channels, + const AnalogAgcConfig& analog_config) + : analog_controller_enabled_(analog_config.enabled), + min_mic_level_override_(GetMinMicLevelOverride()), + data_dumper_(new ApmDataDumper(instance_counter_.fetch_add(1) + 1)), + num_capture_channels_(num_capture_channels), + disable_digital_adaptive_(!analog_config.enable_digital_adaptive), + frames_since_clipped_(analog_config.clipped_wait_frames), + capture_output_used_(true), + clipped_level_step_(analog_config.clipped_level_step), + clipped_ratio_threshold_(analog_config.clipped_ratio_threshold), + clipped_wait_frames_(analog_config.clipped_wait_frames), + channel_agcs_(num_capture_channels), + new_compressions_to_set_(num_capture_channels), + clipping_predictor_( + CreateClippingPredictor(num_capture_channels, + analog_config.clipping_predictor)), + use_clipping_predictor_step_( + !!clipping_predictor_ && + analog_config.clipping_predictor.use_predicted_step), + clipping_rate_log_(0.0f), + clipping_rate_log_counter_(0) { + RTC_LOG(LS_INFO) << "[agc] analog controller enabled: " + << (analog_controller_enabled_ ? "yes" : "no"); + const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel); + RTC_LOG(LS_INFO) << "[agc] Min mic level: " << min_mic_level + << " (overridden: " + << (min_mic_level_override_.has_value() ? "yes" : "no") + << ")"; + for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { + ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr; + + channel_agcs_[ch] = std::make_unique<MonoAgc>( + data_dumper_ch, analog_config.clipped_level_min, + disable_digital_adaptive_, min_mic_level); + } + RTC_DCHECK(!channel_agcs_.empty()); + RTC_DCHECK_GT(clipped_level_step_, 0); + RTC_DCHECK_LE(clipped_level_step_, 255); + RTC_DCHECK_GT(clipped_ratio_threshold_, 0.0f); + RTC_DCHECK_LT(clipped_ratio_threshold_, 1.0f); + RTC_DCHECK_GT(clipped_wait_frames_, 0); + channel_agcs_[0]->ActivateLogging(); +} + +AgcManagerDirect::~AgcManagerDirect() {} + +void AgcManagerDirect::Initialize() { + RTC_DLOG(LS_INFO) << "AgcManagerDirect::Initialize"; + data_dumper_->InitiateNewSetOfRecordings(); + for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { + channel_agcs_[ch]->Initialize(); + } + capture_output_used_ = true; + + AggregateChannelLevels(); + clipping_rate_log_ = 0.0f; + clipping_rate_log_counter_ = 0; +} + +void AgcManagerDirect::SetupDigitalGainControl( + GainControl& gain_control) const { + if (gain_control.set_mode(GainControl::kFixedDigital) != 0) { + RTC_LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed."; + } + const int target_level_dbfs = disable_digital_adaptive_ ? 0 : 2; + if (gain_control.set_target_level_dbfs(target_level_dbfs) != 0) { + RTC_LOG(LS_ERROR) << "set_target_level_dbfs() failed."; + } + const int compression_gain_db = + disable_digital_adaptive_ ? 0 : kDefaultCompressionGain; + if (gain_control.set_compression_gain_db(compression_gain_db) != 0) { + RTC_LOG(LS_ERROR) << "set_compression_gain_db() failed."; + } + const bool enable_limiter = !disable_digital_adaptive_; + if (gain_control.enable_limiter(enable_limiter) != 0) { + RTC_LOG(LS_ERROR) << "enable_limiter() failed."; + } +} + +void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) { + const float* const* audio = audio_buffer.channels_const(); + size_t samples_per_channel = audio_buffer.num_frames(); + RTC_DCHECK(audio); + + AggregateChannelLevels(); + if (!capture_output_used_) { + return; + } + + if (!!clipping_predictor_) { + AudioFrameView<const float> frame = AudioFrameView<const float>( + audio, num_capture_channels_, static_cast<int>(samples_per_channel)); + clipping_predictor_->Analyze(frame); + } + + // Check for clipped samples, as the AGC has difficulty detecting pitch + // under clipping distortion. We do this in the preprocessing phase in order + // to catch clipped echo as well. + // + // If we find a sufficiently clipped frame, drop the current microphone level + // and enforce a new maximum level, dropped the same amount from the current + // maximum. This harsh treatment is an effort to avoid repeated clipped echo + // events. As compensation for this restriction, the maximum compression + // gain is increased, through SetMaxLevel(). + float clipped_ratio = + ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel); + clipping_rate_log_ = std::max(clipped_ratio, clipping_rate_log_); + clipping_rate_log_counter_++; + constexpr int kNumFramesIn30Seconds = 3000; + if (clipping_rate_log_counter_ == kNumFramesIn30Seconds) { + LogClippingMetrics(std::round(100.0f * clipping_rate_log_)); + clipping_rate_log_ = 0.0f; + clipping_rate_log_counter_ = 0; + } + + if (frames_since_clipped_ < clipped_wait_frames_) { + ++frames_since_clipped_; + return; + } + + const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_; + bool clipping_predicted = false; + int predicted_step = 0; + if (!!clipping_predictor_) { + for (int channel = 0; channel < num_capture_channels_; ++channel) { + const auto step = clipping_predictor_->EstimateClippedLevelStep( + channel, recommended_input_volume_, clipped_level_step_, + channel_agcs_[channel]->min_mic_level(), kMaxMicLevel); + if (step.has_value()) { + predicted_step = std::max(predicted_step, step.value()); + clipping_predicted = true; + } + } + } + if (clipping_detected) { + RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + } + int step = clipped_level_step_; + if (clipping_predicted) { + predicted_step = std::max(predicted_step, clipped_level_step_); + RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << predicted_step; + if (use_clipping_predictor_step_) { + step = predicted_step; + } + } + if (clipping_detected || + (clipping_predicted && use_clipping_predictor_step_)) { + for (auto& state_ch : channel_agcs_) { + state_ch->HandleClipping(step); + } + frames_since_clipped_ = 0; + if (!!clipping_predictor_) { + clipping_predictor_->Reset(); + } + } + AggregateChannelLevels(); +} + +void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) { + Process(audio_buffer, /*speech_probability=*/absl::nullopt, + /*speech_level_dbfs=*/absl::nullopt); +} + +void AgcManagerDirect::Process(const AudioBuffer& audio_buffer, + absl::optional<float> speech_probability, + absl::optional<float> speech_level_dbfs) { + AggregateChannelLevels(); + const int volume_after_clipping_handling = recommended_input_volume_; + + if (!capture_output_used_) { + return; + } + + const size_t num_frames_per_band = audio_buffer.num_frames_per_band(); + absl::optional<int> rms_error_override = absl::nullopt; + if (speech_probability.has_value() && speech_level_dbfs.has_value()) { + rms_error_override = + GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability); + } + for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { + std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data; + int16_t* audio_use = audio_data.data(); + FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band, + audio_use); + channel_agcs_[ch]->Process({audio_use, num_frames_per_band}, + rms_error_override); + new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression(); + } + + AggregateChannelLevels(); + if (volume_after_clipping_handling != recommended_input_volume_) { + // The recommended input volume was adjusted in order to match the target + // level. + UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget( + recommended_input_volume_); + } +} + +absl::optional<int> AgcManagerDirect::GetDigitalComressionGain() { + return new_compressions_to_set_[channel_controlling_gain_]; +} + +void AgcManagerDirect::HandleCaptureOutputUsedChange(bool capture_output_used) { + for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { + channel_agcs_[ch]->HandleCaptureOutputUsedChange(capture_output_used); + } + capture_output_used_ = capture_output_used; +} + +float AgcManagerDirect::voice_probability() const { + float max_prob = 0.f; + for (const auto& state_ch : channel_agcs_) { + max_prob = std::max(max_prob, state_ch->voice_probability()); + } + + return max_prob; +} + +void AgcManagerDirect::set_stream_analog_level(int level) { + if (!analog_controller_enabled_) { + recommended_input_volume_ = level; + } + + for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) { + channel_agcs_[ch]->set_stream_analog_level(level); + } + + AggregateChannelLevels(); +} + +void AgcManagerDirect::AggregateChannelLevels() { + int new_recommended_input_volume = + channel_agcs_[0]->recommended_analog_level(); + channel_controlling_gain_ = 0; + for (size_t ch = 1; ch < channel_agcs_.size(); ++ch) { + int level = channel_agcs_[ch]->recommended_analog_level(); + if (level < new_recommended_input_volume) { + new_recommended_input_volume = level; + channel_controlling_gain_ = static_cast<int>(ch); + } + } + + if (min_mic_level_override_.has_value() && new_recommended_input_volume > 0) { + new_recommended_input_volume = + std::max(new_recommended_input_volume, *min_mic_level_override_); + } + + if (analog_controller_enabled_) { + recommended_input_volume_ = new_recommended_input_volume; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h new file mode 100644 index 0000000000..adb2f5a63f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ +#define MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ + +#include <atomic> +#include <memory> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/agc/agc.h" +#include "modules/audio_processing/agc2/clipping_predictor.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gtest_prod_util.h" + +namespace webrtc { + +class MonoAgc; +class GainControl; + +// Adaptive Gain Controller (AGC) that controls the input volume and a digital +// gain. The input volume controller recommends what volume to use, handles +// volume changes and clipping. In particular, it handles changes triggered by +// the user (e.g., volume set to zero by a HW mute button). The digital +// controller chooses and applies the digital compression gain. +// This class is not thread-safe. +// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming +// convention. +class AgcManagerDirect final { + public: + // Ctor. `num_capture_channels` specifies the number of channels for the audio + // passed to `AnalyzePreProcess()` and `Process()`. Clamps + // `analog_config.startup_min_level` in the [12, 255] range. + AgcManagerDirect( + int num_capture_channels, + const AudioProcessing::Config::GainController1::AnalogGainController& + analog_config); + + ~AgcManagerDirect(); + AgcManagerDirect(const AgcManagerDirect&) = delete; + AgcManagerDirect& operator=(const AgcManagerDirect&) = delete; + + void Initialize(); + + // Configures `gain_control` to work as a fixed digital controller so that the + // adaptive part is only handled by this gain controller. Must be called if + // `gain_control` is also used to avoid the side-effects of running two AGCs. + void SetupDigitalGainControl(GainControl& gain_control) const; + + // Sets the applied input volume. + void set_stream_analog_level(int level); + + // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and + // remove `set_stream_analog_level()`. + // Analyzes `audio` before `Process()` is called so that the analysis can be + // performed before external digital processing operations take place (e.g., + // echo cancellation). The analysis consists of input clipping detection and + // prediction (if enabled). Must be called after `set_stream_analog_level()`. + void AnalyzePreProcess(const AudioBuffer& audio_buffer); + + // Processes `audio_buffer`. Chooses a digital compression gain and the new + // input volume to recommend. Must be called after `AnalyzePreProcess()`. If + // `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range + // [-90.f, 30.0f]) are given, uses them to override the estimated RMS error. + // TODO(webrtc:7494): This signature is needed for testing purposes, unify + // the signatures when the clean-up is done. + void Process(const AudioBuffer& audio_buffer, + absl::optional<float> speech_probability, + absl::optional<float> speech_level_dbfs); + + // Processes `audio_buffer`. Chooses a digital compression gain and the new + // input volume to recommend. Must be called after `AnalyzePreProcess()`. + void Process(const AudioBuffer& audio_buffer); + + // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove + // `recommended_analog_level()`. + // Returns the recommended input volume. If the input volume contoller is + // disabled, returns the input volume set via the latest + // `set_stream_analog_level()` call. Must be called after + // `AnalyzePreProcess()` and `Process()`. + int recommended_analog_level() const { return recommended_input_volume_; } + + // Call when the capture stream output has been flagged to be used/not-used. + // If unused, the manager disregards all incoming audio. + void HandleCaptureOutputUsedChange(bool capture_output_used); + + float voice_probability() const; + + int num_channels() const { return num_capture_channels_; } + + // If available, returns the latest digital compression gain that has been + // chosen. + absl::optional<int> GetDigitalComressionGain(); + + // Returns true if clipping prediction is enabled. + bool clipping_predictor_enabled() const { return !!clipping_predictor_; } + + // Returns true if clipping prediction is used to adjust the input volume. + bool use_clipping_predictor_step() const { + return use_clipping_predictor_step_; + } + + private: + friend class AgcManagerDirectTestHelper; + + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, DisableDigitalDisablesDigital); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentDefault); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentDisabled); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentOutOfRangeAbove); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentOutOfRangeBelow); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentEnabled50); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, + AgcMinMicLevelExperimentEnabledAboveStartupLevel); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + ClippingParametersVerified); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + DisableClippingPredictorDoesNotLowerVolume); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + UsedClippingPredictionsProduceLowerAnalogLevels); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + UnusedClippingPredictionsProduceEqualAnalogLevels); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + EmptyRmsErrorOverrideHasNoEffect); + FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest, + NonEmptyRmsErrorOverrideHasEffect); + + // Ctor that creates a single channel AGC and by injecting `agc`. + // `agc` will be owned by this class; hence, do not delete it. + AgcManagerDirect( + const AudioProcessing::Config::GainController1::AnalogGainController& + analog_config, + Agc* agc); + + void AggregateChannelLevels(); + + const bool analog_controller_enabled_; + + const absl::optional<int> min_mic_level_override_; + std::unique_ptr<ApmDataDumper> data_dumper_; + static std::atomic<int> instance_counter_; + const int num_capture_channels_; + const bool disable_digital_adaptive_; + + int frames_since_clipped_; + + // TODO(bugs.webrtc.org/7494): Create a separate member for the applied input + // volume. + // TODO(bugs.webrtc.org/7494): Once + // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a trivial + // getter, leave uninitialized. + // Recommended input volume. After `set_stream_analog_level()` is called it + // holds the observed input volume. Possibly updated by `AnalyzePreProcess()` + // and `Process()`; after these calls, holds the recommended input volume. + int recommended_input_volume_ = 0; + + bool capture_output_used_; + int channel_controlling_gain_ = 0; + + const int clipped_level_step_; + const float clipped_ratio_threshold_; + const int clipped_wait_frames_; + + std::vector<std::unique_ptr<MonoAgc>> channel_agcs_; + std::vector<absl::optional<int>> new_compressions_to_set_; + + const std::unique_ptr<ClippingPredictor> clipping_predictor_; + const bool use_clipping_predictor_step_; + float clipping_rate_log_; + int clipping_rate_log_counter_; +}; + +// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming +// convention. +class MonoAgc { + public: + MonoAgc(ApmDataDumper* data_dumper, + int clipped_level_min, + bool disable_digital_adaptive, + int min_mic_level); + ~MonoAgc(); + MonoAgc(const MonoAgc&) = delete; + MonoAgc& operator=(const MonoAgc&) = delete; + + void Initialize(); + void HandleCaptureOutputUsedChange(bool capture_output_used); + + // Sets the current input volume. + void set_stream_analog_level(int level) { recommended_input_volume_ = level; } + + // Lowers the recommended input volume in response to clipping based on the + // suggested reduction `clipped_level_step`. Must be called after + // `set_stream_analog_level()`. + void HandleClipping(int clipped_level_step); + + // Analyzes `audio`, requests the RMS error from AGC, updates the recommended + // input volume based on the estimated speech level and, if enabled, updates + // the (digital) compression gain to be applied by `agc_`. Must be called + // after `HandleClipping()`. If `rms_error_override` has a value, RMS error + // from AGC is overridden by it. + void Process(rtc::ArrayView<const int16_t> audio, + absl::optional<int> rms_error_override); + + // Returns the recommended input volume. Must be called after `Process()`. + int recommended_analog_level() const { return recommended_input_volume_; } + + float voice_probability() const { return agc_->voice_probability(); } + void ActivateLogging() { log_to_histograms_ = true; } + absl::optional<int> new_compression() const { + return new_compression_to_set_; + } + + // Only used for testing. + void set_agc(Agc* agc) { agc_.reset(agc); } + int min_mic_level() const { return min_mic_level_; } + + private: + // Sets a new input volume, after first checking that it hasn't been updated + // by the user, in which case no action is taken. + void SetLevel(int new_level); + + // Set the maximum input volume the AGC is allowed to apply. Also updates the + // maximum compression gain to compensate. The volume must be at least + // `kClippedLevelMin`. + void SetMaxLevel(int level); + + int CheckVolumeAndReset(); + void UpdateGain(int rms_error_db); + void UpdateCompressor(); + + const int min_mic_level_; + const bool disable_digital_adaptive_; + std::unique_ptr<Agc> agc_; + int level_ = 0; + int max_level_; + int max_compression_gain_; + int target_compression_; + int compression_; + float compression_accumulator_; + bool capture_output_used_ = true; + bool check_volume_on_next_process_ = true; + bool startup_ = true; + + // TODO(bugs.webrtc.org/7494): Create a separate member for the applied + // input volume. + // Recommended input volume. After `set_stream_analog_level()` is + // called, it holds the observed applied input volume. Possibly updated by + // `HandleClipping()` and `Process()`; after these calls, holds the + // recommended input volume. + int recommended_input_volume_ = 0; + + absl::optional<int> new_compression_to_set_; + bool log_to_histograms_ = false; + const int clipped_level_min_; + + // Frames since the last `UpdateGain()` call. + int frames_since_update_gain_ = 0; + // Set to true for the first frame after startup and reset, otherwise false. + bool is_first_frame_ = true; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc new file mode 100644 index 0000000000..70ac0b5b34 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc @@ -0,0 +1,2184 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/agc_manager_direct.h" + +#include <fstream> +#include <limits> +#include <tuple> +#include <vector> + +#include "modules/audio_processing/agc/gain_control.h" +#include "modules/audio_processing/agc/mock_agc.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/strings/string_builder.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::DoAll; +using ::testing::Return; +using ::testing::SetArgPointee; + +namespace webrtc { +namespace { + +constexpr int kSampleRateHz = 32000; +constexpr int kNumChannels = 1; +constexpr int kInitialInputVolume = 128; +constexpr int kClippedMin = 165; // Arbitrary, but different from the default. +constexpr float kAboveClippedThreshold = 0.2f; +constexpr int kMinMicLevel = 12; +constexpr int kClippedLevelStep = 15; +constexpr float kClippedRatioThreshold = 0.1f; +constexpr int kClippedWaitFrames = 300; +constexpr float kLowSpeechProbability = 0.1f; +constexpr float kHighSpeechProbability = 0.7f; +constexpr float kSpeechLevelDbfs = -25.0f; + +constexpr float kMinSample = std::numeric_limits<int16_t>::min(); +constexpr float kMaxSample = std::numeric_limits<int16_t>::max(); + +using AnalogAgcConfig = + AudioProcessing::Config::GainController1::AnalogGainController; +using ClippingPredictorConfig = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor; +constexpr AnalogAgcConfig kDefaultAnalogConfig{}; + +class MockGainControl : public GainControl { + public: + virtual ~MockGainControl() {} + MOCK_METHOD(int, set_stream_analog_level, (int level), (override)); + MOCK_METHOD(int, stream_analog_level, (), (const, override)); + MOCK_METHOD(int, set_mode, (Mode mode), (override)); + MOCK_METHOD(Mode, mode, (), (const, override)); + MOCK_METHOD(int, set_target_level_dbfs, (int level), (override)); + MOCK_METHOD(int, target_level_dbfs, (), (const, override)); + MOCK_METHOD(int, set_compression_gain_db, (int gain), (override)); + MOCK_METHOD(int, compression_gain_db, (), (const, override)); + MOCK_METHOD(int, enable_limiter, (bool enable), (override)); + MOCK_METHOD(bool, is_limiter_enabled, (), (const, override)); + MOCK_METHOD(int, + set_analog_level_limits, + (int minimum, int maximum), + (override)); + MOCK_METHOD(int, analog_level_minimum, (), (const, override)); + MOCK_METHOD(int, analog_level_maximum, (), (const, override)); + MOCK_METHOD(bool, stream_is_saturated, (), (const, override)); +}; + +// TODO(bugs.webrtc.org/12874): Remove and use designated initializers once +// fixed. +std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect( + int startup_min_volume, + int clipped_level_step, + float clipped_ratio_threshold, + int clipped_wait_frames, + const ClippingPredictorConfig& clipping_predictor_config = + kDefaultAnalogConfig.clipping_predictor) { + AnalogAgcConfig config; + config.startup_min_volume = startup_min_volume; + config.clipped_level_min = kClippedMin; + config.enable_digital_adaptive = false; + config.clipped_level_step = clipped_level_step; + config.clipped_ratio_threshold = clipped_ratio_threshold; + config.clipped_wait_frames = clipped_wait_frames; + config.clipping_predictor = clipping_predictor_config; + return std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config); +} + +// Deprecated. +// TODO(bugs.webrtc.org/7494): Delete this helper, use +// `AgcManagerDirectTestHelper::CallAgcSequence()` instead. +// Calls `AnalyzePreProcess()` on `manager` `num_calls` times. `peak_ratio` is a +// value in [0, 1] which determines the amplitude of the samples (1 maps to full +// scale). The first half of the calls is made on frames which are half filled +// with zeros in order to simulate a signal with different crest factors. +void CallPreProcessAudioBuffer(int num_calls, + float peak_ratio, + AgcManagerDirect& manager) { + RTC_DCHECK_LE(peak_ratio, 1.0f); + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + const int num_channels = audio_buffer.num_channels(); + const int num_frames = audio_buffer.num_frames(); + + // Make half of the calls with half zeroed frames. + for (int ch = 0; ch < num_channels; ++ch) { + // 50% of the samples in one frame are zero. + for (int i = 0; i < num_frames; i += 2) { + audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f; + audio_buffer.channels()[ch][i + 1] = 0.0f; + } + } + for (int n = 0; n < num_calls / 2; ++n) { + manager.AnalyzePreProcess(audio_buffer); + } + + // Make the remaining half of the calls with frames whose samples are all set. + for (int ch = 0; ch < num_channels; ++ch) { + for (int i = 0; i < num_frames; ++i) { + audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f; + } + } + for (int n = 0; n < num_calls - num_calls / 2; ++n) { + manager.AnalyzePreProcess(audio_buffer); + } +} + +constexpr char kMinMicLevelFieldTrial[] = + "WebRTC-Audio-2ndAgcMinMicLevelExperiment"; + +std::string GetAgcMinMicLevelExperimentFieldTrial(const std::string& value) { + char field_trial_buffer[64]; + rtc::SimpleStringBuilder builder(field_trial_buffer); + builder << kMinMicLevelFieldTrial << "/" << value << "/"; + return builder.str(); +} + +std::string GetAgcMinMicLevelExperimentFieldTrialEnabled( + int enabled_value, + const std::string& suffix = "") { + RTC_DCHECK_GE(enabled_value, 0); + RTC_DCHECK_LE(enabled_value, 255); + char field_trial_buffer[64]; + rtc::SimpleStringBuilder builder(field_trial_buffer); + builder << kMinMicLevelFieldTrial << "/Enabled-" << enabled_value << suffix + << "/"; + return builder.str(); +} + +std::string GetAgcMinMicLevelExperimentFieldTrial( + absl::optional<int> min_mic_level) { + if (min_mic_level.has_value()) { + return GetAgcMinMicLevelExperimentFieldTrialEnabled(*min_mic_level); + } + return GetAgcMinMicLevelExperimentFieldTrial("Disabled"); +} + +// (Over)writes `samples_value` for the samples in `audio_buffer`. +// When `clipped_ratio`, a value in [0, 1], is greater than 0, the corresponding +// fraction of the frame is set to a full scale value to simulate clipping. +void WriteAudioBufferSamples(float samples_value, + float clipped_ratio, + AudioBuffer& audio_buffer) { + RTC_DCHECK_GE(samples_value, kMinSample); + RTC_DCHECK_LE(samples_value, kMaxSample); + RTC_DCHECK_GE(clipped_ratio, 0.0f); + RTC_DCHECK_LE(clipped_ratio, 1.0f); + int num_channels = audio_buffer.num_channels(); + int num_samples = audio_buffer.num_frames(); + int num_clipping_samples = clipped_ratio * num_samples; + for (int ch = 0; ch < num_channels; ++ch) { + int i = 0; + for (; i < num_clipping_samples; ++i) { + audio_buffer.channels()[ch][i] = 32767.0f; + } + for (; i < num_samples; ++i) { + audio_buffer.channels()[ch][i] = samples_value; + } + } +} + +// Deprecated. +// TODO(bugs.webrtc.org/7494): Delete this helper, use +// `AgcManagerDirectTestHelper::CallAgcSequence()` instead. +void CallPreProcessAndProcess(int num_calls, + const AudioBuffer& audio_buffer, + absl::optional<float> speech_probability_override, + absl::optional<float> speech_level_override, + AgcManagerDirect& manager) { + for (int n = 0; n < num_calls; ++n) { + manager.AnalyzePreProcess(audio_buffer); + manager.Process(audio_buffer, speech_probability_override, + speech_level_override); + } +} + +// Reads a given number of 10 ms chunks from a PCM file and feeds them to +// `AgcManagerDirect`. +class SpeechSamplesReader { + private: + // Recording properties. + static constexpr int kPcmSampleRateHz = 16000; + static constexpr int kPcmNumChannels = 1; + static constexpr int kPcmBytesPerSamples = sizeof(int16_t); + + public: + SpeechSamplesReader() + : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"), + std::ios::binary | std::ios::ate), + audio_buffer_(kPcmSampleRateHz, + kPcmNumChannels, + kPcmSampleRateHz, + kPcmNumChannels, + kPcmSampleRateHz, + kPcmNumChannels), + buffer_(audio_buffer_.num_frames()), + buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) { + RTC_CHECK(is_); + } + + // Reads `num_frames` 10 ms frames from the beginning of the PCM file, applies + // `gain_db` and feeds the frames into `agc` by calling `AnalyzePreProcess()` + // and `Process()` for each frame. Reads the number of 10 ms frames available + // in the PCM file if `num_frames` is too large - i.e., does not loop. + void Feed(int num_frames, int gain_db, AgcManagerDirect& agc) { + float gain = std::pow(10.0f, gain_db / 20.0f); // From dB to linear gain. + is_.seekg(0, is_.beg); // Start from the beginning of the PCM file. + + // Read and feed frames. + for (int i = 0; i < num_frames; ++i) { + is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_); + if (is_.gcount() < buffer_num_bytes_) { + // EOF reached. Stop. + break; + } + // Apply gain and copy samples into `audio_buffer_`. + std::transform(buffer_.begin(), buffer_.end(), + audio_buffer_.channels()[0], [gain](int16_t v) -> float { + return rtc::SafeClamp(static_cast<float>(v) * gain, + kMinSample, kMaxSample); + }); + + agc.AnalyzePreProcess(audio_buffer_); + agc.Process(audio_buffer_); + } + } + + // Reads `num_frames` 10 ms frames from the beginning of the PCM file, applies + // `gain_db` and feeds the frames into `agc` by calling `AnalyzePreProcess()` + // and `Process()` for each frame. Reads the number of 10 ms frames available + // in the PCM file if `num_frames` is too large - i.e., does not loop. + // `speech_probability_override` and `speech_level_override` are passed to + // `Process()` where they are used to override the `agc` RMS error if they + // have a value. + void Feed(int num_frames, + int gain_db, + absl::optional<float> speech_probability_override, + absl::optional<float> speech_level_override, + AgcManagerDirect& agc) { + float gain = std::pow(10.0f, gain_db / 20.0f); // From dB to linear gain. + is_.seekg(0, is_.beg); // Start from the beginning of the PCM file. + + // Read and feed frames. + for (int i = 0; i < num_frames; ++i) { + is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_); + if (is_.gcount() < buffer_num_bytes_) { + // EOF reached. Stop. + break; + } + // Apply gain and copy samples into `audio_buffer_`. + std::transform(buffer_.begin(), buffer_.end(), + audio_buffer_.channels()[0], [gain](int16_t v) -> float { + return rtc::SafeClamp(static_cast<float>(v) * gain, + kMinSample, kMaxSample); + }); + + agc.AnalyzePreProcess(audio_buffer_); + agc.Process(audio_buffer_, speech_probability_override, + speech_level_override); + } + } + + private: + std::ifstream is_; + AudioBuffer audio_buffer_; + std::vector<int16_t> buffer_; + const std::streamsize buffer_num_bytes_; +}; + +} // namespace + +// TODO(bugs.webrtc.org/12874): Use constexpr struct with designated +// initializers once fixed. +constexpr AnalogAgcConfig GetAnalogAgcTestConfig() { + AnalogAgcConfig config; + config.enabled = true; + config.startup_min_volume = kInitialInputVolume; + config.clipped_level_min = kClippedMin; + config.enable_digital_adaptive = true; + config.clipped_level_step = kClippedLevelStep; + config.clipped_ratio_threshold = kClippedRatioThreshold; + config.clipped_wait_frames = kClippedWaitFrames; + config.clipping_predictor = kDefaultAnalogConfig.clipping_predictor; + return config; +} + +constexpr AnalogAgcConfig GetDisabledAnalogAgcConfig() { + AnalogAgcConfig config = GetAnalogAgcTestConfig(); + config.enabled = false; + return config; +} + +// Helper class that provides an `AgcManagerDirect` instance with an injected +// `Agc` mock, an `AudioBuffer` instance and `CallAgcSequence()`, a helper +// method that runs the `AgcManagerDirect` instance on the `AudioBuffer` one by +// sticking to the API contract. +class AgcManagerDirectTestHelper { + public: + // Ctor. Initializes `audio_buffer` with zeros. + AgcManagerDirectTestHelper() + : audio_buffer(kSampleRateHz, + kNumChannels, + kSampleRateHz, + kNumChannels, + kSampleRateHz, + kNumChannels), + mock_agc(new ::testing::NiceMock<MockAgc>()), + manager(GetAnalogAgcTestConfig(), mock_agc) { + manager.Initialize(); + manager.SetupDigitalGainControl(mock_gain_control); + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, + audio_buffer); + } + + // Calls the sequence of `AgcManagerDirect` methods according to the API + // contract, namely: + // - Sets the applied input volume; + // - Uses `audio_buffer` to call `AnalyzePreProcess()` and `Process()`; + // - Sets the digital compression gain, if specified, on the injected + // `mock_agc`. Returns the recommended input volume. The RMS error from + // AGC is replaced by an override value if `speech_probability_override` + // and `speech_level_override` have a value. + int CallAgcSequence(int applied_input_volume, + absl::optional<float> speech_probability_override, + absl::optional<float> speech_level_override) { + manager.set_stream_analog_level(applied_input_volume); + manager.AnalyzePreProcess(audio_buffer); + manager.Process(audio_buffer, speech_probability_override, + speech_level_override); + absl::optional<int> digital_gain = manager.GetDigitalComressionGain(); + if (digital_gain) { + mock_gain_control.set_compression_gain_db(*digital_gain); + } + return manager.recommended_analog_level(); + } + + // Deprecated. + // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use + // `CallAgcSequence()`. The RMS error from AGC is replaced by an override + // value if `speech_probability_override` and `speech_level_override` have + // a value. + void CallProcess(int num_calls, + absl::optional<float> speech_probability_override, + absl::optional<float> speech_level_override) { + for (int i = 0; i < num_calls; ++i) { + EXPECT_CALL(*mock_agc, Process(_)).WillOnce(Return()); + manager.Process(audio_buffer, speech_probability_override, + speech_level_override); + absl::optional<int> new_digital_gain = manager.GetDigitalComressionGain(); + if (new_digital_gain) { + mock_gain_control.set_compression_gain_db(*new_digital_gain); + } + } + } + + // Deprecated. + // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use + // `CallAgcSequence()`. + void CallPreProc(int num_calls, float clipped_ratio) { + RTC_DCHECK_GE(clipped_ratio, 0.0f); + RTC_DCHECK_LE(clipped_ratio, 1.0f); + WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio, + audio_buffer); + for (int i = 0; i < num_calls; ++i) { + manager.AnalyzePreProcess(audio_buffer); + } + } + + // Deprecated. + // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use + // `CallAgcSequence()`. + void CallPreProcForChangingAudio(int num_calls, float peak_ratio) { + RTC_DCHECK_GE(peak_ratio, 0.0f); + RTC_DCHECK_LE(peak_ratio, 1.0f); + const float samples_value = peak_ratio * 32767.0f; + + // Make half of the calls on a frame where the samples alternate + // `sample_values` and zeros. + WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f, + audio_buffer); + for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) { + for (size_t k = 1; k < audio_buffer.num_frames(); k += 2) { + audio_buffer.channels()[ch][k] = 0.0f; + } + } + for (int i = 0; i < num_calls / 2; ++i) { + manager.AnalyzePreProcess(audio_buffer); + } + + // Make half of thecalls on a frame where all the samples equal + // `sample_values`. + WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f, + audio_buffer); + for (int i = 0; i < num_calls - num_calls / 2; ++i) { + manager.AnalyzePreProcess(audio_buffer); + } + } + + AudioBuffer audio_buffer; + MockAgc* mock_agc; + AgcManagerDirect manager; + MockGainControl mock_gain_control; +}; + +class AgcManagerDirectParametrizedTest + : public ::testing::TestWithParam<std::tuple<absl::optional<int>, bool>> { + protected: + AgcManagerDirectParametrizedTest() + : field_trials_( + GetAgcMinMicLevelExperimentFieldTrial(std::get<0>(GetParam()))) {} + + bool IsMinMicLevelOverridden() const { + return std::get<0>(GetParam()).has_value(); + } + int GetMinMicLevel() const { + return std::get<0>(GetParam()).value_or(kMinMicLevel); + } + + bool IsRmsErrorOverridden() const { return std::get<1>(GetParam()); } + absl::optional<float> GetOverrideOrEmpty(float value) const { + return IsRmsErrorOverridden() ? absl::optional<float>(value) + : absl::nullopt; + } + + private: + test::ScopedFieldTrials field_trials_; +}; + +INSTANTIATE_TEST_SUITE_P( + , + AgcManagerDirectParametrizedTest, + ::testing::Combine(testing::Values(absl::nullopt, 12, 20), + testing::Bool())); + +// Checks that when the analog controller is disabled, no downward adaptation +// takes place. +// TODO(webrtc:7494): Revisit the test after moving the number of override wait +// frames to AMP config. The test passes but internally the gain update timing +// differs. +TEST_P(AgcManagerDirectParametrizedTest, + DisabledAnalogAgcDoesNotAdaptDownwards) { + AgcManagerDirect manager_no_analog_agc(kNumChannels, + GetDisabledAnalogAgcConfig()); + manager_no_analog_agc.Initialize(); + AgcManagerDirect manager_with_analog_agc(kNumChannels, + GetAnalogAgcTestConfig()); + manager_with_analog_agc.Initialize(); + + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + constexpr int kAnalogLevel = 250; + static_assert(kAnalogLevel > kInitialInputVolume, "Increase `kAnalogLevel`."); + manager_no_analog_agc.set_stream_analog_level(kAnalogLevel); + manager_with_analog_agc.set_stream_analog_level(kAnalogLevel); + + // Make a first call with input that doesn't clip in order to let the + // controller read the input volume. That is needed because clipping input + // causes the controller to stay in idle state for + // `AnalogAgcConfig::clipped_wait_frames` frames. + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipping_ratio=*/0.0f, + audio_buffer); + manager_no_analog_agc.AnalyzePreProcess(audio_buffer); + manager_with_analog_agc.AnalyzePreProcess(audio_buffer); + manager_no_analog_agc.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-18.0f)); + manager_with_analog_agc.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-18.0f)); + + // Feed clipping input to trigger a downward adapation of the analog level. + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipping_ratio=*/0.2f, + audio_buffer); + manager_no_analog_agc.AnalyzePreProcess(audio_buffer); + manager_with_analog_agc.AnalyzePreProcess(audio_buffer); + manager_no_analog_agc.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-10.0f)); + manager_with_analog_agc.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-10.0f)); + + // Check that no adaptation occurs when the analog controller is disabled + // and make sure that the test triggers a downward adaptation otherwise. + EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel); + ASSERT_LT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel); +} + +// Checks that when the analog controller is disabled, no upward adaptation +// takes place. +// TODO(webrtc:7494): Revisit the test after moving the number of override wait +// frames to APM config. The test passes but internally the gain update timing +// differs. +TEST_P(AgcManagerDirectParametrizedTest, DisabledAnalogAgcDoesNotAdaptUpwards) { + AgcManagerDirect manager_no_analog_agc(kNumChannels, + GetDisabledAnalogAgcConfig()); + manager_no_analog_agc.Initialize(); + AgcManagerDirect manager_with_analog_agc(kNumChannels, + GetAnalogAgcTestConfig()); + manager_with_analog_agc.Initialize(); + + constexpr int kAnalogLevel = kInitialInputVolume; + manager_no_analog_agc.set_stream_analog_level(kAnalogLevel); + manager_with_analog_agc.set_stream_analog_level(kAnalogLevel); + + // Feed speech with low energy to trigger an upward adapation of the analog + // level. + constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_no_analog_agc); + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_with_analog_agc); + + // Check that no adaptation occurs when the analog controller is disabled + // and make sure that the test triggers an upward adaptation otherwise. + EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel); + ASSERT_GT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel); +} + +TEST_P(AgcManagerDirectParametrizedTest, + StartupMinVolumeConfigurationIsRespected) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(kInitialInputVolume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeResponseToRmsError) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Compressor default; no residual error. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-28.0f)); + + // Above the compressor's window; volume should be increased. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(130, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-38.0f)); + EXPECT_EQ(168, helper.manager.recommended_analog_level()); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-18.0f)); + + // Below the compressor's window; volume should be decreased. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(167, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(163, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-9), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-9.0f)); + EXPECT_EQ(129, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeIsLimited) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Maximum upwards change is limited. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(183, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(243, helper.manager.recommended_analog_level()); + + // Won't go higher than the maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(254, helper.manager.recommended_analog_level()); + + // Maximum downwards change is limited. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(194, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(137, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(88, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(54, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(33, helper.manager.recommended_analog_level()); + + // Won't go lower than the minimum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(std::max(18, GetMinMicLevel()), + helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(22.0f)); + EXPECT_EQ(std::max(12, GetMinMicLevel()), + helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, CompressorStepsTowardsTarget) { + constexpr absl::optional<float> kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Compressor default; no call to set_compression_gain_db. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. + helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride); + + // Moves slowly upwards. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(9), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-27.0f)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + + // Moves slowly downward, then reverses before reaching the original target. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(9), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-27.0f)); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); +} + +TEST_P(AgcManagerDirectParametrizedTest, CompressorErrorIsDeemphasized) { + constexpr absl::optional<float> kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-28.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. + helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-18.0f)); + helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(7)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); +} + +TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMaximum) { + constexpr absl::optional<float> kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/4, speech_probability_override, + GetOverrideOrEmpty(-28.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. + helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); +} + +TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMinimum) { + constexpr absl::optional<float> kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/4, speech_probability_override, + GetOverrideOrEmpty(-18.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. + helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(5)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(4)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(3)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(2)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); +} + +TEST_P(AgcManagerDirectParametrizedTest, NoActionWhileMuted) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.manager.HandleCaptureOutputUsedChange(false); + helper.manager.Process(helper.audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + absl::optional<int> new_digital_gain = + helper.manager.GetDigitalComressionGain(); + if (new_digital_gain) { + helper.mock_gain_control.set_compression_gain_db(*new_digital_gain); + } +} + +TEST_P(AgcManagerDirectParametrizedTest, UnmutingChecksVolumeWithoutRaising) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.manager.HandleCaptureOutputUsedChange(false); + helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 127; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + // SetMicVolume should not be called. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(127, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, UnmutingRaisesTooLowVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.manager.HandleCaptureOutputUsedChange(false); + helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 11; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ManualLevelChangeResultsInNoSetMicCall) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Change outside of compressor's range, which would normally trigger a call + // to `SetMicVolume()`. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + + // When the analog volume changes, the gain controller is reset. + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + ASSERT_NE(helper.manager.recommended_analog_level(), 154); + helper.manager.set_stream_analog_level(154); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(154, helper.manager.recommended_analog_level()); + + // Do the same thing, except downwards now. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(100); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(100, helper.manager.recommended_analog_level()); + + // And finally verify the AGC continues working without a manual change. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(99, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeFromMax) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(183, helper.manager.recommended_analog_level()); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(243, helper.manager.recommended_analog_level()); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + + // Manual change does not result in SetMicVolume call. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(50); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(50, helper.manager.recommended_analog_level()); + + // Continues working as usual afterwards. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-38.0f)); + + EXPECT_EQ(69, helper.manager.recommended_analog_level()); +} + +// Checks that, when the min mic level override is not specified, AGC ramps up +// towards the minimum mic level after the mic level is manually set below the +// minimum gain to enforce. +TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeBelowMinWithoutMiMicLevelnOverride) { + if (IsMinMicLevelOverridden()) { + GTEST_SKIP() << "Skipped. Min mic level overridden."; + } + + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Manual change below min, but strictly positive, otherwise AGC won't take + // any action. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(1); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(1, helper.manager.recommended_analog_level()); + + // Continues working as usual afterwards. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(2, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(11, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-38.0f)); + EXPECT_EQ(18, helper.manager.recommended_analog_level()); +} + +// Checks that, when the min mic level override is specified, AGC immediately +// applies the minimum mic level after the mic level is manually set below the +// minimum gain to enforce. +TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeBelowMin) { + if (!IsMinMicLevelOverridden()) { + GTEST_SKIP() << "Skipped. Min mic level not overridden."; + } + + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Manual change below min, but strictly positive, otherwise + // AGC won't take any action. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(1); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, NoClippingHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/100, /*clipped_ratio=*/0); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingUnderThresholdHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.099); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.2); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, WaitingPeriodBetweenClippingChecks) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(225, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLoweringIsLimited) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/180, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/1000, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenEqualToLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenHigherThanLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/200, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(185, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + MaxCompressionIsIncreasedAfterClipping) { + constexpr absl::optional<float> kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/210, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, kAboveClippedThreshold); + EXPECT_EQ(195, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/5, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. + helper.CallProcess(/*num_calls=*/14, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(13)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + // Continue clipping until we hit the maximum surplus compression. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(180, helper.manager.recommended_analog_level()); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(1, kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + // Current level is now at the minimum, but the maximum allowed level still + // has more to decrease. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/4, speech_probability_override, + GetOverrideOrEmpty(-34.0f)); + helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(14)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(15)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(16)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(17)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(18)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); +} + +TEST_P(AgcManagerDirectParametrizedTest, UserCanRaiseVolumeAfterClipping) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/225, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + + // High enough error to trigger a volume check. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(14), Return(true))); + // User changed the volume. + helper.manager.set_stream_analog_level(250); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-32.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + + // Move down... + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-8.0f)); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + // And back up to the new max established by the user. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + // Will not move above new maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDoesNotPullLowVolumeBackUp) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/80, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + int initial_volume = helper.manager.recommended_analog_level(); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(initial_volume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, TakesNoActionOnZeroMicVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.manager.set_stream_analog_level(0); + helper.CallProcess(/*num_calls=*/10, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(0, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDetectionLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/1.0f); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + DisabledClippingPredictorDoesNotLowerVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_FALSE(helper.manager.clipping_predictor_enabled()); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, DisableDigitalDisablesDigital) { + if (IsRmsErrorOverridden()) { + GTEST_SKIP() << "Skipped. RMS error override does not affect the test."; + } + + auto agc = std::unique_ptr<Agc>(new ::testing::NiceMock<MockAgc>()); + MockGainControl mock_gain_control; + EXPECT_CALL(mock_gain_control, set_mode(GainControl::kFixedDigital)); + EXPECT_CALL(mock_gain_control, set_target_level_dbfs(0)); + EXPECT_CALL(mock_gain_control, set_compression_gain_db(0)); + EXPECT_CALL(mock_gain_control, enable_limiter(false)); + + AnalogAgcConfig config; + config.enable_digital_adaptive = false; + auto manager = std::make_unique<AgcManagerDirect>(kNumChannels, config); + manager->Initialize(); + manager->SetupDigitalGainControl(mock_gain_control); +} + +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDefault) { + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); +} + +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDisabled) { + for (const std::string& field_trial_suffix : {"", "_20220210"}) { + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrial("Disabled" + field_trial_suffix)); + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); + } +} + +// Checks that a field-trial parameter outside of the valid range [0,255] is +// ignored. +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeAbove) { + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrial("Enabled-256")); + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); +} + +// Checks that a field-trial parameter outside of the valid range [0,255] is +// ignored. +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeBelow) { + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrial("Enabled--1")); + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel); +} + +// Verifies that a valid experiment changes the minimum microphone level. The +// start volume is larger than the min level and should therefore not be +// changed. +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentEnabled50) { + constexpr int kMinMicLevelOverride = 50; + for (const std::string& field_trial_suffix : {"", "_20220210"}) { + SCOPED_TRACE(field_trial_suffix); + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride, + field_trial_suffix)); + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevelOverride); + } +} + +// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is +// specified with a valid value, the mic level never gets lowered beyond the +// override value in the presence of clipping. +TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentCheckMinLevelWithClipping) { + constexpr int kMinMicLevelOverride = 250; + + // Create and initialize two AGCs by specifying and leaving unspecified the + // relevant field trial. + const auto factory = []() { + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + manager->Initialize(); + manager->set_stream_analog_level(kInitialInputVolume); + return manager; + }; + std::unique_ptr<AgcManagerDirect> manager = factory(); + std::unique_ptr<AgcManagerDirect> manager_with_override; + { + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); + manager_with_override = factory(); + } + + // Create a test input signal which containts 80% of clipped samples. + AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz, + 1); + WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + audio_buffer); + + // Simulate 4 seconds of clipping; it is expected to trigger a downward + // adjustment of the analog gain. + CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::nullopt, + /*speech_level_override=*/absl::nullopt, *manager); + CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::nullopt, + /*speech_level_override=*/absl::nullopt, + *manager_with_override); + + // Make sure that an adaptation occurred. + ASSERT_GT(manager->recommended_analog_level(), 0); + + // Check that the test signal triggers a larger downward adaptation for + // `manager`, which is allowed to reach a lower gain. + EXPECT_GT(manager_with_override->recommended_analog_level(), + manager->recommended_analog_level()); + // Check that the gain selected by `manager_with_override` equals the minimum + // value overridden via field trial. + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kMinMicLevelOverride); +} + +// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is +// specified with a valid value, the mic level never gets lowered beyond the +// override value in the presence of clipping when RMS error override is used. +// TODO(webrtc:7494): Revisit the test after moving the number of override wait +// frames to APM config. The test passes but internally the gain update timing +// differs. +TEST(AgcManagerDirectTest, + AgcMinMicLevelExperimentCheckMinLevelWithClippingWithRmsErrorOverride) { + constexpr int kMinMicLevelOverride = 250; + + // Create and initialize two AGCs by specifying and leaving unspecified the + // relevant field trial. + const auto factory = []() { + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + manager->Initialize(); + manager->set_stream_analog_level(kInitialInputVolume); + return manager; + }; + std::unique_ptr<AgcManagerDirect> manager = factory(); + std::unique_ptr<AgcManagerDirect> manager_with_override; + { + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); + manager_with_override = factory(); + } + + // Create a test input signal which containts 80% of clipped samples. + AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz, + 1); + WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + audio_buffer); + + // Simulate 4 seconds of clipping; it is expected to trigger a downward + // adjustment of the analog gain. + CallPreProcessAndProcess( + /*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/0.7f, + /*speech_probability_level=*/-18.0f, *manager); + CallPreProcessAndProcess( + /*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::optional<float>(0.7f), + /*speech_probability_level=*/absl::optional<float>(-18.0f), + *manager_with_override); + + // Make sure that an adaptation occurred. + ASSERT_GT(manager->recommended_analog_level(), 0); + + // Check that the test signal triggers a larger downward adaptation for + // `manager`, which is allowed to reach a lower gain. + EXPECT_GT(manager_with_override->recommended_analog_level(), + manager->recommended_analog_level()); + // Check that the gain selected by `manager_with_override` equals the minimum + // value overridden via field trial. + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kMinMicLevelOverride); +} + +// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is +// specified with a value lower than the `clipped_level_min`, the behavior of +// the analog gain controller is the same as that obtained when the field trial +// is not specified. +TEST(AgcManagerDirectTest, + AgcMinMicLevelExperimentCompareMicLevelWithClipping) { + // Create and initialize two AGCs by specifying and leaving unspecified the + // relevant field trial. + const auto factory = []() { + // Use a large clipped level step to more quickly decrease the analog gain + // with clipping. + AnalogAgcConfig config = kDefaultAnalogConfig; + config.startup_min_volume = kInitialInputVolume; + config.enable_digital_adaptive = false; + config.clipped_level_step = 64; + config.clipped_ratio_threshold = kClippedRatioThreshold; + config.clipped_wait_frames = kClippedWaitFrames; + auto controller = + std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config); + controller->Initialize(); + controller->set_stream_analog_level(kInitialInputVolume); + return controller; + }; + std::unique_ptr<AgcManagerDirect> manager = factory(); + std::unique_ptr<AgcManagerDirect> manager_with_override; + { + constexpr int kMinMicLevelOverride = 20; + static_assert( + kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride, + "Use a lower override value."); + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); + manager_with_override = factory(); + } + + // Create a test input signal which containts 80% of clipped samples. + AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz, + 1); + WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + audio_buffer); + + // Simulate 4 seconds of clipping; it is expected to trigger a downward + // adjustment of the analog gain. + CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::nullopt, + /*speech_level_override=*/absl::nullopt, *manager); + CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::nullopt, + /*speech_level_override=*/absl::nullopt, + *manager_with_override); + + // Make sure that an adaptation occurred. + ASSERT_GT(manager->recommended_analog_level(), 0); + + // Check that the selected analog gain is the same for both controllers and + // that it equals the minimum level reached when clipping is handled. That is + // expected because the minimum microphone level override is less than the + // minimum level used when clipping is detected. + EXPECT_EQ(manager->recommended_analog_level(), + manager_with_override->recommended_analog_level()); + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kDefaultAnalogConfig.clipped_level_min); +} + +// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is +// specified with a value lower than the `clipped_level_min`, the behavior of +// the analog gain controller is the same as that obtained when the field trial +// is not specified. +// TODO(webrtc:7494): Revisit the test after moving the number of override wait +// frames to APM config. The test passes but internally the gain update timing +// differs. +TEST(AgcManagerDirectTest, + AgcMinMicLevelExperimentCompareMicLevelWithClippingWithRmsErrorOverride) { + // Create and initialize two AGCs by specifying and leaving unspecified the + // relevant field trial. + const auto factory = []() { + // Use a large clipped level step to more quickly decrease the analog gain + // with clipping. + AnalogAgcConfig config = kDefaultAnalogConfig; + config.startup_min_volume = kInitialInputVolume; + config.enable_digital_adaptive = false; + config.clipped_level_step = 64; + config.clipped_ratio_threshold = kClippedRatioThreshold; + config.clipped_wait_frames = kClippedWaitFrames; + auto controller = + std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config); + controller->Initialize(); + controller->set_stream_analog_level(kInitialInputVolume); + return controller; + }; + std::unique_ptr<AgcManagerDirect> manager = factory(); + std::unique_ptr<AgcManagerDirect> manager_with_override; + { + constexpr int kMinMicLevelOverride = 20; + static_assert( + kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride, + "Use a lower override value."); + test::ScopedFieldTrials field_trial( + GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); + manager_with_override = factory(); + } + + // Create a test input signal which containts 80% of clipped samples. + AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz, + 1); + WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f, + audio_buffer); + + CallPreProcessAndProcess( + /*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::optional<float>(0.7f), + /*speech_level_override=*/absl::optional<float>(-18.0f), *manager); + CallPreProcessAndProcess( + /*num_calls=*/400, audio_buffer, + /*speech_probability_override=*/absl::optional<float>(0.7f), + /*speech_level_override=*/absl::optional<float>(-18.0f), + *manager_with_override); + + // Make sure that an adaptation occurred. + ASSERT_GT(manager->recommended_analog_level(), 0); + + // Check that the selected analog gain is the same for both controllers and + // that it equals the minimum level reached when clipping is handled. That is + // expected because the minimum microphone level override is less than the + // minimum level used when clipping is detected. + EXPECT_EQ(manager->recommended_analog_level(), + manager_with_override->recommended_analog_level()); + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kDefaultAnalogConfig.clipped_level_min); +} + +// TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_level_step`. +// TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_ratio_threshold`. +// TODO(bugs.webrtc.org/12774): Test the bahavior of `clipped_wait_frames`. +// Verifies that configurable clipping parameters are initialized as intended. +TEST_P(AgcManagerDirectParametrizedTest, ClippingParametersVerified) { + if (IsRmsErrorOverridden()) { + GTEST_SKIP() << "Skipped. RMS error override does not affect the test."; + } + + std::unique_ptr<AgcManagerDirect> manager = + CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep, + kClippedRatioThreshold, kClippedWaitFrames); + manager->Initialize(); + EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep); + EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold); + EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames); + std::unique_ptr<AgcManagerDirect> manager_custom = + CreateAgcManagerDirect(kInitialInputVolume, + /*clipped_level_step=*/10, + /*clipped_ratio_threshold=*/0.2f, + /*clipped_wait_frames=*/50); + manager_custom->Initialize(); + EXPECT_EQ(manager_custom->clipped_level_step_, 10); + EXPECT_EQ(manager_custom->clipped_ratio_threshold_, 0.2f); + EXPECT_EQ(manager_custom->clipped_wait_frames_, 50); +} + +TEST_P(AgcManagerDirectParametrizedTest, + DisableClippingPredictorDisablesClippingPredictor) { + if (IsRmsErrorOverridden()) { + GTEST_SKIP() << "Skipped. RMS error override does not affect the test."; + } + + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed. + ClippingPredictorConfig config; + config.enabled = false; + + std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect( + kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, config); + manager->Initialize(); + EXPECT_FALSE(manager->clipping_predictor_enabled()); + EXPECT_FALSE(manager->use_clipping_predictor_step()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingPredictorDisabledByDefault) { + if (IsRmsErrorOverridden()) { + GTEST_SKIP() << "Skipped. RMS error override does not affect the test."; + } + + constexpr ClippingPredictorConfig kDefaultConfig; + EXPECT_FALSE(kDefaultConfig.enabled); +} + +TEST_P(AgcManagerDirectParametrizedTest, + EnableClippingPredictorEnablesClippingPredictor) { + if (IsRmsErrorOverridden()) { + GTEST_SKIP() << "Skipped. RMS error override does not affect the test."; + } + + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed. + ClippingPredictorConfig config; + config.enabled = true; + config.use_predicted_step = true; + + std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect( + kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, config); + manager->Initialize(); + EXPECT_TRUE(manager->clipping_predictor_enabled()); + EXPECT_TRUE(manager->use_clipping_predictor_step()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + DisableClippingPredictorDoesNotLowerVolume) { + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + AnalogAgcConfig config = GetAnalogAgcTestConfig(); + config.clipping_predictor.enabled = false; + AgcManagerDirect manager(config, new ::testing::NiceMock<MockAgc>()); + manager.Initialize(); + manager.set_stream_analog_level(/*level=*/255); + EXPECT_FALSE(manager.clipping_predictor_enabled()); + EXPECT_FALSE(manager.use_clipping_predictor_step()); + EXPECT_EQ(manager.recommended_analog_level(), 255); + manager.Process(audio_buffer, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); +} + +TEST_P(AgcManagerDirectParametrizedTest, + UsedClippingPredictionsProduceLowerAnalogLevels) { + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig(); + config_with_prediction.clipping_predictor.enabled = true; + config_with_prediction.clipping_predictor.use_predicted_step = true; + AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig(); + config_without_prediction.clipping_predictor.enabled = false; + AgcManagerDirect manager_with_prediction(config_with_prediction, + new ::testing::NiceMock<MockAgc>()); + AgcManagerDirect manager_without_prediction( + config_without_prediction, new ::testing::NiceMock<MockAgc>()); + + manager_with_prediction.Initialize(); + manager_without_prediction.Initialize(); + + constexpr int kInitialLevel = 255; + constexpr float kClippingPeakRatio = 1.0f; + constexpr float kCloseToClippingPeakRatio = 0.99f; + constexpr float kZeroPeakRatio = 0.0f; + manager_with_prediction.set_stream_analog_level(kInitialLevel); + manager_without_prediction.set_stream_analog_level(kInitialLevel); + manager_with_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + manager_without_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled()); + EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled()); + EXPECT_TRUE(manager_with_prediction.use_clipping_predictor_step()); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change in the analog level when the prediction step is used. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change when the prediction step is used. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change when clipping is not detected or predicted. + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 4 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); +} + +TEST_P(AgcManagerDirectParametrizedTest, + UnusedClippingPredictionsProduceEqualAnalogLevels) { + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig(); + config_with_prediction.clipping_predictor.enabled = true; + config_with_prediction.clipping_predictor.use_predicted_step = false; + AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig(); + config_without_prediction.clipping_predictor.enabled = false; + AgcManagerDirect manager_with_prediction(config_with_prediction, + new ::testing::NiceMock<MockAgc>()); + AgcManagerDirect manager_without_prediction( + config_without_prediction, new ::testing::NiceMock<MockAgc>()); + + constexpr int kInitialLevel = 255; + constexpr float kClippingPeakRatio = 1.0f; + constexpr float kCloseToClippingPeakRatio = 0.99f; + constexpr float kZeroPeakRatio = 0.0f; + manager_with_prediction.Initialize(); + manager_without_prediction.Initialize(); + manager_with_prediction.set_stream_analog_level(kInitialLevel); + manager_without_prediction.set_stream_analog_level(kInitialLevel); + manager_with_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + manager_without_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled()); + EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled()); + EXPECT_FALSE(manager_with_prediction.use_clipping_predictor_step()); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change in the analog level for non-clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect no change for non-clipping frames. + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect no change for non-clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect no change when clipping is not detected or predicted. + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + manager_without_prediction.recommended_analog_level()); +} + +// Checks that passing an empty speech level and probability overrides to +// `Process()` has the same effect as passing no overrides. +TEST_P(AgcManagerDirectParametrizedTest, EmptyRmsErrorOverrideHasNoEffect) { + AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig()); + AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig()); + manager_1.Initialize(); + manager_2.Initialize(); + + constexpr int kAnalogLevel = 50; + manager_1.set_stream_analog_level(kAnalogLevel); + manager_2.set_stream_analog_level(kAnalogLevel); + + // Feed speech with low energy to trigger an upward adapation of the analog + // level. + constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + + // Check the initial input volume. + ASSERT_EQ(manager_1.recommended_analog_level(), kAnalogLevel); + ASSERT_EQ(manager_2.recommended_analog_level(), kAnalogLevel); + + reader.Feed(kNumFrames, kGainDb, absl::nullopt, absl::nullopt, manager_1); + reader.Feed(kNumFrames, kGainDb, manager_2); + + // Check that the states are the same and adaptation occurs. + EXPECT_EQ(manager_1.recommended_analog_level(), + manager_2.recommended_analog_level()); + ASSERT_GT(manager_1.recommended_analog_level(), kAnalogLevel); + EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability()); + EXPECT_EQ(manager_1.frames_since_clipped_, manager_2.frames_since_clipped_); + + // Check that the states of the channel AGCs are the same. + EXPECT_EQ(manager_1.num_channels(), manager_2.num_channels()); + for (int i = 0; i < manager_1.num_channels(); ++i) { + EXPECT_EQ(manager_1.channel_agcs_[i]->recommended_analog_level(), + manager_2.channel_agcs_[i]->recommended_analog_level()); + EXPECT_EQ(manager_1.channel_agcs_[i]->voice_probability(), + manager_2.channel_agcs_[i]->voice_probability()); + } +} + +// Checks that passing a non-empty speech level and probability overrides to +// `Process()` has an effect. +TEST_P(AgcManagerDirectParametrizedTest, NonEmptyRmsErrorOverrideHasEffect) { + AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig()); + AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig()); + manager_1.Initialize(); + manager_2.Initialize(); + + constexpr int kInputVolume = 128; + manager_1.set_stream_analog_level(kInputVolume); + manager_2.set_stream_analog_level(kInputVolume); + + // Feed speech with low energy to trigger an upward adapation of the input + // volume. + constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + + // Make sure that the feeding samples triggers an adaptation when no override + // is specified. + reader.Feed(kNumFrames, kGainDb, manager_1); + ASSERT_GT(manager_1.recommended_analog_level(), kInputVolume); + + // Expect that feeding samples triggers an adaptation when the speech + // probability and speech level overrides are specified. + reader.Feed(kNumFrames, kGainDb, + /*speech_probability_override=*/kHighSpeechProbability, + /*speech_level_override=*/-45.0f, manager_2); + EXPECT_GT(manager_2.recommended_analog_level(), kInputVolume); + + // The voice probability override does not affect the `voice_probability()` + // getter. + EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability()); +} + +class AgcManagerDirectChannelSampleRateTest + : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + int GetNumChannels() const { return std::get<0>(GetParam()); } + int GetSampleRateHz() const { return std::get<1>(GetParam()); } +}; + +TEST_P(AgcManagerDirectChannelSampleRateTest, CheckIsAlive) { + const int num_channels = GetNumChannels(); + const int sample_rate_hz = GetSampleRateHz(); + + constexpr AnalogAgcConfig kConfig{.enabled = true, + .clipping_predictor{.enabled = true}}; + AgcManagerDirect manager(num_channels, kConfig); + manager.Initialize(); + AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz, num_channels, + sample_rate_hz, num_channels); + + constexpr int kStartupVolume = 100; + int applied_initial_volume = kStartupVolume; + + // Trigger a downward adaptation with clipping. + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.5f, + buffer); + const int initial_volume1 = applied_initial_volume; + for (int i = 0; i < 400; ++i) { + manager.set_stream_analog_level(applied_initial_volume); + manager.AnalyzePreProcess(buffer); + manager.Process(buffer, kLowSpeechProbability, + /*speech_level_dbfs=*/-20.0f); + applied_initial_volume = manager.recommended_analog_level(); + } + ASSERT_LT(manager.recommended_analog_level(), initial_volume1); + + // Fill in audio that does not clip. + WriteAudioBufferSamples(/*samples_value=*/1234.5f, /*clipped_ratio=*/0.0f, + buffer); + + // Trigger an upward adaptation. + const int initial_volume2 = manager.recommended_analog_level(); + for (int i = 0; i < kConfig.clipped_wait_frames; ++i) { + manager.set_stream_analog_level(applied_initial_volume); + manager.AnalyzePreProcess(buffer); + manager.Process(buffer, kHighSpeechProbability, + /*speech_level_dbfs=*/-65.0f); + applied_initial_volume = manager.recommended_analog_level(); + } + EXPECT_GT(manager.recommended_analog_level(), initial_volume2); + + // Trigger a downward adaptation. + const int initial_volume = manager.recommended_analog_level(); + for (int i = 0; i < 100; ++i) { + manager.set_stream_analog_level(applied_initial_volume); + manager.AnalyzePreProcess(buffer); + manager.Process(buffer, kHighSpeechProbability, + /*speech_level_dbfs=*/-5.0f); + applied_initial_volume = manager.recommended_analog_level(); + } + EXPECT_LT(manager.recommended_analog_level(), initial_volume); +} + +INSTANTIATE_TEST_SUITE_P( + , + AgcManagerDirectChannelSampleRateTest, + ::testing::Combine(::testing::Values(1, 2, 3, 6), + ::testing::Values(8000, 16000, 32000, 48000))); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h new file mode 100644 index 0000000000..389b2114af --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_ +#define MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_ + +namespace webrtc { + +// The automatic gain control (AGC) component brings the signal to an +// appropriate range. This is done by applying a digital gain directly and, in +// the analog mode, prescribing an analog gain to be applied at the audio HAL. +// +// Recommended to be enabled on the client-side. +class GainControl { + public: + // When an analog mode is set, this must be called prior to `ProcessStream()` + // to pass the current analog level from the audio HAL. Must be within the + // range provided to `set_analog_level_limits()`. + virtual int set_stream_analog_level(int level) = 0; + + // When an analog mode is set, this should be called after `ProcessStream()` + // to obtain the recommended new analog level for the audio HAL. It is the + // users responsibility to apply this level. + virtual int stream_analog_level() const = 0; + + enum Mode { + // Adaptive mode intended for use if an analog volume control is available + // on the capture device. It will require the user to provide coupling + // between the OS mixer controls and AGC through the `stream_analog_level()` + // functions. + // + // It consists of an analog gain prescription for the audio device and a + // digital compression stage. + kAdaptiveAnalog, + + // Adaptive mode intended for situations in which an analog volume control + // is unavailable. It operates in a similar fashion to the adaptive analog + // mode, but with scaling instead applied in the digital domain. As with + // the analog mode, it additionally uses a digital compression stage. + kAdaptiveDigital, + + // Fixed mode which enables only the digital compression stage also used by + // the two adaptive modes. + // + // It is distinguished from the adaptive modes by considering only a + // short time-window of the input signal. It applies a fixed gain through + // most of the input level range, and compresses (gradually reduces gain + // with increasing level) the input signal at higher levels. This mode is + // preferred on embedded devices where the capture signal level is + // predictable, so that a known gain can be applied. + kFixedDigital + }; + + virtual int set_mode(Mode mode) = 0; + virtual Mode mode() const = 0; + + // Sets the target peak `level` (or envelope) of the AGC in dBFs (decibels + // from digital full-scale). The convention is to use positive values. For + // instance, passing in a value of 3 corresponds to -3 dBFs, or a target + // level 3 dB below full-scale. Limited to [0, 31]. + // + // TODO(ajm): use a negative value here instead, if/when VoE will similarly + // update its interface. + virtual int set_target_level_dbfs(int level) = 0; + virtual int target_level_dbfs() const = 0; + + // Sets the maximum `gain` the digital compression stage may apply, in dB. A + // higher number corresponds to greater compression, while a value of 0 will + // leave the signal uncompressed. Limited to [0, 90]. + virtual int set_compression_gain_db(int gain) = 0; + virtual int compression_gain_db() const = 0; + + // When enabled, the compression stage will hard limit the signal to the + // target level. Otherwise, the signal will be compressed but not limited + // above the target level. + virtual int enable_limiter(bool enable) = 0; + virtual bool is_limiter_enabled() const = 0; + + // Sets the `minimum` and `maximum` analog levels of the audio capture device. + // Must be set if and only if an analog mode is used. Limited to [0, 65535]. + virtual int set_analog_level_limits(int minimum, int maximum) = 0; + virtual int analog_level_minimum() const = 0; + virtual int analog_level_maximum() const = 0; + + // Returns true if the AGC has detected a saturation event (period where the + // signal reaches digital full-scale) in the current frame and the analog + // level cannot be reduced. + // + // This could be used as an indicator to reduce or disable analog mic gain at + // the audio HAL. + virtual bool stream_is_saturated() const = 0; + + protected: + virtual ~GainControl() {} +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build new file mode 100644 index 0000000000..c6ab9b3160 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_control_interface_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc new file mode 100644 index 0000000000..e40a3f1629 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc @@ -0,0 +1,1238 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * + * Using a feedback system, determines an appropriate analog volume level + * given an input signal and current volume level. Targets a conservative + * signal level and is intended for use with a digital AGC to apply + * additional gain. + * + */ + +#include "modules/audio_processing/agc/legacy/analog_agc.h" + +#include <stdlib.h> + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Errors +#define AGC_UNSPECIFIED_ERROR 18000 +#define AGC_UNINITIALIZED_ERROR 18002 +#define AGC_NULL_POINTER_ERROR 18003 +#define AGC_BAD_PARAMETER_ERROR 18004 + +/* The slope of in Q13*/ +static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129, + 2372, 1362, 472, 78}; + +/* The offset in Q14 */ +static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737, + 19612, 18805, 17951, 17367}; + +/* The slope of in Q13*/ +static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337}; + +/* The offset in Q14 */ +static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177, + 18052, 17920, 17670, 17286}; + +static const int16_t kMuteGuardTimeMs = 8000; +static const int16_t kInitCheck = 42; +static const size_t kNumSubframes = 10; + +/* Default settings if config is not used */ +#define AGC_DEFAULT_TARGET_LEVEL 3 +#define AGC_DEFAULT_COMP_GAIN 9 +/* This is the target level for the analog part in ENV scale. To convert to RMS + * scale you + * have to add OFFSET_ENV_TO_RMS. + */ +#define ANALOG_TARGET_LEVEL 11 +#define ANALOG_TARGET_LEVEL_2 5 // ANALOG_TARGET_LEVEL / 2 +/* Offset between RMS scale (analog part) and ENV scale (digital part). This + * value actually + * varies with the FIXED_ANALOG_TARGET_LEVEL, hence we should in the future + * replace it with + * a table. + */ +#define OFFSET_ENV_TO_RMS 9 +/* The reference input level at which the digital part gives an output of + * targetLevelDbfs + * (desired level) if we have no compression gain. This level should be set high + * enough not + * to compress the peaks due to the dynamics. + */ +#define DIGITAL_REF_AT_0_COMP_GAIN 4 +/* Speed of reference level decrease. + */ +#define DIFF_REF_TO_ANALOG 5 + +/* Size of analog gain table */ +#define GAIN_TBL_LEN 32 +/* Matlab code: + * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12)); + */ +/* Q12 */ +static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = { + 4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938, + 6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934, + 9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953}; + +/* Gain/Suppression tables for virtual Mic (in Q10) */ +static const uint16_t kGainTableVirtualMic[128] = { + 1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378, + 1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854, + 1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495, + 2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357, + 3449, 3543, 3640, 3739, 3842, 3947, 4055, 4166, 4280, 4397, 4517, + 4640, 4767, 4898, 5032, 5169, 5311, 5456, 5605, 5758, 5916, 6078, + 6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, 8178, + 8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004, + 11305, 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807, + 15212, 15628, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923, + 20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808, + 27541, 28295, 29069, 29864, 30681, 31520, 32382}; +static const uint16_t kSuppressionTableVirtualMic[128] = { + 1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794, + 780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605, + 594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461, + 453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, 371, 364, 358, 351, + 345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, 273, 268, + 263, 258, 254, 249, 244, 240, 236, 232, 227, 223, 219, 215, 211, 208, 204, + 200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155, + 153, 150, 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118, + 116, 114, 112, 110, 108, 106, 104, 102}; + +/* Table for target energy levels. Values in Q(-7) + * Matlab code + * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n', + * round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */ + +static const int32_t kTargetLevelTable[64] = { + 134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911, + 26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411, + 5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095, + 1066064, 846805, 672641, 534298, 424408, 337119, 267783, + 212708, 168960, 134210, 106606, 84680, 67264, 53430, + 42441, 33712, 26778, 21271, 16896, 13421, 10661, + 8468, 6726, 5343, 4244, 3371, 2678, 2127, + 1690, 1342, 1066, 847, 673, 534, 424, + 337, 268, 213, 169, 134, 107, 85, + 67}; + +} // namespace + +int WebRtcAgc_AddMic(void* state, + int16_t* const* in_mic, + size_t num_bands, + size_t samples) { + int32_t nrg, max_nrg, sample, tmp32; + int32_t* ptr; + uint16_t targetGainIdx, gain; + size_t i; + int16_t n, L, tmp16, tmp_speech[16]; + LegacyAgc* stt; + stt = reinterpret_cast<LegacyAgc*>(state); + + if (stt->fs == 8000) { + L = 8; + if (samples != 80) { + return -1; + } + } else { + L = 16; + if (samples != 160) { + return -1; + } + } + + /* apply slowly varying digital gain */ + if (stt->micVol > stt->maxAnalog) { + /* `maxLevel` is strictly >= `micVol`, so this condition should be + * satisfied here, ensuring there is no divide-by-zero. */ + RTC_DCHECK_GT(stt->maxLevel, stt->maxAnalog); + + /* Q1 */ + tmp16 = (int16_t)(stt->micVol - stt->maxAnalog); + tmp32 = (GAIN_TBL_LEN - 1) * tmp16; + tmp16 = (int16_t)(stt->maxLevel - stt->maxAnalog); + targetGainIdx = tmp32 / tmp16; + RTC_DCHECK_LT(targetGainIdx, GAIN_TBL_LEN); + + /* Increment through the table towards the target gain. + * If micVol drops below maxAnalog, we allow the gain + * to be dropped immediately. */ + if (stt->gainTableIdx < targetGainIdx) { + stt->gainTableIdx++; + } else if (stt->gainTableIdx > targetGainIdx) { + stt->gainTableIdx--; + } + + /* Q12 */ + gain = kGainTableAnalog[stt->gainTableIdx]; + + for (i = 0; i < samples; i++) { + size_t j; + for (j = 0; j < num_bands; ++j) { + sample = (in_mic[j][i] * gain) >> 12; + if (sample > 32767) { + in_mic[j][i] = 32767; + } else if (sample < -32768) { + in_mic[j][i] = -32768; + } else { + in_mic[j][i] = (int16_t)sample; + } + } + } + } else { + stt->gainTableIdx = 0; + } + + /* compute envelope */ + if (stt->inQueue > 0) { + ptr = stt->env[1]; + } else { + ptr = stt->env[0]; + } + + for (i = 0; i < kNumSubframes; i++) { + /* iterate over samples */ + max_nrg = 0; + for (n = 0; n < L; n++) { + nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + ptr[i] = max_nrg; + } + + /* compute energy */ + if (stt->inQueue > 0) { + ptr = stt->Rxx16w32_array[1]; + } else { + ptr = stt->Rxx16w32_array[0]; + } + + for (i = 0; i < kNumSubframes / 2; i++) { + if (stt->fs == 16000) { + WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32], 32, tmp_speech, + stt->filterState); + } else { + memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(int16_t)); + } + /* Compute energy in blocks of 16 samples */ + ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4); + } + + /* update queue information */ + if (stt->inQueue == 0) { + stt->inQueue = 1; + } else { + stt->inQueue = 2; + } + + /* call VAD (use low band only) */ + WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples); + + return 0; +} + +int WebRtcAgc_AddFarend(void* state, const int16_t* in_far, size_t samples) { + LegacyAgc* stt = reinterpret_cast<LegacyAgc*>(state); + + int err = WebRtcAgc_GetAddFarendError(state, samples); + + if (err != 0) + return err; + + return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); +} + +int WebRtcAgc_GetAddFarendError(void* state, size_t samples) { + LegacyAgc* stt; + stt = reinterpret_cast<LegacyAgc*>(state); + + if (stt == NULL) + return -1; + + if (stt->fs == 8000) { + if (samples != 80) + return -1; + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) + return -1; + } else { + return -1; + } + + return 0; +} + +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* in_near, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut) { + int32_t tmpFlt, micLevelTmp, gainIdx; + uint16_t gain; + size_t ii, j; + LegacyAgc* stt; + + uint32_t nrg; + size_t sampleCntr; + uint32_t frameNrg = 0; + uint32_t frameNrgLimit = 5500; + int16_t numZeroCrossing = 0; + const int16_t kZeroCrossingLowLim = 15; + const int16_t kZeroCrossingHighLim = 20; + + stt = reinterpret_cast<LegacyAgc*>(agcInst); + + /* + * Before applying gain decide if this is a low-level signal. + * The idea is that digital AGC will not adapt to low-level + * signals. + */ + if (stt->fs != 8000) { + frameNrgLimit = frameNrgLimit << 1; + } + + frameNrg = (uint32_t)(in_near[0][0] * in_near[0][0]); + for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) { + // increment frame energy if it is less than the limit + // the correct value of the energy is not important + if (frameNrg < frameNrgLimit) { + nrg = (uint32_t)(in_near[0][sampleCntr] * in_near[0][sampleCntr]); + frameNrg += nrg; + } + + // Count the zero crossings + numZeroCrossing += + ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0); + } + + if ((frameNrg < 500) || (numZeroCrossing <= 5)) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing <= kZeroCrossingLowLim) { + stt->lowLevelSignal = 0; + } else if (frameNrg <= frameNrgLimit) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing >= kZeroCrossingHighLim) { + stt->lowLevelSignal = 1; + } else { + stt->lowLevelSignal = 0; + } + + micLevelTmp = micLevelIn << stt->scale; + /* Set desired level */ + gainIdx = stt->micVol; + if (stt->micVol > stt->maxAnalog) { + gainIdx = stt->maxAnalog; + } + if (micLevelTmp != stt->micRef) { + /* Something has happened with the physical level, restart. */ + stt->micRef = micLevelTmp; + stt->micVol = 127; + *micLevelOut = 127; + stt->micGainIdx = 127; + gainIdx = 127; + } + /* Pre-process the signal to emulate the microphone level. */ + /* Take one step at a time in the gain table. */ + if (gainIdx > 127) { + gain = kGainTableVirtualMic[gainIdx - 128]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + for (ii = 0; ii < samples; ii++) { + tmpFlt = (in_near[0][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + in_near[0][ii] = (int16_t)tmpFlt; + for (j = 1; j < num_bands; ++j) { + tmpFlt = (in_near[j][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + } + in_near[j][ii] = (int16_t)tmpFlt; + } + } + /* Set the level we (finally) used */ + stt->micGainIdx = gainIdx; + // *micLevelOut = stt->micGainIdx; + *micLevelOut = stt->micGainIdx >> stt->scale; + /* Add to Mic as if it was the output from a true microphone */ + if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) { + return -1; + } + return 0; +} + +void WebRtcAgc_UpdateAgcThresholds(LegacyAgc* stt) { + int16_t tmp16; + + /* Set analog target level in envelope dBOv scale */ + tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; + tmp16 = WebRtcSpl_DivW32W16ResW16((int32_t)tmp16, ANALOG_TARGET_LEVEL); + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; + if (stt->analogTarget < DIGITAL_REF_AT_0_COMP_GAIN) { + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN; + } + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->analogTarget = stt->compressionGaindB; + } + /* Since the offset between RMS and ENV is not constant, we should make this + * into a + * table, but for now, we'll stick with a constant, tuned for the chosen + * analog + * target level. + */ + stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; + /* Analog adaptation limits */ + /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */ + stt->analogTargetLevel = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx]; /* ex. -20 dBov */ + stt->startUpperLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 1]; /* -19 dBov */ + stt->startLowerLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 1]; /* -21 dBov */ + stt->upperPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 2]; /* -18 dBov */ + stt->lowerPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 2]; /* -22 dBov */ + stt->upperSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 5]; /* -15 dBov */ + stt->lowerSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 5]; /* -25 dBov */ + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +} + +void WebRtcAgc_SaturationCtrl(LegacyAgc* stt, + uint8_t* saturated, + int32_t* env) { + int16_t i, tmpW16; + + /* Check if the signal is saturated */ + for (i = 0; i < 10; i++) { + tmpW16 = (int16_t)(env[i] >> 20); + if (tmpW16 > 875) { + stt->envSum += tmpW16; + } + } + + if (stt->envSum > 25000) { + *saturated = 1; + stt->envSum = 0; + } + + /* stt->envSum *= 0.99; */ + stt->envSum = (int16_t)((stt->envSum * 32440) >> 15); +} + +void WebRtcAgc_ZeroCtrl(LegacyAgc* stt, int32_t* inMicLevel, int32_t* env) { + int16_t i; + int64_t tmp = 0; + int32_t midVal; + + /* Is the input signal zero? */ + for (i = 0; i < 10; i++) { + tmp += env[i]; + } + + /* Each block is allowed to have a few non-zero + * samples. + */ + if (tmp < 500) { + stt->msZero += 10; + } else { + stt->msZero = 0; + } + + if (stt->muteGuardMs > 0) { + stt->muteGuardMs -= 10; + } + + if (stt->msZero > 500) { + stt->msZero = 0; + + /* Increase microphone level only if it's less than 50% */ + midVal = (stt->maxAnalog + stt->minLevel + 1) / 2; + if (*inMicLevel < midVal) { + /* *inMicLevel *= 1.1; */ + *inMicLevel = (1126 * *inMicLevel) >> 10; + /* Reduces risk of a muted mic repeatedly triggering excessive levels due + * to zero signal detection. */ + *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); + stt->micVol = *inMicLevel; + } + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* The AGC has a tendency (due to problems with the VAD parameters), to + * vastly increase the volume after a muting event. This timer prevents + * upwards adaptation for a short period. */ + stt->muteGuardMs = kMuteGuardTimeMs; + } +} + +void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc* stt) { + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. + */ + + int32_t tmp32; + int16_t vadThresh; + + if (stt->vadMic.stdLongTerm < 2500) { + stt->vadThreshold = 1500; + } else { + vadThresh = kNormalVadThreshold; + if (stt->vadMic.stdLongTerm < 4500) { + /* Scale between min and max threshold */ + vadThresh += (4500 - stt->vadMic.stdLongTerm) / 2; + } + + /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */ + tmp32 = vadThresh + 31 * stt->vadThreshold; + stt->vadThreshold = (int16_t)(tmp32 >> 5); + } +} + +void WebRtcAgc_ExpCurve(int16_t volume, int16_t* index) { + // volume in Q14 + // index in [0-7] + /* 8 different curves */ + if (volume > 5243) { + if (volume > 7864) { + if (volume > 12124) { + *index = 7; + } else { + *index = 6; + } + } else { + if (volume > 6554) { + *index = 5; + } else { + *index = 4; + } + } + } else { + if (volume > 2621) { + if (volume > 3932) { + *index = 3; + } else { + *index = 2; + } + } else { + if (volume > 1311) { + *index = 1; + } else { + *index = 0; + } + } + } +} + +int32_t WebRtcAgc_ProcessAnalog(void* state, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t vadLogRatio, + int16_t echo, + uint8_t* saturationWarning) { + uint32_t tmpU32; + int32_t Rxx16w32, tmp32; + int32_t inMicLevelTmp, lastMicVol; + int16_t i; + uint8_t saturated = 0; + LegacyAgc* stt; + + stt = reinterpret_cast<LegacyAgc*>(state); + inMicLevelTmp = inMicLevel << stt->scale; + + if (inMicLevelTmp > stt->maxAnalog) { + return -1; + } else if (inMicLevelTmp < stt->minLevel) { + return -1; + } + + if (stt->firstCall == 0) { + int32_t tmpVol; + stt->firstCall = 1; + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + tmpVol = (stt->minLevel + tmp32); + + /* If the mic level is very low at start, increase it! */ + if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) { + inMicLevelTmp = tmpVol; + } + stt->micVol = inMicLevelTmp; + } + + /* Set the mic level to the previous output value if there is digital input + * gain */ + if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) { + inMicLevelTmp = stt->micVol; + } + + /* If the mic level was manually changed to a very low value raise it! */ + if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) { + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + inMicLevelTmp = (stt->minLevel + tmp32); + stt->micVol = inMicLevelTmp; + } + + if (inMicLevelTmp != stt->micVol) { + if (inMicLevel == stt->lastInMicLevel) { + // We requested a volume adjustment, but it didn't occur. This is + // probably due to a coarse quantization of the volume slider. + // Restore the requested value to prevent getting stuck. + inMicLevelTmp = stt->micVol; + } else { + // As long as the value changed, update to match. + stt->micVol = inMicLevelTmp; + } + } + + if (inMicLevelTmp > stt->maxLevel) { + // Always allow the user to raise the volume above the maxLevel. + stt->maxLevel = inMicLevelTmp; + } + + // Store last value here, after we've taken care of manual updates etc. + stt->lastInMicLevel = inMicLevel; + lastMicVol = stt->micVol; + + /* Checks if the signal is saturated. Also a check if individual samples + * are larger than 12000 is done. If they are the counter for increasing + * the volume level is set to -100ms + */ + WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]); + + /* The AGC is always allowed to lower the level if the signal is saturated */ + if (saturated == 1) { + /* Lower the recording level + * Rxx160_LP is adjusted down because it is so slow it could + * cause the AGC to make wrong decisions. */ + /* stt->Rxx160_LPw32 *= 0.875; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7; + + stt->zeroCtrlMax = stt->micVol; + + /* stt->micVol *= 0.903; */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(29591, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 2) { + stt->micVol = lastMicVol - 2; + } + inMicLevelTmp = stt->micVol; + + if (stt->micVol < stt->minOutput) { + *saturationWarning = 1; + } + + /* Reset counter for decrease of volume level to avoid + * decreasing too much. The saturation control can still + * lower the level if needed. */ + stt->msTooHigh = -100; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. This must be done since + * the measure is very slow. */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* Reset to initial values */ + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + stt->changeToSlowMode = 0; + + stt->muteGuardMs = 0; + + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; + } + + /* Check if the input speech is zero. If so the mic volume + * is increased. On some computers the input is zero up as high + * level as 17% */ + WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]); + + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. + */ + WebRtcAgc_SpeakerInactiveCtrl(stt); + + for (i = 0; i < 5; i++) { + /* Computed on blocks of 16 samples */ + + Rxx16w32 = stt->Rxx16w32_array[0][i]; + + /* Rxx160w32 in Q(-7) */ + tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3; + stt->Rxx160w32 = stt->Rxx160w32 + tmp32; + stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32; + + /* Circular buffer */ + stt->Rxx16pos++; + if (stt->Rxx16pos == kRxxBufferLen) { + stt->Rxx16pos = 0; + } + + /* Rxx16_LPw32 in Q(-4) */ + tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm; + stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32; + + if (vadLogRatio > stt->vadThreshold) { + /* Speech detected! */ + + /* Check if Rxx160_LP is in the correct range. If + * it is too high/low then we set it to the maximum of + * Rxx16_LPw32 during the first 200ms of speech. + */ + if (stt->activeSpeech < 250) { + stt->activeSpeech += 2; + + if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) { + stt->Rxx16_LPw32Max = stt->Rxx16_LPw32; + } + } else if (stt->activeSpeech == 250) { + stt->activeSpeech += 2; + tmp32 = stt->Rxx16_LPw32Max >> 3; + stt->Rxx160_LPw32 = tmp32 * kRxxBufferLen; + } + + tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm; + stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32; + + if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechOuterChange) { + stt->msTooHigh = 0; + + /* Lower the recording level */ + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + tmp32 = stt->Rxx160_LPw32 >> 6; + stt->Rxx160_LPw32 = tmp32 * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.95 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. + */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + } + } else if (stt->Rxx160_LPw32 > stt->upperLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechInnerChange) { + /* Lower the recording level */ + stt->msTooHigh = 0; + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.965 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + WEBRTC_SPL_UMUL(31621, (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechOuterChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 + */ + weightFIX = + kOffset1[index] - (int16_t)((kSlope1[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 2) { + stt->micVol = lastMicVol + 2; + } + + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechInnerChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 + */ + weightFIX = + kOffset2[index] - (int16_t)((kSlope2[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 1) { + stt->micVol = lastMicVol + 1; + } + + inMicLevelTmp = stt->micVol; + } + } else { + /* The signal is inside the desired range which is: + * lowerLimit < Rxx160_LP/640 < upperLimit + */ + if (stt->changeToSlowMode > 4000) { + stt->msecSpeechInnerChange = 1000; + stt->msecSpeechOuterChange = 500; + stt->upperLimit = stt->upperPrimaryLimit; + stt->lowerLimit = stt->lowerPrimaryLimit; + } else { + stt->changeToSlowMode += 2; // in milliseconds + } + stt->msTooLow = 0; + stt->msTooHigh = 0; + + stt->micVol = inMicLevelTmp; + } + } + } + + /* Ensure gain is not increased in presence of echo or after a mute event + * (but allow the zeroCtrl() increase on the frame of a mute detection). + */ + if (echo == 1 || + (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) { + if (stt->micVol > lastMicVol) { + stt->micVol = lastMicVol; + } + } + + /* limit the gain */ + if (stt->micVol > stt->maxLevel) { + stt->micVol = stt->maxLevel; + } else if (stt->micVol < stt->minOutput) { + stt->micVol = stt->minOutput; + } + + *outMicLevel = WEBRTC_SPL_MIN(stt->micVol, stt->maxAnalog) >> stt->scale; + + return 0; +} + +int WebRtcAgc_Analyze(void* agcInst, + const int16_t* const* in_near, + size_t num_bands, + size_t samples, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning, + int32_t gains[11]) { + LegacyAgc* stt = reinterpret_cast<LegacyAgc*>(agcInst); + + if (stt == NULL) { + return -1; + } + + if (stt->fs == 8000) { + if (samples != 80) { + return -1; + } + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) { + return -1; + } + } else { + return -1; + } + + *saturationWarning = 0; + // TODO(minyue): PUT IN RANGE CHECKING FOR INPUT LEVELS + *outMicLevel = inMicLevel; + + int32_t error = + WebRtcAgc_ComputeDigitalGains(&stt->digitalAgc, in_near, num_bands, + stt->fs, stt->lowLevelSignal, gains); + if (error == -1) { + return -1; + } + + if (stt->agcMode < kAgcModeFixedDigital && + (stt->lowLevelSignal == 0 || stt->agcMode != kAgcModeAdaptiveDigital)) { + if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevel, outMicLevel, + stt->vadMic.logRatio, echo, + saturationWarning) == -1) { + return -1; + } + } + + /* update queue */ + if (stt->inQueue > 1) { + memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t)); + memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(int32_t)); + } + + if (stt->inQueue > 0) { + stt->inQueue--; + } + + return 0; +} + +int WebRtcAgc_Process(const void* agcInst, + const int32_t gains[11], + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out) { + const LegacyAgc* stt = (const LegacyAgc*)agcInst; + return WebRtcAgc_ApplyDigitalGains(gains, num_bands, stt->fs, in_near, out); +} + +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig agcConfig) { + LegacyAgc* stt; + stt = reinterpret_cast<LegacyAgc*>(agcInst); + + if (stt == NULL) { + return -1; + } + + if (stt->initFlag != kInitCheck) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + if (agcConfig.limiterEnable != kAgcFalse && + agcConfig.limiterEnable != kAgcTrue) { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->limiterEnable = agcConfig.limiterEnable; + stt->compressionGaindB = agcConfig.compressionGaindB; + if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->targetLevelDbfs = agcConfig.targetLevelDbfs; + + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->compressionGaindB += agcConfig.targetLevelDbfs; + } + + /* Update threshold levels for analog adaptation */ + WebRtcAgc_UpdateAgcThresholds(stt); + + /* Recalculate gain table */ + if (WebRtcAgc_CalculateGainTable( + &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB, + stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) { + return -1; + } + /* Store the config in a WebRtcAgcConfig */ + stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB; + stt->usedConfig.limiterEnable = agcConfig.limiterEnable; + stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs; + + return 0; +} + +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config) { + LegacyAgc* stt; + stt = reinterpret_cast<LegacyAgc*>(agcInst); + + if (stt == NULL) { + return -1; + } + + if (config == NULL) { + stt->lastError = AGC_NULL_POINTER_ERROR; + return -1; + } + + if (stt->initFlag != kInitCheck) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + config->limiterEnable = stt->usedConfig.limiterEnable; + config->targetLevelDbfs = stt->usedConfig.targetLevelDbfs; + config->compressionGaindB = stt->usedConfig.compressionGaindB; + + return 0; +} + +void* WebRtcAgc_Create() { + LegacyAgc* stt = static_cast<LegacyAgc*>(malloc(sizeof(LegacyAgc))); + + stt->initFlag = 0; + stt->lastError = 0; + + return stt; +} + +void WebRtcAgc_Free(void* state) { + LegacyAgc* stt; + + stt = reinterpret_cast<LegacyAgc*>(state); + free(stt); +} + +/* minLevel - Minimum volume level + * maxLevel - Maximum volume level + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs) { + int32_t max_add, tmp32; + int16_t i; + int tmpNorm; + LegacyAgc* stt; + + /* typecast state pointer */ + stt = reinterpret_cast<LegacyAgc*>(agcInst); + + if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + /* Analog AGC variables */ + stt->envSum = 0; + + /* mode = 0 - Only saturation protection + * 1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3 + * dBOv)] + * 2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3 + * dBOv)] + * 3 - Fixed Digital Gain [compressionGaindB (default 8 dB)] + */ + if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) { + return -1; + } + stt->agcMode = agcMode; + stt->fs = fs; + + /* initialize input VAD */ + WebRtcAgc_InitVad(&stt->vadMic); + + /* If the volume range is smaller than 0-256 then + * the levels are shifted up to Q8-domain */ + tmpNorm = WebRtcSpl_NormU32((uint32_t)maxLevel); + stt->scale = tmpNorm - 23; + if (stt->scale < 0) { + stt->scale = 0; + } + // TODO(bjornv): Investigate if we really need to scale up a small range now + // when we have + // a guard against zero-increments. For now, we do not support scale up (scale + // = 0). + stt->scale = 0; + maxLevel <<= stt->scale; + minLevel <<= stt->scale; + + /* Make minLevel and maxLevel static in AdaptiveDigital */ + if (stt->agcMode == kAgcModeAdaptiveDigital) { + minLevel = 0; + maxLevel = 255; + stt->scale = 0; + } + /* The maximum supplemental volume range is based on a vague idea + * of how much lower the gain will be than the real analog gain. */ + max_add = (maxLevel - minLevel) / 4; + + /* Minimum/maximum volume level that can be set */ + stt->minLevel = minLevel; + stt->maxAnalog = maxLevel; + stt->maxLevel = maxLevel + max_add; + stt->maxInit = stt->maxLevel; + + stt->zeroCtrlMax = stt->maxAnalog; + stt->lastInMicLevel = 0; + + /* Initialize micVol parameter */ + stt->micVol = stt->maxAnalog; + if (stt->agcMode == kAgcModeAdaptiveDigital) { + stt->micVol = 127; /* Mid-point of mic level */ + } + stt->micRef = stt->micVol; + stt->micGainIdx = 127; + + /* Minimum output volume is 4% higher than the available lowest volume level + */ + tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8; + stt->minOutput = (stt->minLevel + tmp32); + + stt->msTooLow = 0; + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->firstCall = 0; + stt->msZero = 0; + stt->muteGuardMs = 0; + stt->gainTableIdx = 0; + + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + stt->vadThreshold = kNormalVadThreshold; + stt->inActive = 0; + + for (i = 0; i < kRxxBufferLen; i++) { + stt->Rxx16_vectorw32[i] = (int32_t)1000; /* -54dBm0 */ + } + stt->Rxx160w32 = 125 * kRxxBufferLen; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */ + + stt->Rxx16pos = 0; + stt->Rxx16_LPw32 = (int32_t)16284; /* Q(-4) */ + + for (i = 0; i < 5; i++) { + stt->Rxx16w32_array[0][i] = 0; + } + for (i = 0; i < 10; i++) { + stt->env[0][i] = 0; + stt->env[1][i] = 0; + } + stt->inQueue = 0; + + WebRtcSpl_MemSetW32(stt->filterState, 0, 8); + + stt->initFlag = kInitCheck; + // Default config settings. + stt->defaultConfig.limiterEnable = kAgcTrue; + stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL; + stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN; + + if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) { + stt->lastError = AGC_UNSPECIFIED_ERROR; + return -1; + } + stt->Rxx160_LPw32 = stt->analogTargetLevel; // Initialize rms value + + stt->lowLevelSignal = 0; + + /* Only positive values are allowed that are not too large */ + if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) { + return -1; + } else { + return 0; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h new file mode 100644 index 0000000000..22cd924a93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ + + +#include "modules/audio_processing/agc/legacy/digital_agc.h" +#include "modules/audio_processing/agc/legacy/gain_control.h" + +namespace webrtc { + +/* Analog Automatic Gain Control variables: + * Constant declarations (inner limits inside which no changes are done) + * In the beginning the range is narrower to widen as soon as the measure + * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0 + * and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal + * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm + * The limits are created by running the AGC with a file having the desired + * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined + * by out=10*log10(in/260537279.7); Set the target level to the average level + * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in + * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) ) + */ +constexpr int16_t kRxxBufferLen = 10; + +static const int16_t kMsecSpeechInner = 520; +static const int16_t kMsecSpeechOuter = 340; + +static const int16_t kNormalVadThreshold = 400; + +static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156 +static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977 + +typedef struct { + // Configurable parameters/variables + uint32_t fs; // Sampling frequency + int16_t compressionGaindB; // Fixed gain level in dB + int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3) + int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig) + uint8_t limiterEnable; // Enabling limiter (on/off (default off)) + WebRtcAgcConfig defaultConfig; + WebRtcAgcConfig usedConfig; + + // General variables + int16_t initFlag; + int16_t lastError; + + // Target level parameters + // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7) + int32_t analogTargetLevel; // = kRxxBufferLen * 846805; -22 dBfs + int32_t startUpperLimit; // = kRxxBufferLen * 1066064; -21 dBfs + int32_t startLowerLimit; // = kRxxBufferLen * 672641; -23 dBfs + int32_t upperPrimaryLimit; // = kRxxBufferLen * 1342095; -20 dBfs + int32_t lowerPrimaryLimit; // = kRxxBufferLen * 534298; -24 dBfs + int32_t upperSecondaryLimit; // = kRxxBufferLen * 2677832; -17 dBfs + int32_t lowerSecondaryLimit; // = kRxxBufferLen * 267783; -27 dBfs + uint16_t targetIdx; // Table index for corresponding target level + int16_t analogTarget; // Digital reference level in ENV scale + + // Analog AGC specific variables + int32_t filterState[8]; // For downsampling wb to nb + int32_t upperLimit; // Upper limit for mic energy + int32_t lowerLimit; // Lower limit for mic energy + int32_t Rxx160w32; // Average energy for one frame + int32_t Rxx16_LPw32; // Low pass filtered subframe energies + int32_t Rxx160_LPw32; // Low pass filtered frame energies + int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe + int32_t Rxx16_vectorw32[kRxxBufferLen]; // Array with subframe energies + int32_t Rxx16w32_array[2][5]; // Energy values of microphone signal + int32_t env[2][10]; // Envelope values of subframes + + int16_t Rxx16pos; // Current position in the Rxx16_vectorw32 + int16_t envSum; // Filtered scaled envelope in subframes + int16_t vadThreshold; // Threshold for VAD decision + int16_t inActive; // Inactive time in milliseconds + int16_t msTooLow; // Milliseconds of speech at a too low level + int16_t msTooHigh; // Milliseconds of speech at a too high level + int16_t changeToSlowMode; // Change to slow mode after some time at target + int16_t firstCall; // First call to the process-function + int16_t msZero; // Milliseconds of zero input + int16_t msecSpeechOuterChange; // Min ms of speech between volume changes + int16_t msecSpeechInnerChange; // Min ms of speech between volume changes + int16_t activeSpeech; // Milliseconds of active speech + int16_t muteGuardMs; // Counter to prevent mute action + int16_t inQueue; // 10 ms batch indicator + + // Microphone level variables + int32_t micRef; // Remember ref. mic level for virtual mic + uint16_t gainTableIdx; // Current position in virtual gain table + int32_t micGainIdx; // Gain index of mic level to increase slowly + int32_t micVol; // Remember volume between frames + int32_t maxLevel; // Max possible vol level, incl dig gain + int32_t maxAnalog; // Maximum possible analog volume level + int32_t maxInit; // Initial value of "max" + int32_t minLevel; // Minimum possible volume level + int32_t minOutput; // Minimum output volume level + int32_t zeroCtrlMax; // Remember max gain => don't amp low input + int32_t lastInMicLevel; + + int16_t scale; // Scale factor for internal volume levels + // Structs for VAD and digital_agc + AgcVad vadMic; + DigitalAgc digitalAgc; + + int16_t lowLevelSignal; +} LegacyAgc; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc new file mode 100644 index 0000000000..4cd86acba8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc @@ -0,0 +1,704 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/legacy/digital_agc.h" + +#include <string.h> + +#include "modules/audio_processing/agc/legacy/gain_control.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// To generate the gaintable, copy&paste the following lines to a Matlab window: +// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; +// zeros = 0:31; lvl = 2.^(1-zeros); +// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; +// B = MaxGain - MinGain; +// gains = round(2^16*10.^(0.05 * (MinGain + B * ( +// log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / +// log(1/(1+exp(Knee*B)))))); +// fprintf(1, '\t%i, %i, %i, %i,\n', gains); +// % Matlab code for plotting the gain and input/output level characteristic +// (copy/paste the following 3 lines): +// in = 10*log10(lvl); out = 20*log10(gains/65536); +// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input +// (dB)'); ylabel('Gain (dB)'); +// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; +// xlabel('Input (dB)'); ylabel('Output (dB)'); +// zoom on; + +// Generator table for y=log2(1+e^x) in Q8. +enum { kGenFuncTableSize = 128 }; +static const uint16_t kGenFuncTable[kGenFuncTableSize] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, + 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, + 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819, + 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881, + 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944, + 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006, + 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069, + 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194, + 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257, + 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320, + 44689, 45058, 45428, 45797, 46166, 46536, 46905}; + +static const int16_t kAvgDecayTime = 250; // frames; < 3000 + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13)) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) \ + ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) + +} // namespace + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t digCompGaindB, // Q0 + int16_t targetLevelDbfs, // Q0 + uint8_t limiterEnable, + int16_t analogTarget) { // Q0 + // This function generates the compressor gain table used in the fixed digital + // part. + uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; + int32_t inLevel, limiterLvl; + int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const uint16_t kLog10 = 54426; // log2(10) in Q14 + const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 + const uint16_t kLogE_1 = 23637; // log2(e) in Q14 + uint16_t constMaxGain; + uint16_t tmpU16, intPart, fracPart; + const int16_t kCompRatio = 3; + int16_t limiterOffset = 0; // Limiter offset + int16_t limiterIdx, limiterLvlX; + int16_t constLinApprox, maxGain, diffGain; + int16_t i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants + // kLogE_1 = 23637; // log2(e) in Q14 + // kLog10 = 54426; // log2(10) in Q14 + // kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = maxGain * kCompRatio; + if ((digCompGaindB <= analogTarget) && (limiterEnable)) { + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v + tmp32no1 = digCompGaindB * (kCompRatio - 1); + diffGain = + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + if (diffGain < 0 || diffGain >= kGenFuncTableSize) { + RTC_DCHECK(0); + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13), + kLog10_2 / 2); + tmp16no1 = + WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to + // linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) { + // Calculate scaled input level (compressor): + // inLevel = + // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. + absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (uint16_t)(absInLevel >> 14); + fracPart = + (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = tmpU16 * fracPart; // Q22 + tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 + logApprox = tmpU32no1 >> 8; // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) { + // Not enough space for multiplication + tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) { + zerosScale = 9 - zeros; + tmpU32no1 >>= zerosScale; // Q(zeros+13) + } else { + tmpU32no2 >>= zeros - 9; // Q22 + } + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 >>= 6; // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) { + logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14 + } + } + numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14 + numFIX -= (int32_t)logApprox * diffGain; // Q14 + + // Calculate ratio + // Shift `numFIX` as much as possible. + // Ensure we avoid wrap-around in `den` as well. + if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // `den` is Q8. + zeros = WebRtcSpl_NormW32(numFIX); + } else { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX *= 1 << zeros; // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1) + y32 = numFIX / tmp32no1; // in Q15 + // This is to do rounding in Q14. + y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1); + + if (limiterEnable && (i < limiterIdx)) { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= limiterLvl * (1 << 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) { + tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 + tmp32 >>= 13; // In Q14. + } else { + tmp32 = y32 * kLog10 + 8192; // in Q28 + tmp32 >>= 14; // In Q14. + } + tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) { + intPart = (int16_t)(tmp32 >> 14); + fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 + if ((fracPart >> 13) != 0) { + tmp16 = (2 << 14) - constLinApprox; + tmp32no2 = (1 << 14) - fracPart; + tmp32no2 *= tmp16; + tmp32no2 >>= 13; + tmp32no2 = (1 << 14) - tmp32no2; + } else { + tmp16 = constLinApprox - (1 << 14); + tmp32no2 = (fracPart * tmp16) >> 13; + } + fracPart = (uint16_t)tmp32no2; + gainTable[i] = + (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else { + gainTable[i] = 0; + } + } + + return 0; +} + +int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { + if (agcMode == kAgcModeFixedDigital) { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, + const int16_t* in_far, + size_t nrSamples) { + RTC_DCHECK(stt); + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +// Gains is an 11 element long array (one value per ms, incl start & end). +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt, + const int16_t* const* in_near, + size_t num_bands, + uint32_t FS, + int16_t lowlevelSignal, + int32_t gains[11]) { + int32_t tmp32; + int32_t env[10]; + int32_t max_nrg; + int32_t cur_level; + int32_t gain32; + int16_t logratio; + int16_t lower_thr, upper_thr; + int16_t zeros = 0, zeros_fast, frac = 0; + int16_t decay; + int16_t gate, gain_adj; + int16_t k; + size_t n, L; + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + } else { + return -1; + } + + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) { + tmp32 = 3 * logratio; + logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) { + decay = 0; + } else { + // decay = (int16_t)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = (lower_thr - logratio) * 65; + decay = (int16_t)(tmp32 >> 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) { + if (stt->vadNearend.stdLongTerm < 4000) { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) { + // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> + // 12); + tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; + decay = (int16_t)(tmp32 >> 12); + } + + if (lowlevelSignal != 0) { + decay = 0; + } + } + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) { + int32_t nrg = in_near[0][k * L + n] * in_near[0][k * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = + AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) { + // increase capacitorSlow + stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), + stt->capacitorSlow); + } else { + // decrease capacitorSlow + stt->capacitorSlow = + AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) { + cur_level = stt->capacitorFast; + } else { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((uint32_t)cur_level); + if (cur_level == 0) { + zeros = 31; + } + tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF; + frac = (int16_t)(tmp32 >> 19); // Q12. + // Interpolate between gainTable[zeros] and gainTable[zeros-1]. + tmp32 = + ((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >> + 12; + gains[k + 1] = stt->gainTable[zeros] + tmp32; + } + + // Gate processing (lower gain during absence of speech) + zeros = (zeros << 9) - (frac >> 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); + if (stt->capacitorFast == 0) { + zeros_fast = 31; + } + tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; + zeros_fast <<= 9; + zeros_fast -= (int16_t)(tmp32 >> 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) { + stt->gatePrevious = 0; + } else { + tmp32 = stt->gatePrevious * 7; + gate = (int16_t)((gate + tmp32) >> 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) { + if (gate < 2500) { + gain_adj = (2500 - gate) >> 5; + } else { + gain_adj = 0; + } + for (k = 0; k < 10; k++) { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { + // To prevent wraparound + tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; + tmp32 *= 178 + gain_adj; + } else { + tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj); + tmp32 >>= 8; + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) { + // Find a shift of gains[k + 1] such that it can be squared without + // overflow, but at least by 10 bits. + zeros = 10; + if (gains[k + 1] > 47452159) { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + // check for overflow + while (AGC_MUL32((env[k] >> 12) + 1, gain32) > + WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) { + // Prevent wrap around + gains[k + 1] = (gains[k + 1] / 256) * 253; + } else { + gains[k + 1] = (gains[k + 1] * 253) / 256; + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) { + if (gains[k] > gains[k + 1]) { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + return 0; +} + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out) { + // Apply gain + // handle first sub frame separately + size_t L; + int16_t L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + L2 = 3; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + L2 = 4; + } else { + return -1; + } + + for (size_t i = 0; i < num_bands; ++i) { + if (in_near[i] != out[i]) { + // Only needed if they don't already point to the same place. + memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); + } + } + + // iterate over samples + int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2)); + int32_t gain32 = gains[0] * (1 << 4); + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16; + if (out_tmp > 4095) { + out[i][n] = (int16_t)32767; + } else if (out_tmp < -4096) { + out[i][n] = (int16_t)-32768; + } else { + int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16; + out[i][n] = (int16_t)tmp32; + } + } + + gain32 += delta; + } + // iterate over subframes + for (int k = 1; k < 10; k++) { + delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); + gain32 = gains[k] * (1 << 4); + // iterate over samples + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4); + tmp64 = tmp64 >> 16; + if (tmp64 > 32767) { + out[i][k * L + n] = 32767; + } else if (tmp64 < -32768) { + out[i][k * L + n] = -32768; + } else { + out[i][k * L + n] = (int16_t)(tmp64); + } + } + gain32 += delta; + } + } + return 0; +} + +void WebRtcAgc_InitVad(AgcVad* state) { + int16_t k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) + // average input level (Q10) + state->meanLongTerm = 15 << 10; + + // variance of input level (Q8) + state->varianceLongTerm = 500 << 8; + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = 15 << 10; + + // short-term variance of input level (Q8) + state->varianceShortTerm = 500 << 8; + + state->stdShortTerm = + 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) { + // downsampling filter + state->downState[k] = 0; + } +} + +int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples) { // (i) number of samples + uint32_t nrg; + int32_t out, tmp32, tmp32b; + uint16_t tmpU16; + int16_t k, subfr, tmp16; + int16_t buf1[8]; + int16_t buf2[4]; + int16_t HPstate; + int16_t zeros, dB; + int64_t tmp64; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) { + // downsample to 4 kHz + if (nrSamples == 160) { + for (k = 0; k < 8; k++) { + tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; + tmp32 >>= 1; + buf1[k] = (int16_t)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) { + out = buf2[k] + HPstate; + tmp32 = 600 * out; + HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); + + // Add 'out * out / 2**6' to 'nrg' in a non-overflowing + // way. Guaranteed to work as long as 'out * out / 2**6' fits in + // an int32_t. + nrg += out * (out / (1 << 6)); + nrg += out * (out % (1 << 6)) / (1 << 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = (15 - zeros) * (1 << 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = state->meanShortTerm * 15 + dB; + state->meanShortTerm = (int16_t)(tmp32 >> 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceShortTerm * 15; + state->varianceShortTerm = tmp32 / 16; + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanShortTerm * state->meanShortTerm; + tmp32 = (state->varianceShortTerm << 12) - tmp32; + state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = state->meanLongTerm * state->counter + dB; + state->meanLongTerm = + WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceLongTerm * state->counter; + state->varianceLongTerm = + WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanLongTerm * state->meanLongTerm; + tmp32 = (state->varianceLongTerm << 12) - tmp32; + state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = 3 << 12; + // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in + // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() + // was used, which did an intermediate cast to (int16_t), hence losing + // significant bits. This cause logRatio to max out positive, rather than + // negative. This is a bug, but has very little significance. + tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = (13 << 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp64 = tmp32; + tmp64 += tmp32b >> 10; + tmp64 >>= 6; + + // limit + if (tmp64 > 2048) { + tmp64 = 2048; + } else if (tmp64 < -2048) { + tmp64 = -2048; + } + state->logRatio = (int16_t)tmp64; + + return state->logRatio; // Q10 +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h new file mode 100644 index 0000000000..223c74b9bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +typedef struct { + int32_t downState[8]; + int16_t HPstate; + int16_t counter; + int16_t logRatio; // log( P(active) / P(inactive) ) (Q10) + int16_t meanLongTerm; // Q10 + int32_t varianceLongTerm; // Q8 + int16_t stdLongTerm; // Q10 + int16_t meanShortTerm; // Q10 + int32_t varianceShortTerm; // Q8 + int16_t stdShortTerm; // Q10 +} AgcVad; // total = 54 bytes + +typedef struct { + int32_t capacitorSlow; + int32_t capacitorFast; + int32_t gain; + int32_t gainTable[32]; + int16_t gatePrevious; + int16_t agcMode; + AgcVad vadNearend; + AgcVad vadFarend; +} DigitalAgc; + +int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode); + +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* digitalAgcInst, + const int16_t* const* inNear, + size_t num_bands, + uint32_t FS, + int16_t lowLevelSignal, + int32_t gains[11]); + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out); + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst, + const int16_t* inFar, + size_t nrSamples); + +void WebRtcAgc_InitVad(AgcVad* vadInst); + +int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples); // (i) number of samples + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t compressionGaindB, // Q0 (in dB) + int16_t targetLevelDbfs, // Q0 (in dB) + uint8_t limiterEnable, + int16_t analogTarget); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h new file mode 100644 index 0000000000..6010a988fa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace webrtc { + +enum { + kAgcModeUnchanged, + kAgcModeAdaptiveAnalog, + kAgcModeAdaptiveDigital, + kAgcModeFixedDigital +}; + +enum { kAgcFalse = 0, kAgcTrue }; + +typedef struct { + int16_t targetLevelDbfs; // default 3 (-3 dBOv) + int16_t compressionGaindB; // default 9 dB + uint8_t limiterEnable; // default kAgcTrue (on) +} WebRtcAgcConfig; + +/* + * This function analyses the number of samples passed to + * farend and produces any error code that could arise. + * + * Input: + * - agcInst : AGC instance. + * - samples : Number of samples in input vector. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error. + */ +int WebRtcAgc_GetAddFarendError(void* state, size_t samples); + +/* + * This function processes a 10 ms frame of far-end speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). + * + * Input: + * - agcInst : AGC instance. + * - inFar : Far-end input speech vector + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_AddFarend(void* agcInst, const int16_t* inFar, size_t samples); + +/* + * This function processes a 10 ms frame of microphone speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). For very low input levels, the input signal is increased in level + * by multiplying and overwriting the samples in inMic[]. + * + * This function should be called before any further processing of the + * near-end microphone signal. + * + * Input: + * - agcInst : AGC instance. + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_AddMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples); + +/* + * This function replaces the analog microphone with a virtual one. + * It is a digital gain applied to the input signal and is used in the + * agcAdaptiveDigital mode where no microphone level is adjustable. The length + * of the input speech vector must be given in samples (80 when FS=8000, and 160 + * when FS=16000, FS=32000 or FS=48000). + * + * Input: + * - agcInst : AGC instance. + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector + * - samples : Number of samples in input vector + * - micLevelIn : Input level of microphone (static) + * + * Output: + * - inMic : Microphone output after processing (L band) + * - inMic_H : Microphone output after processing (H band) + * - micLevelOut : Adjusted microphone level after processing + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut); + +/* + * This function analyses a 10 ms frame and produces the analog and digital + * gains required to normalize the signal. The gain adjustments are done only + * during active periods of speech. The length of the speech vectors must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). The echo parameter can be used to ensure the AGC will not adjust + * upward in the presence of echo. + * + * This function should be called after processing the near-end microphone + * signal, in any case after any echo cancellation. + * + * Input: + * - agcInst : AGC instance + * - inNear : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * - samples : Number of samples in input/output vector + * - inMicLevel : Current microphone volume level + * - echo : Set to 0 if the signal passed to add_mic is + * almost certainly free of echo; otherwise set + * to 1. If you have no information regarding echo + * set to 0. + * + * Output: + * - outMicLevel : Adjusted microphone volume level + * - saturationWarning : A returned value of 1 indicates a saturation event + * has occurred and the volume cannot be further + * reduced. Otherwise will be set to 0. + * - gains : Vector of gains to apply for digital normalization + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Analyze(void* agcInst, + const int16_t* const* inNear, + size_t num_bands, + size_t samples, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning, + int32_t gains[11]); + +/* + * This function processes a 10 ms frame by applying precomputed digital gains. + * + * Input: + * - agcInst : AGC instance + * - gains : Vector of gains to apply for digital normalization + * - in_near : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * + * Output: + * - out : Gain-adjusted near-end speech vector + * : May be the same vector as the input. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Process(const void* agcInst, + const int32_t gains[11], + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out); + +/* + * This function sets the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * - config : config struct + * + * Output: + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config); + +/* + * This function returns the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * + * Output: + * - config : config struct + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config); + +/* + * This function creates and returns an AGC instance, which will contain the + * state information for one (duplex) channel. + */ +void* WebRtcAgc_Create(void); + +/* + * This function frees the AGC instance created at the beginning. + * + * Input: + * - agcInst : AGC instance. + */ +void WebRtcAgc_Free(void* agcInst); + +/* + * This function initializes an AGC instance. + * + * Input: + * - agcInst : AGC instance. + * - minLevel : Minimum possible mic level + * - maxLevel : Maximum possible mic level + * - agcMode : 0 - Unchanged + * : 1 - Adaptive Analog Automatic Gain Control -3dBOv + * : 2 - Adaptive Digital Automatic Gain Control -3dBOv + * : 3 - Fixed Digital Gain 0dB + * - fs : Sampling frequency + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build new file mode 100644 index 0000000000..0188a8ac10 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build new file mode 100644 index 0000000000..9db9a639e7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/utility.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("level_estimation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc new file mode 100644 index 0000000000..b0a1f53b97 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/loudness_histogram.h" + +#include <string.h> + +#include <cmath> + +#include "rtc_base/checks.h" + +namespace webrtc { + +static const double kHistBinCenters[] = { + 7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01, + 1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01, + 2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01, + 3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01, + 5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01, + 1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00, + 1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00, + 2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00, + 4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00, + 7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01, + 1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01, + 2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01, + 3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01, + 6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01, + 1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02, + 1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02, + 2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02, + 4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02, + 8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03, + 1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03, + 2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03, + 3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03, + 6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03, + 1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04, + 1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04, + 3.00339145144454e+04, 3.56647189489147e+04}; + +static const double kProbQDomain = 1024.0; +// Loudness of -15 dB (smallest expected loudness) in log domain, +// loudness_db = 13.5 * log10(rms); +static const double kLogDomainMinBinCenter = -2.57752062648587; +// Loudness step of 1 dB in log domain +static const double kLogDomainStepSizeInverse = 5.81954605750359; + +static const int kTransientWidthThreshold = 7; +static const double kLowProbabilityThreshold = 0.2; + +static const int kLowProbThresholdQ10 = + static_cast<int>(kLowProbabilityThreshold * kProbQDomain); + +LoudnessHistogram::LoudnessHistogram() + : num_updates_(0), + audio_content_q10_(0), + bin_count_q10_(), + activity_probability_(), + hist_bin_index_(), + buffer_index_(0), + buffer_is_full_(false), + len_circular_buffer_(0), + len_high_activity_(0) { + static_assert( + kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]), + "histogram bin centers incorrect size"); +} + +LoudnessHistogram::LoudnessHistogram(int window_size) + : num_updates_(0), + audio_content_q10_(0), + bin_count_q10_(), + activity_probability_(new int[window_size]), + hist_bin_index_(new int[window_size]), + buffer_index_(0), + buffer_is_full_(false), + len_circular_buffer_(window_size), + len_high_activity_(0) {} + +LoudnessHistogram::~LoudnessHistogram() {} + +void LoudnessHistogram::Update(double rms, double activity_probaility) { + // If circular histogram is activated then remove the oldest entry. + if (len_circular_buffer_ > 0) + RemoveOldestEntryAndUpdate(); + + // Find the corresponding bin. + int hist_index = GetBinIndex(rms); + // To Q10 domain. + int prob_q10 = + static_cast<int16_t>(floor(activity_probaility * kProbQDomain)); + InsertNewestEntryAndUpdate(prob_q10, hist_index); +} + +// Doing nothing if buffer is not full, yet. +void LoudnessHistogram::RemoveOldestEntryAndUpdate() { + RTC_DCHECK_GT(len_circular_buffer_, 0); + // Do nothing if circular buffer is not full. + if (!buffer_is_full_) + return; + + int oldest_prob = activity_probability_[buffer_index_]; + int oldest_hist_index = hist_bin_index_[buffer_index_]; + UpdateHist(-oldest_prob, oldest_hist_index); +} + +void LoudnessHistogram::RemoveTransient() { + // Don't expect to be here if high-activity region is longer than + // `kTransientWidthThreshold` or there has not been any transient. + RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold); + int index = + (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1; + while (len_high_activity_ > 0) { + UpdateHist(-activity_probability_[index], hist_bin_index_[index]); + activity_probability_[index] = 0; + index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1); + len_high_activity_--; + } +} + +void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10, + int hist_index) { + // Update the circular buffer if it is enabled. + if (len_circular_buffer_ > 0) { + // Removing transient. + if (activity_prob_q10 <= kLowProbThresholdQ10) { + // Lower than threshold probability, set it to zero. + activity_prob_q10 = 0; + // Check if this has been a transient. + if (len_high_activity_ <= kTransientWidthThreshold) + RemoveTransient(); // Remove this transient. + len_high_activity_ = 0; + } else if (len_high_activity_ <= kTransientWidthThreshold) { + len_high_activity_++; + } + // Updating the circular buffer. + activity_probability_[buffer_index_] = activity_prob_q10; + hist_bin_index_[buffer_index_] = hist_index; + // Increment the buffer index and check for wrap-around. + buffer_index_++; + if (buffer_index_ >= len_circular_buffer_) { + buffer_index_ = 0; + buffer_is_full_ = true; + } + } + + num_updates_++; + if (num_updates_ < 0) + num_updates_--; + + UpdateHist(activity_prob_q10, hist_index); +} + +void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) { + bin_count_q10_[hist_index] += activity_prob_q10; + audio_content_q10_ += activity_prob_q10; +} + +double LoudnessHistogram::AudioContent() const { + return audio_content_q10_ / kProbQDomain; +} + +LoudnessHistogram* LoudnessHistogram::Create() { + return new LoudnessHistogram; +} + +LoudnessHistogram* LoudnessHistogram::Create(int window_size) { + if (window_size < 0) + return NULL; + return new LoudnessHistogram(window_size); +} + +void LoudnessHistogram::Reset() { + // Reset the histogram, audio-content and number of updates. + memset(bin_count_q10_, 0, sizeof(bin_count_q10_)); + audio_content_q10_ = 0; + num_updates_ = 0; + // Empty the circular buffer. + buffer_index_ = 0; + buffer_is_full_ = false; + len_high_activity_ = 0; +} + +int LoudnessHistogram::GetBinIndex(double rms) { + // First exclude overload cases. + if (rms <= kHistBinCenters[0]) { + return 0; + } else if (rms >= kHistBinCenters[kHistSize - 1]) { + return kHistSize - 1; + } else { + // The quantizer is uniform in log domain. Alternatively we could do binary + // search in linear domain. + double rms_log = log(rms); + + int index = static_cast<int>( + floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse)); + // The final decision is in linear domain. + double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]); + if (rms > b) { + return index + 1; + } + return index; + } +} + +double LoudnessHistogram::CurrentRms() const { + double p; + double mean_val = 0; + if (audio_content_q10_ > 0) { + double p_total_inverse = 1. / static_cast<double>(audio_content_q10_); + for (int n = 0; n < kHistSize; n++) { + p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse; + mean_val += p * kHistBinCenters[n]; + } + } else { + mean_val = kHistBinCenters[0]; + } + return mean_val; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h new file mode 100644 index 0000000000..51b38714c2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ + +#include <stdint.h> + +#include <memory> + +namespace webrtc { + +// This class implements the histogram of loudness with circular buffers so that +// the histogram tracks the last T seconds of the loudness. +class LoudnessHistogram { + public: + // Create a non-sliding LoudnessHistogram. + static LoudnessHistogram* Create(); + + // Create a sliding LoudnessHistogram, i.e. the histogram represents the last + // `window_size` samples. + static LoudnessHistogram* Create(int window_size); + ~LoudnessHistogram(); + + // Insert RMS and the corresponding activity probability. + void Update(double rms, double activity_probability); + + // Reset the histogram, forget the past. + void Reset(); + + // Current loudness, which is actually the mean of histogram in loudness + // domain. + double CurrentRms() const; + + // Sum of the histogram content. + double AudioContent() const; + + // Number of times the histogram has been updated. + int num_updates() const { return num_updates_; } + + private: + LoudnessHistogram(); + explicit LoudnessHistogram(int window); + + // Find the histogram bin associated with the given `rms`. + int GetBinIndex(double rms); + + void RemoveOldestEntryAndUpdate(); + void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index); + void UpdateHist(int activity_prob_q10, int hist_index); + void RemoveTransient(); + + // Number of histogram bins. + static const int kHistSize = 77; + + // Number of times the histogram is updated + int num_updates_; + // Audio content, this should be equal to the sum of the components of + // `bin_count_q10_`. + int64_t audio_content_q10_; + + // LoudnessHistogram of input RMS in Q10 with `kHistSize_` bins. In each + // 'Update(),' we increment the associated histogram-bin with the given + // probability. The increment is implemented in Q10 to avoid rounding errors. + int64_t bin_count_q10_[kHistSize]; + + // Circular buffer for probabilities + std::unique_ptr<int[]> activity_probability_; + // Circular buffer for histogram-indices of probabilities. + std::unique_ptr<int[]> hist_bin_index_; + // Current index of circular buffer, where the newest data will be written to, + // therefore, pointing to the oldest data if buffer is full. + int buffer_index_; + // Indicating if buffer is full and we had a wrap around. + int buffer_is_full_; + // Size of circular buffer. + int len_circular_buffer_; + int len_high_activity_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc new file mode 100644 index 0000000000..bbc0a7ee92 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Use CreateHistUnittestFile.m to generate the input file. + +#include "modules/audio_processing/agc/loudness_histogram.h" + +#include <stdio.h> + +#include <algorithm> +#include <cmath> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/agc/utility.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +struct InputOutput { + double rms; + double activity_probability; + double audio_content; + double loudness; +}; + +const double kRelativeErrTol = 1e-10; + +class LoudnessHistogramTest : public ::testing::Test { + protected: + void RunTest(bool enable_circular_buff, absl::string_view filename); + + private: + void TestClean(); + std::unique_ptr<LoudnessHistogram> hist_; +}; + +void LoudnessHistogramTest::TestClean() { + EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02); + EXPECT_EQ(hist_->AudioContent(), 0); + EXPECT_EQ(hist_->num_updates(), 0); +} + +void LoudnessHistogramTest::RunTest(bool enable_circular_buff, + absl::string_view filename) { + FILE* in_file = fopen(std::string(filename).c_str(), "rb"); + ASSERT_TRUE(in_file != NULL); + if (enable_circular_buff) { + int buffer_size; + EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u); + hist_.reset(LoudnessHistogram::Create(buffer_size)); + } else { + hist_.reset(LoudnessHistogram::Create()); + } + TestClean(); + + InputOutput io; + int num_updates = 0; + while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) { + if (io.rms < 0) { + // We have to reset. + hist_->Reset(); + TestClean(); + num_updates = 0; + // Read the next chunk of input. + if (fread(&io, sizeof(InputOutput), 1, in_file) != 1) + break; + } + hist_->Update(io.rms, io.activity_probability); + num_updates++; + EXPECT_EQ(hist_->num_updates(), num_updates); + double audio_content = hist_->AudioContent(); + + double abs_err = + std::min(audio_content, io.audio_content) * kRelativeErrTol; + + ASSERT_NEAR(audio_content, io.audio_content, abs_err); + double current_loudness = Linear2Loudness(hist_->CurrentRms()); + abs_err = + std::min(fabs(current_loudness), fabs(io.loudness)) * kRelativeErrTol; + ASSERT_NEAR(current_loudness, io.loudness, abs_err); + } + fclose(in_file); +} + +TEST_F(LoudnessHistogramTest, ActiveCircularBuffer) { + RunTest(true, test::ResourcePath( + "audio_processing/agc/agc_with_circular_buffer", "dat") + .c_str()); +} + +TEST_F(LoudnessHistogramTest, InactiveCircularBuffer) { + RunTest(false, test::ResourcePath( + "audio_processing/agc/agc_no_circular_buffer", "dat") + .c_str()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h new file mode 100644 index 0000000000..3080e1563c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ + +#include "api/array_view.h" +#include "modules/audio_processing/agc/agc.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockAgc : public Agc { + public: + virtual ~MockAgc() {} + MOCK_METHOD(void, Process, (rtc::ArrayView<const int16_t> audio), (override)); + MOCK_METHOD(bool, GetRmsErrorDb, (int* error), (override)); + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(int, set_target_level_dbfs, (int level), (override)); + MOCK_METHOD(int, target_level_dbfs, (), (const, override)); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.cc b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc new file mode 100644 index 0000000000..2a87e5ce74 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/utility.h" + +#include <math.h> + +namespace webrtc { + +static const double kLog10 = 2.30258509299; +static const double kLinear2DbScale = 20.0 / kLog10; +static const double kLinear2LoudnessScale = 13.4 / kLog10; + +double Loudness2Db(double loudness) { + return loudness * kLinear2DbScale / kLinear2LoudnessScale; +} + +double Linear2Loudness(double rms) { + if (rms == 0) + return -15; + return kLinear2LoudnessScale * log(rms); +} + +double Db2Loudness(double db) { + return db * kLinear2LoudnessScale / kLinear2DbScale; +} + +double Dbfs2Loudness(double dbfs) { + return Db2Loudness(90 + dbfs); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.h b/third_party/libwebrtc/modules/audio_processing/agc/utility.h new file mode 100644 index 0000000000..56eec244a7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ +#define MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ + +namespace webrtc { + +// TODO(turajs): Add description of function. +double Loudness2Db(double loudness); + +double Linear2Loudness(double rms); + +double Db2Loudness(double db); + +double Dbfs2Loudness(double dbfs); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ |