diff options
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc')
-rw-r--r-- | third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc new file mode 100644 index 0000000000..475bef9775 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "rtc_base/checks.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +using ::rnnoise::kInputLayerInputSize; +static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerOutputSize; +static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +using ::rnnoise::kHiddenGruBias; +using ::rnnoise::kHiddenGruRecurrentWeights; +using ::rnnoise::kHiddenGruWeights; +using ::rnnoise::kHiddenLayerOutputSize; +static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, ""); + +using ::rnnoise::kOutputDenseBias; +using ::rnnoise::kOutputDenseWeights; +using ::rnnoise::kOutputLayerOutputSize; +static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +} // namespace + +RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) + : input_(kInputLayerInputSize, + kInputLayerOutputSize, + kInputDenseBias, + kInputDenseWeights, + ActivationFunction::kTansigApproximated, + cpu_features, + /*layer_name=*/"FC1"), + hidden_(kInputLayerOutputSize, + kHiddenLayerOutputSize, + kHiddenGruBias, + kHiddenGruWeights, + kHiddenGruRecurrentWeights, + cpu_features, + /*layer_name=*/"GRU1"), + output_(kHiddenLayerOutputSize, + kOutputLayerOutputSize, + kOutputDenseBias, + kOutputDenseWeights, + ActivationFunction::kSigmoidApproximated, + // The output layer is just 24x1. The unoptimized code is faster. + NoAvailableCpuFeatures(), + /*layer_name=*/"FC2") { + // Input-output chaining size checks. + RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) + << "The input and the hidden layers sizes do not match."; + RTC_DCHECK_EQ(hidden_.size(), output_.input_size()) + << "The hidden and the output layers sizes do not match."; +} + +RnnVad::~RnnVad() = default; + +void RnnVad::Reset() { + hidden_.Reset(); +} + +float RnnVad::ComputeVadProbability( + rtc::ArrayView<const float, kFeatureVectorSize> feature_vector, + bool is_silence) { + if (is_silence) { + Reset(); + return 0.f; + } + input_.ComputeOutput(feature_vector); + hidden_.ComputeOutput(input_); + output_.ComputeOutput(hidden_); + RTC_DCHECK_EQ(output_.size(), 1); + return output_.data()[0]; +} + +} // namespace rnn_vad +} // namespace webrtc |