blob: 7746f6c000d827651d7573707387036768290d16 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/speech_probability_buffer.h"
#include <algorithm>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {

// Number of probabilities held by the circular buffer, i.e. the length of the
// window over which speech activity is evaluated.
constexpr int kNumAnalysisFrames{100};

// Minimum average probability over a full window for the window to count as
// an active segment (the sum is compared against this value times
// `kNumAnalysisFrames`).
constexpr float kActivityThreshold{0.9f};

// A run of consecutive above-threshold probabilities that is no longer than
// this value and is followed by a low probability is treated as a transient
// and zeroed out.
// We use 12 in AGC2 adaptive digital, but with a slightly different logic.
constexpr int kTransientWidthThreshold{7};

}  // namespace
// Constructs the buffer.
// `low_probability_threshold` is stored as the cut-off used by `Update()`:
// probabilities at or below it are recorded as zero. It is expected to lie in
// [0, 1], which is enforced below with RTC_DCHECK.
// `probabilities_` is constructed from `kNumAnalysisFrames` and is expected
// to be non-empty afterwards.
SpeechProbabilityBuffer::SpeechProbabilityBuffer(
float low_probability_threshold)
: low_probability_threshold_(low_probability_threshold),
probabilities_(kNumAnalysisFrames) {
// Validate the threshold range.
RTC_DCHECK_GE(low_probability_threshold, 0.0f);
RTC_DCHECK_LE(low_probability_threshold, 1.0f);
// The circular-buffer logic indexes into `probabilities_`, so it must hold
// at least one element.
RTC_DCHECK(!probabilities_.empty());
}
// Adds `probability` to the circular buffer and keeps the running sum of the
// stored values up to date.
//
// A probability at or below `low_probability_threshold_` is stored as zero.
// When such a value arrives after a run of at most `kTransientWidthThreshold`
// higher probabilities, that run is considered a transient and is zeroed too.
void SpeechProbabilityBuffer::Update(float probability) {
  // After the buffer has wrapped, the slot about to be overwritten holds the
  // oldest value; take it out of the running sum before replacing it.
  if (buffer_is_full_) {
    const float evicted_probability = probabilities_[buffer_index_];
    sum_probabilities_ -= evicted_probability;
  }

  const bool is_low_probability = probability <= low_probability_threshold_;
  if (is_low_probability) {
    // The run of high probabilities (if any) has ended. If it was short
    // enough, erase it from the buffer as a transient.
    if (num_high_probability_observations_ <= kTransientWidthThreshold) {
      RemoveTransient();
    }
    num_high_probability_observations_ = 0;
    // Low probabilities are recorded as zero.
    probability = 0.0f;
  } else if (num_high_probability_observations_ <= kTransientWidthThreshold) {
    // Count the run length. The counter stops one past the threshold, which
    // is enough to tell that the run is too long to be a transient.
    ++num_high_probability_observations_;
  }

  // Store the (possibly zeroed) value and account for it in the sum.
  probabilities_[buffer_index_] = probability;
  sum_probabilities_ += probability;

  // Advance the write position; the first wrap-around marks the buffer full.
  ++buffer_index_;
  if (buffer_index_ >= kNumAnalysisFrames) {
    buffer_index_ = 0;
    buffer_is_full_ = true;
  }
}
// Zeroes the `num_high_probability_observations_` most recently written
// entries and subtracts them from the running sum.
//
// The counter is consumed by the loop below and is left negative when this
// function returns; `Update()` sets it back to zero right after the call.
void SpeechProbabilityBuffer::RemoveTransient() {
  // This must only be reached for runs short enough to qualify as a
  // transient, i.e. not longer than `kTransientWidthThreshold`.
  RTC_DCHECK_LE(num_high_probability_observations_, kTransientWidthThreshold);

  // Walk backwards from the write position, wrapping around at the start of
  // the circular buffer.
  int slot = buffer_index_;
  while (num_high_probability_observations_-- > 0) {
    slot = (slot > 0) ? (slot - 1) : (kNumAnalysisFrames - 1);
    sum_probabilities_ -= probabilities_[slot];
    probabilities_[slot] = 0.0f;
  }
}
// Returns true when the buffer has been completely filled and the sum of the
// stored probabilities is not below `kActivityThreshold` times
// `kNumAnalysisFrames`; returns false otherwise.
bool SpeechProbabilityBuffer::IsActiveSegment() const {
  // The comparison is deliberately written as a negated `<` (rather than
  // `>=`) so that the result for a NaN sum stays the same as with a plain
  // "below threshold" rejection test.
  return buffer_is_full_ &&
         !(sum_probabilities_ < kActivityThreshold * kNumAnalysisFrames);
}
// Returns the buffer to its initial, empty state.
//
// Only the bookkeeping is reset: the write position, the "full" flag, the
// running sum and the high-probability run counter. The values stored in
// `probabilities_` are not cleared here.
void SpeechProbabilityBuffer::Reset() {
  // Rewind the circular buffer.
  buffer_is_full_ = false;
  buffer_index_ = 0;

  // Forget the accumulated statistics.
  num_high_probability_observations_ = 0;
  sum_probabilities_ = 0.0f;
}
} // namespace webrtc
|