summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc
blob: 98068964d9eb5304a5aab3ca95e499136047896d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*
 *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/aec3/multi_channel_content_detector.h"

#include <cmath>

#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"

namespace webrtc {

namespace {

// Number of frames per second. Used to convert the durations that are
// specified in seconds into frame counts.
constexpr int kNumFramesPerSecond = 100;

// Compares the left and right channels (the first two channels) in the render
// `frame`, indexed as frame[band][channel][sample], to determine whether the
// signal is a proper stereo signal. To allow for differences introduced by
// hardware drivers, two samples are only considered to differ when their
// absolute difference exceeds `detection_threshold`.
//
// Returns true as soon as one differing sample pair is found in any band, and
// false otherwise. An empty `frame`, or a frame with fewer than two channels,
// is never reported as stereo.
bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
                      float detection_threshold) {
  // The emptiness check must come first: inspecting the channel count of the
  // first band of a frame without bands would be undefined behavior.
  if (frame.empty() || frame[0].size() < 2) {
    return false;
  }

  for (const auto& band : frame) {
    const std::vector<float>& left = band[0];
    const std::vector<float>& right = band[1];
    for (size_t k = 0; k < left.size(); ++k) {
      if (std::fabs(left[k] - right[k]) > detection_threshold) {
        return true;
      }
    }
  }
  return false;
}

// In order to avoid logging metrics for very short lifetimes that are unlikely
// to reflect real calls and that may dilute the "real" data, logging is limited
// to lifetimes of at least 5 seconds (500 frames at kNumFramesPerSecond).
constexpr int kMinNumberOfFramesRequiredToLogMetrics = 500;

// Continuous metrics are logged every 10 seconds (1000 frames at
// kNumFramesPerSecond).
constexpr int kFramesPer10Seconds = 1000;

}  // namespace

// The constructor performs no work of its own.
MultiChannelContentDetector::MetricsLogger::MetricsLogger() = default;

// On destruction, reports whether persistent multichannel content was ever
// detected during the lifetime of the logger. Nothing is reported when fewer
// than kMinNumberOfFramesRequiredToLogMetrics frames have been processed.
MultiChannelContentDetector::MetricsLogger::~MetricsLogger() {
  const bool lifetime_long_enough =
      frame_counter_ >= kMinNumberOfFramesRequiredToLogMetrics;
  if (lifetime_long_enough) {
    RTC_HISTOGRAM_BOOLEAN(
        "WebRTC.Audio.EchoCanceller.PersistentMultichannelContentEverDetected",
        any_multichannel_content_detected_ ? 1 : 0);
  }
}

// Accounts for one processed frame. `persistent_multichannel_content_detected`
// tells whether persistent multichannel content is currently detected.
//
// Once at least kMinNumberOfFramesRequiredToLogMetrics frames have been seen,
// every kFramesPer10Seconds-th frame triggers a report of whether at least
// half of the frames since the previous reset of the per-interval counter had
// persistent multichannel content. That counter is reset after each report.
void MultiChannelContentDetector::MetricsLogger::Update(
    bool persistent_multichannel_content_detected) {
  frame_counter_ += 1;
  if (persistent_multichannel_content_detected) {
    ++persistent_multichannel_frame_counter_;
    any_multichannel_content_detected_ = true;
  }

  const bool report_due =
      frame_counter_ >= kMinNumberOfFramesRequiredToLogMetrics &&
      frame_counter_ % kFramesPer10Seconds == 0;
  if (report_due) {
    constexpr int kMajorityOfFrames = kFramesPer10Seconds / 2;
    const bool mostly_multichannel =
        persistent_multichannel_frame_counter_ >= kMajorityOfFrames;
    RTC_HISTOGRAM_BOOLEAN(
        "WebRTC.Audio.EchoCanceller.ProcessingPersistentMultichannelContent",
        mostly_multichannel ? 1 : 0);

    // Start counting afresh for the next reporting interval.
    persistent_multichannel_frame_counter_ = 0;
  }
}

// Configures the detector.
// - `detect_stereo_content`: whether stereo detection is performed at all.
//   When it is false, persistent multichannel content is reported from the
//   start whenever `num_render_input_channels` exceeds 1.
// - `detection_threshold`: threshold forwarded to the per-frame stereo check.
// - `stereo_detection_timeout_threshold_seconds`: timeout in seconds,
//   converted to frames; a non-positive value disables the timeout.
// - `stereo_detection_hysteresis_seconds`: hysteresis in seconds, converted to
//   a whole number of frames.
MultiChannelContentDetector::MultiChannelContentDetector(
    bool detect_stereo_content,
    int num_render_input_channels,
    float detection_threshold,
    int stereo_detection_timeout_threshold_seconds,
    float stereo_detection_hysteresis_seconds)
    : detect_stereo_content_(detect_stereo_content),
      detection_threshold_(detection_threshold),
      // No timeout is stored for non-positive timeout values.
      detection_timeout_threshold_frames_(
          stereo_detection_timeout_threshold_seconds <= 0
              ? absl::nullopt
              : absl::make_optional(
                    kNumFramesPerSecond *
                    stereo_detection_timeout_threshold_seconds)),
      stereo_detection_hysteresis_frames_(static_cast<int>(
          kNumFramesPerSecond * stereo_detection_hysteresis_seconds)),
      // Metrics are only gathered when detection is active on multichannel
      // input.
      metrics_logger_((!detect_stereo_content || num_render_input_channels <= 1)
                          ? nullptr
                          : std::make_unique<MetricsLogger>()),
      persistent_multichannel_content_detected_(
          num_render_input_channels > 1 && !detect_stereo_content) {}

bool MultiChannelContentDetector::UpdateDetection(
    const std::vector<std::vector<std::vector<float>>>& frame) {
  if (!detect_stereo_content_) {
    RTC_DCHECK_EQ(frame[0].size() > 1,
                  persistent_multichannel_content_detected_);
    return false;
  }

  const bool previous_persistent_multichannel_content_detected =
      persistent_multichannel_content_detected_;
  const bool stereo_detected_in_frame =
      HasStereoContent(frame, detection_threshold_);

  consecutive_frames_with_stereo_ =
      stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
  frames_since_stereo_detected_last_ =
      stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;

  // Detect persistent multichannel content.
  if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
    persistent_multichannel_content_detected_ = true;
  }
  if (detection_timeout_threshold_frames_.has_value() &&
      frames_since_stereo_detected_last_ >=
          *detection_timeout_threshold_frames_) {
    persistent_multichannel_content_detected_ = false;
  }

  // Detect temporary multichannel content.
  temporary_multichannel_content_detected_ =
      persistent_multichannel_content_detected_ ? false
                                                : stereo_detected_in_frame;

  if (metrics_logger_)
    metrics_logger_->Update(persistent_multichannel_content_detected_);

  return previous_persistent_multichannel_content_detected !=
         persistent_multichannel_content_detected_;
}

}  // namespace webrtc