Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/test/conversational_speech')
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn  81
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS  3
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md  74
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc  31
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h  43
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc  88
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc  675
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc  34
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h  48
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc  66
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h  59
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc  193
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h  104
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc  235
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h  44
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc  73
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h  51
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h  34
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc  65
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h  36
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h  40
21 files changed, 2077 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
new file mode 100644
index 0000000000..2c3678092e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
@@ -0,0 +1,81 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../../webrtc.gni")
+
+if (!build_with_chromium) {
+ group("conversational_speech") {
+ testonly = true
+ deps = [ ":conversational_speech_generator" ]
+ }
+
+ rtc_executable("conversational_speech_generator") {
+ testonly = true
+ sources = [ "generator.cc" ]
+ deps = [
+ ":lib",
+ "../../../../test:fileutils",
+ "../../../../test:test_support",
+ "//third_party/abseil-cpp/absl/flags:flag",
+ "//third_party/abseil-cpp/absl/flags:parse",
+ ]
+ }
+}
+
+rtc_library("lib") {
+ testonly = true
+ sources = [
+ "config.cc",
+ "config.h",
+ "multiend_call.cc",
+ "multiend_call.h",
+ "simulator.cc",
+ "simulator.h",
+ "timing.cc",
+ "timing.h",
+ "wavreader_abstract_factory.h",
+ "wavreader_factory.cc",
+ "wavreader_factory.h",
+ "wavreader_interface.h",
+ ]
+ deps = [
+ "../../../../api:array_view",
+ "../../../../common_audio",
+ "../../../../rtc_base:checks",
+ "../../../../rtc_base:logging",
+ "../../../../rtc_base:safe_conversions",
+ "../../../../rtc_base:stringutils",
+ "../../../../test:fileutils",
+ ]
+ absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
+ visibility = [ ":*" ] # Only targets in this file can depend on this.
+}
+
+rtc_library("unittest") {
+ testonly = true
+ sources = [
+ "generator_unittest.cc",
+ "mock_wavreader.cc",
+ "mock_wavreader.h",
+ "mock_wavreader_factory.cc",
+ "mock_wavreader_factory.h",
+ ]
+ deps = [
+ ":lib",
+ "../../../../api:array_view",
+ "../../../../common_audio",
+ "../../../../rtc_base:logging",
+ "../../../../test:fileutils",
+ "../../../../test:test_support",
+ "//testing/gtest",
+ ]
+ absl_deps = [
+ "//third_party/abseil-cpp/absl/strings",
+ "//third_party/abseil-cpp/absl/types:optional",
+ ]
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS
new file mode 100644
index 0000000000..07cff405e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS
@@ -0,0 +1,3 @@
+alessiob@webrtc.org
+henrik.lundin@webrtc.org
+peah@webrtc.org
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md
new file mode 100644
index 0000000000..0fa66669e6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md
@@ -0,0 +1,74 @@
+# Conversational Speech generator tool
+
+Tool to generate multiple-end audio tracks to simulate conversational speech
+with two or more participants.
+
+The input to the tool is a directory containing a number of audio tracks and
+a text file indicating how to time the sequence of speech turns (see the Example
+section).
+
+Since the timing of the speaking turns is specified by the user, the generated
+tracks may not be suitable for testing scenarios in which there is unpredictable
+network delay (e.g., end-to-end RTC assessment).
+
+Instead, the generated track pairs can be used when the delay is constant
+(including the case in which there is no delay at all).
+For instance, echo cancellation in the APM module can be evaluated by using the
+two-end audio tracks as the input and the reverse input.
+
+By indicating negative and positive time offsets, one can reproduce cross-talk
+(aka double-talk) and silence in the conversation.
+
+### Example
+
+For each end, there is a set of audio tracks, e.g., a1, a2, a3 and a4
+(speaker A) and b1, b2 (speaker B).
+The text file with the timing information may look like this:
+
+```
+A a1 0
+B b1 0
+A a2 100
+B b2 -200
+A a3 0
+A a4 0
+```
+
+The first column indicates the speaker name, the second contains the audio track
+file names, and the third the offsets (in milliseconds) used to concatenate the
+chunks. An optional fourth column contains positive or negative integral gains
+in dB that will be applied to the tracks. It's possible to specify the gain for
+some turns but not for others. If the gain is left out, no gain is applied.
+
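+For example, a hypothetical line with the optional gain column would look like
+this (track b2 starts 200 ms before the previous turn ends and is attenuated by
+6 dB):
+
+```
+B b2 -200 -6
+```
+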
+Assume that all the audio tracks in the example above are 1000 ms long.
+The tool will then generate two tracks (A and B) that look like this:
+
+**Track A**
+```
+ a1 (1000 ms)
+ silence (1100 ms)
+ a2 (1000 ms)
+ silence (800 ms)
+ a3 (1000 ms)
+ a4 (1000 ms)
+```
+
+**Track B**
+```
+ silence (1000 ms)
+ b1 (1000 ms)
+ silence (900 ms)
+ b2 (1000 ms)
+ silence (2000 ms)
+```
+
+The two tracks can also be visualized as follows (one character represents
+100 ms, "." is silence and "*" is speech).
+
+```
+t: 0         1         2         3         4         5         6 (s)
+A: **********...........**********........********************
+B: ..........**********.........**********....................
+                                ^ 200 ms cross-talk
+        100 ms silence ^
+```
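+
+For each speaker, the tool writes a near-end and a far-end track into the
+output directory. With the two speakers A and B above, and the file naming
+used in `simulator.cc`, the generated files would be:
+
+```
+s_A-near_end.wav
+s_A-far_end.wav
+s_B-near_end.wav
+s_B-far_end.wav
+```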
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
new file mode 100644
index 0000000000..76d3de8108
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/config.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+const std::string& Config::audiotracks_path() const {
+ return audiotracks_path_;
+}
+
+const std::string& Config::timing_filepath() const {
+ return timing_filepath_;
+}
+
+const std::string& Config::output_path() const {
+ return output_path_;
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
new file mode 100644
index 0000000000..5a847e06a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+struct Config {
+ Config(absl::string_view audiotracks_path,
+ absl::string_view timing_filepath,
+ absl::string_view output_path)
+ : audiotracks_path_(audiotracks_path),
+ timing_filepath_(timing_filepath),
+ output_path_(output_path) {}
+
+ const std::string& audiotracks_path() const;
+ const std::string& timing_filepath() const;
+ const std::string& output_path() const;
+
+ const std::string audiotracks_path_;
+ const std::string timing_filepath_;
+ const std::string output_path_;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc
new file mode 100644
index 0000000000..4f776fa216
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(std::string, i, "", "Directory containing the speech turn wav files");
+ABSL_FLAG(std::string, t, "", "Path to the timing text file");
+ABSL_FLAG(std::string, o, "", "Output wav files destination path");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const char kUsageDescription[] =
+ "Usage: conversational_speech_generator\n"
+ " -i <path/to/source/audiotracks>\n"
+ " -t <path/to/timing_file.txt>\n"
+ " -o <output/path>\n"
+ "\n\n"
+ "Command-line tool to generate multiple-end audio tracks to simulate "
+ "conversational speech with two or more participants.\n";
+
+} // namespace
+
+int main(int argc, char* argv[]) {
+ std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+ if (args.size() != 1) {
+ printf("%s", kUsageDescription);
+ return 1;
+ }
+ RTC_CHECK(DirExists(absl::GetFlag(FLAGS_i)));
+ RTC_CHECK(FileExists(absl::GetFlag(FLAGS_t)));
+ RTC_CHECK(DirExists(absl::GetFlag(FLAGS_o)));
+
+ conversational_speech::Config config(
+ absl::GetFlag(FLAGS_i), absl::GetFlag(FLAGS_t), absl::GetFlag(FLAGS_o));
+
+ // Load timing.
+ std::vector<conversational_speech::Turn> timing =
+ conversational_speech::LoadTiming(config.timing_filepath());
+
+ // Parse timing and audio tracks.
+ auto wavreader_factory =
+ std::make_unique<conversational_speech::WavReaderFactory>();
+ conversational_speech::MultiEndCall multiend_call(
+ timing, config.audiotracks_path(), std::move(wavreader_factory));
+
+ // Generate output audio tracks.
+ auto generated_audiotrack_pairs =
+ conversational_speech::Simulate(multiend_call, config.output_path());
+
+ // Show paths to created audio tracks.
+ std::cout << "Output files:" << std::endl;
+ for (const auto& output_paths_entry : *generated_audiotrack_pairs) {
+ std::cout << " speaker: " << output_paths_entry.first << std::endl;
+ std::cout << " near end: " << output_paths_entry.second.near_end
+ << std::endl;
+ std::cout << " far end: " << output_paths_entry.second.far_end
+ << std::endl;
+ }
+
+ return 0;
+}
+
+} // namespace test
+} // namespace webrtc
+
+int main(int argc, char* argv[]) {
+ return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
new file mode 100644
index 0000000000..17714440d4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file consists of unit tests for webrtc::test::conversational_speech
+// members. Some of them focus on accepting or rejecting different
+// conversational speech setups. A setup is defined by a set of audio tracks
+// and timing information.
+// The docstring at the beginning of each TEST(ConversationalSpeechTest,
+// MultiEndCallSetup*) function looks like the drawing below and indicates which
+// setup is tested.
+//
+// Accept:
+// A 0****.....
+// B .....1****
+//
+// The drawing indicates the following:
+// - the illustrated setup should be accepted,
+// - there are two speakers (namely, A and B),
+// - A is the first to speak, B is the second,
+// - each character after the speaker's letter indicates a time unit (e.g., 100
+// ms),
+// - "*" indicates speaking, "." listening,
+// - numbers indicate the turn index in std::vector<Turn>.
+//
+// Note that the same speaker can appear in multiple lines in order to depict
+// cases in which there are wrong offsets leading to self cross-talk (which is
+// rejected).
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <stdio.h>
+
+#include <cmath>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::LoadTiming;
+using conversational_speech::MockWavReaderFactory;
+using conversational_speech::MultiEndCall;
+using conversational_speech::SaveTiming;
+using conversational_speech::Turn;
+using conversational_speech::WavReaderFactory;
+
+const char* const audiotracks_path = "/path/to/audiotracks";
+const char* const timing_filepath = "/path/to/timing_file.txt";
+const char* const output_path = "/path/to/output_dir";
+
+const std::vector<Turn> expected_timing = {
+ {"A", "a1", 0, 0}, {"B", "b1", 0, 0}, {"A", "a2", 100, 0},
+ {"B", "b2", -200, 0}, {"A", "a3", 0, 0}, {"A", "a3", 0, 0},
+};
+const std::size_t kNumberOfTurns = expected_timing.size();
+
+// Default arguments for MockWavReaderFactory ctor.
+// Fake audio track parameters.
+constexpr int kDefaultSampleRate = 48000;
+const std::map<std::string, const MockWavReaderFactory::Params>
+ kDefaultMockWavReaderFactoryParamsMap = {
+ {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.
+ {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.
+ {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.
+ {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.
+ {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.
+ {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.
+};
+const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
+ kDefaultMockWavReaderFactoryParamsMap.at("t500");
+
+std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
+ return std::unique_ptr<MockWavReaderFactory>(
+ new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+ kDefaultMockWavReaderFactoryParamsMap));
+}
+
+void CreateSineWavFile(absl::string_view filepath,
+ const MockWavReaderFactory::Params& params,
+ float frequency = 440.0f) {
+ // Create samples.
+ constexpr double two_pi = 2.0 * M_PI;
+ std::vector<int16_t> samples(params.num_samples);
+ for (std::size_t i = 0; i < params.num_samples; ++i) {
+ // TODO(alessiob): the produced tone is not pure, improve.
+ samples[i] = std::lround(
+ 32767.0f * std::sin(two_pi * i * frequency / params.sample_rate));
+ }
+
+ // Write samples.
+ WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
+ wav_writer.WriteSamples(samples.data(), params.num_samples);
+}
+
+// Parameters to generate audio tracks with CreateSineWavFile.
+struct SineAudioTrackParams {
+ MockWavReaderFactory::Params params;
+ float frequency;
+};
+
+// Creates a temporary directory in which sine audio tracks are written.
+std::string CreateTemporarySineAudioTracks(
+ const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
+ // Create temporary directory.
+ std::string temp_directory =
+ OutputPath() + "TempConversationalSpeechAudioTracks";
+ CreateDir(temp_directory);
+
+ // Create sine tracks.
+ for (const auto& it : sine_tracks_params) {
+ const std::string temp_filepath = JoinFilename(temp_directory, it.first);
+ CreateSineWavFile(temp_filepath, it.second.params, it.second.frequency);
+ }
+
+ return temp_directory;
+}
+
+void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
+ absl::string_view filepath,
+ const MockWavReaderFactory::Params& expected_params) {
+ auto wav_reader = wav_reader_factory.Create(filepath);
+ EXPECT_EQ(expected_params.sample_rate, wav_reader->SampleRate());
+ EXPECT_EQ(expected_params.num_channels, wav_reader->NumChannels());
+ EXPECT_EQ(expected_params.num_samples, wav_reader->NumSamples());
+}
+
+void DeleteFolderAndContents(absl::string_view dir) {
+ if (!DirExists(dir)) {
+ return;
+ }
+ absl::optional<std::vector<std::string>> dir_content = ReadDirectory(dir);
+ EXPECT_TRUE(dir_content);
+ for (const auto& path : *dir_content) {
+ if (DirExists(path)) {
+ DeleteFolderAndContents(path);
+ } else if (FileExists(path)) {
+ // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+ RemoveFile(path);
+ } else {
+ FAIL();
+ }
+ }
+ // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+ RemoveDir(dir);
+}
+
+} // namespace
+
+using ::testing::_;
+
+TEST(ConversationalSpeechTest, Settings) {
+ const conversational_speech::Config config(audiotracks_path, timing_filepath,
+ output_path);
+
+ // Test getters.
+ EXPECT_EQ(audiotracks_path, config.audiotracks_path());
+ EXPECT_EQ(timing_filepath, config.timing_filepath());
+ EXPECT_EQ(output_path, config.output_path());
+}
+
+TEST(ConversationalSpeechTest, TimingSaveLoad) {
+ // Save test timing.
+ const std::string temporary_filepath =
+ TempFilename(OutputPath(), "TempTimingTestFile");
+ SaveTiming(temporary_filepath, expected_timing);
+
+ // Create a std::vector<Turn> instance by loading from file.
+ std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
+ RemoveFile(temporary_filepath);
+
+ // Check size.
+ EXPECT_EQ(expected_timing.size(), actual_timing.size());
+
+ // Check Turn instances.
+ for (size_t index = 0; index < expected_timing.size(); ++index) {
+ EXPECT_EQ(expected_timing[index], actual_timing[index])
+ << "turn #" << index << " not matching";
+ }
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallCreate) {
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are 5 unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
+
+ // Inject the mock wav reader factory.
+ conversational_speech::MultiEndCall multiend_call(
+ expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(6u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
+ const std::vector<Turn> timing = {
+ {"A", "sr8000", 0, 0},
+ {"B", "sr16000", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(2);
+
+ MultiEndCall multiend_call(timing, audiotracks_path,
+ std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
+ const std::vector<Turn> timing = {
+ {"A", "sr16000_stereo", 0, 0},
+ {"B", "sr16000_stereo", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(1);
+
+ MultiEndCall multiend_call(timing, audiotracks_path,
+ std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest,
+ MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
+ const std::vector<Turn> timing = {
+ {"A", "sr8000", 0, 0},
+ {"B", "sr16000_stereo", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(::testing::_)).Times(2);
+
+ MultiEndCall multiend_call(timing, audiotracks_path,
+ std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
+ const std::vector<Turn> timing = {
+ {"A", "t500", -100, 0},
+ {"B", "t500", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
+ // Accept:
+ // A 0****.....
+ // B .....1****
+ constexpr std::size_t expected_duration = kDefaultSampleRate;
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"B", "t500", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
+ // Accept:
+ // A 0****.......
+ // B .......1****
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"B", "t500", 200, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
+ // Accept:
+ // A 0****....
+ // B ....1****
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"B", "t500", -100, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
+ // Reject:
+ // A ..0****
+ // B .1****. The n-th turn cannot start before the (n-1)-th one.
+ const std::vector<Turn> timing = {
+ {"A", "t500", 200, 0},
+ {"B", "t500", -600, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
+ // Accept:
+ // A 0****2****...
+ // B ...1*********
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"B", "t1000", -200, 0},
+ {"A", "t500", -800, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
+ // Reject:
+ // A 0****......
+ // A ...1****...
+ // B ......2****
+ // ^ Turn #1 overlaps with #0 which is from the same speaker.
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"A", "t500", -200, 0},
+ {"B", "t500", -200, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
+ // Reject:
+ // A 0*********
+ // B 1**.......
+ // C ...2**....
+ // A ......3**.
+ // ^ Turn #3 overlaps with #0 which is from the same speaker.
+ const std::vector<Turn> timing = {
+ {"A", "t1000", 0, 0},
+ {"B", "t300", -1000, 0},
+ {"C", "t300", 0, 0},
+ {"A", "t300", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
+ // Accept:
+ // A 0*********..
+ // B ..1****.....
+ // C .......2****
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+ const std::vector<Turn> timing = {
+ {"A", "t1000", 0, 0},
+ {"B", "t500", -800, 0},
+ {"C", "t500", 0, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(3u, multiend_call.speaker_names().size());
+ EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
+ // Reject:
+ // A 0*********
+ // B ..1****...
+ // C ....2****.
+ // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
+ // not permitted).
+ const std::vector<Turn> timing = {
+ {"A", "t1000", 0, 0},
+ {"B", "t500", -800, 0},
+ {"C", "t500", -300, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
+ // Accept:
+ // A 0*********..
+ // B .2****......
+ // C .......3****
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+ const std::vector<Turn> timing = {
+ {"A", "t1000", 0, 0},
+ {"B", "t500", -900, 0},
+ {"C", "t500", 100, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(3u, multiend_call.speaker_names().size());
+ EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
+ // Accept:
+ // A 0****
+ // B 1****
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0},
+ {"B", "t500", -500, 0},
+ };
+ auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+ // There is one unique audio track to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(2u, multiend_call.speaker_names().size());
+ EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
+ // Accept:
+ // A 0****....3****.5**.
+ // B .....1****...4**...
+// C ......2**.......6**
+ constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0}, {"B", "t500", 0, 0}, {"C", "t300", -400, 0},
+ {"A", "t500", 0, 0}, {"B", "t300", -100, 0}, {"A", "t300", -100, 0},
+ {"C", "t300", -200, 0},
+ };
+ auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+ new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+ kDefaultMockWavReaderFactoryParamsMap));
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_TRUE(multiend_call.valid());
+
+ // Test.
+ EXPECT_EQ(3u, multiend_call.speaker_names().size());
+ EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+ EXPECT_EQ(7u, multiend_call.speaking_turns().size());
+ EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
+ // Reject:
+// A 0****....3****5**.
+// B .....1****...4**..
+// C ......2**......6**
+ // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
+ // speakers not permitted).
+ const std::vector<Turn> timing = {
+ {"A", "t500", 0, 0}, {"B", "t500", 0, 0}, {"C", "t300", -400, 0},
+ {"A", "t500", 0, 0}, {"B", "t300", -100, 0}, {"A", "t300", -200, 0},
+ {"C", "t300", -200, 0},
+ };
+ auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+ new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+ kDefaultMockWavReaderFactoryParamsMap));
+
+ // There are two unique audio tracks to read.
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+ conversational_speech::MultiEndCall multiend_call(
+ timing, audiotracks_path, std::move(mock_wavreader_factory));
+ EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
+ // Parameters with which wav files are created.
+ constexpr int duration_seconds = 5;
+ const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
+
+ for (int sample_rate : sample_rates) {
+ const std::string temp_filename = OutputPath() + "TempSineWavFile_" +
+ std::to_string(sample_rate) + ".wav";
+
+ // Write wav file.
+ const std::size_t num_samples = duration_seconds * sample_rate;
+ MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
+ CreateSineWavFile(temp_filename, params);
+
+ // Load wav file and check if params match.
+ WavReaderFactory wav_reader_factory;
+ MockWavReaderFactory::Params expected_params = {sample_rate, 1u,
+ num_samples};
+ CheckAudioTrackParams(wav_reader_factory, temp_filename, expected_params);
+
+ // Clean up.
+ RemoveFile(temp_filename);
+ }
+}
+
+TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
+ // Simulated call (one character corresponding to 500 ms):
+ // A 0*********...........2*********.....
+ // B ...........1*********.....3*********
+ const std::vector<Turn> expected_timing = {
+ {"A", "t5000_440.wav", 0, 0},
+ {"B", "t5000_880.wav", 500, 0},
+ {"A", "t5000_440.wav", 0, 0},
+ {"B", "t5000_880.wav", -2500, 0},
+ };
+ const std::size_t expected_duration_seconds = 18;
+
+ // Create temporary audio track files.
+ const int sample_rate = 16000;
+ const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
+ {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
+ {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
+ };
+ const std::string audiotracks_path =
+ CreateTemporarySineAudioTracks(sine_tracks_params);
+
+ // Set up the multi-end call.
+ auto wavreader_factory =
+ std::unique_ptr<WavReaderFactory>(new WavReaderFactory());
+ MultiEndCall multiend_call(expected_timing, audiotracks_path,
+ std::move(wavreader_factory));
+
+ // Simulate the call.
+ std::string output_path = JoinFilename(audiotracks_path, "output");
+ CreateDir(output_path);
+ RTC_LOG(LS_VERBOSE) << "simulator output path: " << output_path;
+ auto generated_audiotrack_pairs =
+ conversational_speech::Simulate(multiend_call, output_path);
+ EXPECT_EQ(2u, generated_audiotrack_pairs->size());
+
+ // Check the output.
+ WavReaderFactory wav_reader_factory;
+ const MockWavReaderFactory::Params expected_params = {
+ sample_rate, 1u, sample_rate * expected_duration_seconds};
+ for (const auto& it : *generated_audiotrack_pairs) {
+ RTC_LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
+ CheckAudioTrackParams(wav_reader_factory, it.second.near_end,
+ expected_params);
+ CheckAudioTrackParams(wav_reader_factory, it.second.far_end,
+ expected_params);
+ }
+
+ // Clean.
+ EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
new file mode 100644
index 0000000000..1263e938c4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+using ::testing::Return;
+
+MockWavReader::MockWavReader(int sample_rate,
+ size_t num_channels,
+ size_t num_samples)
+ : sample_rate_(sample_rate),
+ num_channels_(num_channels),
+ num_samples_(num_samples) {
+ ON_CALL(*this, SampleRate()).WillByDefault(Return(sample_rate_));
+ ON_CALL(*this, NumChannels()).WillByDefault(Return(num_channels_));
+ ON_CALL(*this, NumSamples()).WillByDefault(Return(num_samples_));
+}
+
+MockWavReader::~MockWavReader() = default;
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
new file mode 100644
index 0000000000..94e20b9ec6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
+
+#include <cstddef>
+#include <string>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MockWavReader : public WavReaderInterface {
+ public:
+ MockWavReader(int sample_rate, size_t num_channels, size_t num_samples);
+ ~MockWavReader();
+
+ // TODO(alessiob): use ON_CALL to return random samples if needed.
+ MOCK_METHOD(size_t, ReadFloatSamples, (rtc::ArrayView<float>), (override));
+ MOCK_METHOD(size_t, ReadInt16Samples, (rtc::ArrayView<int16_t>), (override));
+
+ MOCK_METHOD(int, SampleRate, (), (const, override));
+ MOCK_METHOD(size_t, NumChannels, (), (const, override));
+ MOCK_METHOD(size_t, NumSamples, (), (const, override));
+
+ private:
+ const int sample_rate_;
+ const size_t num_channels_;
+ const size_t num_samples_;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
new file mode 100644
index 0000000000..a377cce7e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+using ::testing::_;
+using ::testing::Invoke;
+
+MockWavReaderFactory::MockWavReaderFactory(
+ const Params& default_params,
+ const std::map<std::string, const Params>& params)
+ : default_params_(default_params), audiotrack_names_params_(params) {
+ ON_CALL(*this, Create(_))
+ .WillByDefault(Invoke(this, &MockWavReaderFactory::CreateMock));
+}
+
+MockWavReaderFactory::MockWavReaderFactory(const Params& default_params)
+ : MockWavReaderFactory(default_params,
+ std::map<std::string, const Params>{}) {}
+
+MockWavReaderFactory::~MockWavReaderFactory() = default;
+
+std::unique_ptr<WavReaderInterface> MockWavReaderFactory::CreateMock(
+ absl::string_view filepath) {
+ // Search the parameters corresponding to filepath.
+ size_t delimiter = filepath.find_last_of("/\\"); // Either windows or posix
+ std::string filename(filepath.substr(
+ delimiter == absl::string_view::npos ? 0 : delimiter + 1));
+ const auto it = audiotrack_names_params_.find(filename);
+
+ // If not found, use default parameters.
+ if (it == audiotrack_names_params_.end()) {
+ RTC_LOG(LS_VERBOSE) << "using default parameters for " << filepath;
+ return std::unique_ptr<WavReaderInterface>(new MockWavReader(
+ default_params_.sample_rate, default_params_.num_channels,
+ default_params_.num_samples));
+ }
+
+ // Found, use the audiotrack-specific parameters.
+ RTC_LOG(LS_VERBOSE) << "using ad-hoc parameters for " << filepath;
+ RTC_LOG(LS_VERBOSE) << "sample_rate " << it->second.sample_rate;
+ RTC_LOG(LS_VERBOSE) << "num_channels " << it->second.num_channels;
+ RTC_LOG(LS_VERBOSE) << "num_samples " << it->second.num_samples;
+ return std::unique_ptr<WavReaderInterface>(new MockWavReader(
+ it->second.sample_rate, it->second.num_channels, it->second.num_samples));
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
new file mode 100644
index 0000000000..bcc7f3069b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MockWavReaderFactory : public WavReaderAbstractFactory {
+ public:
+ struct Params {
+ int sample_rate;
+ size_t num_channels;
+ size_t num_samples;
+ };
+
+ MockWavReaderFactory(const Params& default_params,
+ const std::map<std::string, const Params>& params);
+ explicit MockWavReaderFactory(const Params& default_params);
+ ~MockWavReaderFactory();
+
+ MOCK_METHOD(std::unique_ptr<WavReaderInterface>,
+ Create,
+ (absl::string_view),
+ (const, override));
+
+ private:
+ // Creates a MockWavReader instance using the parameters in
+ // audiotrack_names_params_ if the entry corresponding to filepath exists,
+ // otherwise creates a MockWavReader instance using the default parameters.
+ std::unique_ptr<WavReaderInterface> CreateMock(absl::string_view filepath);
+
+ const Params& default_params_;
+ std::map<std::string, const Params> audiotrack_names_params_;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
new file mode 100644
index 0000000000..952114a78b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/logging.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+MultiEndCall::MultiEndCall(
+ rtc::ArrayView<const Turn> timing,
+ absl::string_view audiotracks_path,
+ std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
+ : timing_(timing),
+ audiotracks_path_(audiotracks_path),
+ wavreader_abstract_factory_(std::move(wavreader_abstract_factory)),
+ valid_(false) {
+ FindSpeakerNames();
+ if (CreateAudioTrackReaders())
+ valid_ = CheckTiming();
+}
+
+MultiEndCall::~MultiEndCall() = default;
+
+void MultiEndCall::FindSpeakerNames() {
+ RTC_DCHECK(speaker_names_.empty());
+ for (const Turn& turn : timing_) {
+ speaker_names_.emplace(turn.speaker_name);
+ }
+}
+
+bool MultiEndCall::CreateAudioTrackReaders() {
+ RTC_DCHECK(audiotrack_readers_.empty());
+ sample_rate_hz_ = 0; // Sample rate will be set when reading the first track.
+ for (const Turn& turn : timing_) {
+ auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+ if (it != audiotrack_readers_.end())
+ continue;
+
+ const std::string audiotrack_file_path =
+ test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name);
+
+ // Map the audiotrack file name to a new instance of WavReaderInterface.
+ std::unique_ptr<WavReaderInterface> wavreader =
+ wavreader_abstract_factory_->Create(
+ test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name));
+
+ if (sample_rate_hz_ == 0) {
+ sample_rate_hz_ = wavreader->SampleRate();
+ } else if (sample_rate_hz_ != wavreader->SampleRate()) {
+ RTC_LOG(LS_ERROR)
+ << "All the audio tracks should have the same sample rate.";
+ return false;
+ }
+
+ if (wavreader->NumChannels() != 1) {
+ RTC_LOG(LS_ERROR) << "Only mono audio tracks supported.";
+ return false;
+ }
+
+ audiotrack_readers_.emplace(turn.audiotrack_file_name,
+ std::move(wavreader));
+ }
+
+ return true;
+}
+
+bool MultiEndCall::CheckTiming() {
+ struct Interval {
+ size_t begin;
+ size_t end;
+ };
+ size_t number_of_turns = timing_.size();
+ auto millisecond_to_samples = [](int ms, int sr) -> int {
+ // Truncation may happen if the sampling rate is not an integer multiple
+ // of 1000 (e.g., 44100).
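+ // For instance, 1 ms at 44100 Hz maps to 44 samples (44.1 truncated).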
+ return ms * sr / 1000;
+ };
+ auto in_interval = [](size_t value, const Interval& interval) {
+ return interval.begin <= value && value < interval.end;
+ };
+ total_duration_samples_ = 0;
+ speaking_turns_.clear();
+
+ // Begin and end timestamps for the last two turns (unit: number of samples).
+ Interval second_last_turn = {0, 0};
+ Interval last_turn = {0, 0};
+
+ // Initialize map to store speaking turn indices of each speaker (used to
+ // detect self cross-talk).
+ std::map<std::string, std::vector<size_t>> speaking_turn_indices;
+ for (const std::string& speaker_name : speaker_names_) {
+ speaking_turn_indices.emplace(std::piecewise_construct,
+ std::forward_as_tuple(speaker_name),
+ std::forward_as_tuple());
+ }
+
+ // Parse turns.
+ for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+ const Turn& turn = timing_[turn_index];
+ auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+ RTC_CHECK(it != audiotrack_readers_.end())
+ << "Audio track reader not created";
+
+ // Begin and end timestamps for the current turn.
+ int offset_samples =
+ millisecond_to_samples(turn.offset, it->second->SampleRate());
+ std::size_t begin_timestamp = last_turn.end + offset_samples;
+ std::size_t end_timestamp = begin_timestamp + it->second->NumSamples();
+ RTC_LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp << "-"
+ << end_timestamp << " samples";
+
+ // The order is invalid if the offset is negative and its absolute value is
+ // larger than the duration of the previous turn.
+ if (offset_samples < 0 &&
+ -offset_samples > static_cast<int>(last_turn.end - last_turn.begin)) {
+ RTC_LOG(LS_ERROR) << "invalid order";
+ return false;
+ }
+
+ // Cross-talk with 3 or more speakers occurs when the beginning of the
+ // current interval falls in the last two turns.
+ if (turn_index > 1 && in_interval(begin_timestamp, last_turn) &&
+ in_interval(begin_timestamp, second_last_turn)) {
+ RTC_LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+ return false;
+ }
+
+ // Append turn.
+ speaking_turns_.emplace_back(turn.speaker_name, turn.audiotrack_file_name,
+ begin_timestamp, end_timestamp, turn.gain);
+
+ // Save speaking turn index for self cross-talk detection.
+ RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
+ speaking_turn_indices[turn.speaker_name].push_back(turn_index);
+
+ // Update total duration of the conversational speech.
+ if (total_duration_samples_ < end_timestamp)
+ total_duration_samples_ = end_timestamp;
+
+ // Update and continue with next turn.
+ second_last_turn = last_turn;
+ last_turn.begin = begin_timestamp;
+ last_turn.end = end_timestamp;
+ }
+
+ // Detect self cross-talk.
+ for (const std::string& speaker_name : speaker_names_) {
+ RTC_LOG(LS_INFO) << "checking self cross-talk for <" << speaker_name << ">";
+
+ // Copy all turns for this speaker to new vector.
+ std::vector<SpeakingTurn> speaking_turns_for_name;
+ std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
+ std::back_inserter(speaking_turns_for_name),
+ [&speaker_name](const SpeakingTurn& st) {
+ return st.speaker_name == speaker_name;
+ });
+
+ // Check for overlap between adjacent elements.
+ // This is a sufficient condition for self cross-talk since the intervals
+ // are sorted by begin timestamp.
+ auto overlap = std::adjacent_find(
+ speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
+ [](const SpeakingTurn& a, const SpeakingTurn& b) {
+ return a.end > b.begin;
+ });
+
+ if (overlap != speaking_turns_for_name.end()) {
+ RTC_LOG(LS_ERROR) << "Self cross-talk detected";
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
new file mode 100644
index 0000000000..63283465fa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+
+#include <stddef.h>
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MultiEndCall {
+ public:
+ struct SpeakingTurn {
+ // Constructor required in order to use std::vector::emplace_back().
+ SpeakingTurn(absl::string_view new_speaker_name,
+ absl::string_view new_audiotrack_file_name,
+ size_t new_begin,
+ size_t new_end,
+ int gain)
+ : speaker_name(new_speaker_name),
+ audiotrack_file_name(new_audiotrack_file_name),
+ begin(new_begin),
+ end(new_end),
+ gain(gain) {}
+ std::string speaker_name;
+ std::string audiotrack_file_name;
+ size_t begin;
+ size_t end;
+ int gain;
+ };
+
+ MultiEndCall(
+ rtc::ArrayView<const Turn> timing,
+ absl::string_view audiotracks_path,
+ std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
+ ~MultiEndCall();
+
+ MultiEndCall(const MultiEndCall&) = delete;
+ MultiEndCall& operator=(const MultiEndCall&) = delete;
+
+ const std::set<std::string>& speaker_names() const { return speaker_names_; }
+ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+ audiotrack_readers() const {
+ return audiotrack_readers_;
+ }
+ bool valid() const { return valid_; }
+ int sample_rate() const { return sample_rate_hz_; }
+ size_t total_duration_samples() const { return total_duration_samples_; }
+ const std::vector<SpeakingTurn>& speaking_turns() const {
+ return speaking_turns_;
+ }
+
+ private:
+ // Finds unique speaker names.
+ void FindSpeakerNames();
+
+  // Creates one WavReader instance for each unique audio track. Returns false
+  // if the audio tracks do not all share the same sample rate or if any of
+  // them is not mono.
+ bool CreateAudioTrackReaders();
+
+ // Validates the speaking turns timing information. Accepts cross-talk, but
+ // only up to 2 speakers. Rejects unordered turns and self cross-talk.
+ bool CheckTiming();
+
+ rtc::ArrayView<const Turn> timing_;
+ std::string audiotracks_path_;
+ std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
+ std::set<std::string> speaker_names_;
+ std::map<std::string, std::unique_ptr<WavReaderInterface>>
+ audiotrack_readers_;
+ bool valid_;
+ int sample_rate_hz_;
+ size_t total_duration_samples_;
+ std::vector<SpeakingTurn> speaking_turns_;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
new file mode 100644
index 0000000000..89bcd48d84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::MultiEndCall;
+using conversational_speech::SpeakerOutputFilePaths;
+using conversational_speech::WavReaderInterface;
+
+// Combines the output path and the speaker names to define the output file
+// paths for the near-end and far-end audio tracks.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
+InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
+ absl::string_view output_path) {
+ // Create map.
+ auto speaker_output_file_paths_map =
+ std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();
+
+ // Add near-end and far-end output paths into the map.
+ for (const auto& speaker_name : speaker_names) {
+ const std::string near_end_path =
+ test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
+ RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
+ << near_end_path << ".";
+
+ const std::string far_end_path =
+ test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
+ RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
+ << far_end_path << ".";
+
+ // Add to map.
+ speaker_output_file_paths_map->emplace(
+ std::piecewise_construct, std::forward_as_tuple(speaker_name),
+ std::forward_as_tuple(near_end_path, far_end_path));
+ }
+
+ return speaker_output_file_paths_map;
+}
+
+// Class that provides one WavWriter for the near-end and one for the far-end
+// output track of a speaker.
+class SpeakerWavWriters {
+ public:
+ SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
+ int sample_rate)
+ : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
+ far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
+ WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
+ WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }
+
+ private:
+ WavWriter near_end_wav_writer_;
+ WavWriter far_end_wav_writer_;
+};
+
+// Initializes a SpeakerWavWriters instance for each speaker, each one holding
+// one WavWriter for the near-end and one for the far-end output track.
+std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
+InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
+ speaker_output_file_paths,
+ int sample_rate) {
+ // Create map.
+ auto speaker_wav_writers_map =
+ std::make_unique<std::map<std::string, SpeakerWavWriters>>();
+
+  // Add one SpeakerWavWriters instance per speaker into the map.
+ for (auto it = speaker_output_file_paths.begin();
+ it != speaker_output_file_paths.end(); ++it) {
+ speaker_wav_writers_map->emplace(
+ std::piecewise_construct, std::forward_as_tuple(it->first),
+ std::forward_as_tuple(it->second, sample_rate));
+ }
+
+ return speaker_wav_writers_map;
+}
+
+// Reads all the samples for each audio track.
+std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
+ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+ audiotrack_readers) {
+ // Create map.
+ auto audiotracks_map =
+ std::make_unique<std::map<std::string, std::vector<int16_t>>>();
+
+ // Add audio track vectors.
+ for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
+ ++it) {
+ // Add map entry.
+ audiotracks_map->emplace(std::piecewise_construct,
+ std::forward_as_tuple(it->first),
+ std::forward_as_tuple(it->second->NumSamples()));
+
+ // Read samples.
+ it->second->ReadInt16Samples(audiotracks_map->at(it->first));
+ }
+
+ return audiotracks_map;
+}
+
+// Writes all the values in `source_samples` via `wav_writer`. If the number of
+// previously written samples in `wav_writer` is less than `interval_begin`, it
+// adds zeros as left padding. The padding corresponds to intervals during which
+// a speaker is not active.
+void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
+ size_t interval_begin,
+ WavWriter* wav_writer) {
+ // Add left padding.
+ RTC_CHECK(wav_writer);
+ RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
+ size_t padding_size = interval_begin - wav_writer->num_samples();
+ if (padding_size != 0) {
+ const std::vector<int16_t> padding(padding_size, 0);
+ wav_writer->WriteSamples(padding.data(), padding_size);
+ }
+
+ // Write source samples.
+ wav_writer->WriteSamples(source_samples.data(), source_samples.size());
+}
+
+// Appends zeros via `wav_writer`. The number of zeros equals the difference
+// between `pad_samples` and the number of samples previously written via
+// `wav_writer`, which is checked to be non-negative.
+void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
+ RTC_CHECK(wav_writer);
+ RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
+ size_t padding_size = pad_samples - wav_writer->num_samples();
+ if (padding_size != 0) {
+ const std::vector<int16_t> padding(padding_size, 0);
+ wav_writer->WriteSamples(padding.data(), padding_size);
+ }
+}
+
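+// Scales `source_samples` by `gain` (expressed in dB and converted to a
+// linear factor) and writes the saturated result into `output_samples`.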
+void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
+ int gain,
+ rtc::ArrayView<int16_t> output_samples) {
+ const float gain_linear = DbToRatio(gain);
+ RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
+ std::transform(source_samples.begin(), source_samples.end(),
+ output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
+ return rtc::saturated_cast<int16_t>(x * gain_linear);
+ });
+}
+
+} // namespace
+
+namespace conversational_speech {
+
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+ const MultiEndCall& multiend_call,
+ absl::string_view output_path) {
+ // Set output file paths and initialize wav writers.
+ const auto& speaker_names = multiend_call.speaker_names();
+ auto speaker_output_file_paths =
+ InitSpeakerOutputFilePaths(speaker_names, output_path);
+ auto speakers_wav_writers = InitSpeakersWavWriters(
+ *speaker_output_file_paths, multiend_call.sample_rate());
+
+ // Preload all the input audio tracks.
+ const auto& audiotrack_readers = multiend_call.audiotrack_readers();
+ auto audiotracks = PreloadAudioTracks(audiotrack_readers);
+
+  // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end
+  // across the 2 speakers are symmetric; hence, the code below could be
+  // replaced by only creating the near-end or the far-end. However, this
+  // would require splitting the unit tests and documenting the behavior in
+  // README.md. In practice, it should not be an issue since the files are
+  // not expected to be significant in size.
+
+ // Write near-end and far-end output tracks.
+ for (const auto& speaking_turn : multiend_call.speaking_turns()) {
+ const std::string& active_speaker_name = speaking_turn.speaker_name;
+ const auto source_audiotrack =
+ audiotracks->at(speaking_turn.audiotrack_file_name);
+ std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
+ ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);
+
+ // Write active speaker's chunk to active speaker's near-end.
+ PadLeftWriteChunk(
+ scaled_audiotrack, speaking_turn.begin,
+ speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());
+
+ // Write active speaker's chunk to other participants' far-ends.
+ for (const std::string& speaker_name : speaker_names) {
+ if (speaker_name == active_speaker_name)
+ continue;
+ PadLeftWriteChunk(
+ scaled_audiotrack, speaking_turn.begin,
+ speakers_wav_writers->at(speaker_name).far_end_wav_writer());
+ }
+ }
+
+  // Finalize all the output tracks with right padding.
+  // This is required to make all the output tracks have the same duration.
+ size_t duration_samples = multiend_call.total_duration_samples();
+ for (const std::string& speaker_name : speaker_names) {
+ PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
+ duration_samples);
+ PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
+ duration_samples);
+ }
+
+ return speaker_output_file_paths;
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h
new file mode 100644
index 0000000000..2f311e16b3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+struct SpeakerOutputFilePaths {
+ SpeakerOutputFilePaths(absl::string_view new_near_end,
+ absl::string_view new_far_end)
+ : near_end(new_near_end), far_end(new_far_end) {}
+ // Paths to the near-end and far-end audio track files.
+ const std::string near_end;
+ const std::string far_end;
+};
+
+// Generates the near-end and far-end audio track pair for each speaker and
+// returns a map from speaker name to the corresponding output file paths.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+ const MultiEndCall& multiend_call,
+ absl::string_view output_path);
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc
new file mode 100644
index 0000000000..95ec9f542e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/string_encode.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+bool Turn::operator==(const Turn& b) const {
+ return b.speaker_name == speaker_name &&
+ b.audiotrack_file_name == audiotrack_file_name && b.offset == offset &&
+ b.gain == gain;
+}
+
+std::vector<Turn> LoadTiming(absl::string_view timing_filepath) {
+ // Line parser.
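+  // Each non-empty line is expected to contain 3 or 4 space-separated fields:
+  // <speaker name> <audiotrack file name> <offset> [<gain>].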
+ auto parse_line = [](absl::string_view line) {
+ std::vector<absl::string_view> fields = rtc::split(line, ' ');
+ RTC_CHECK_GE(fields.size(), 3);
+ RTC_CHECK_LE(fields.size(), 4);
+ int gain = 0;
+ if (fields.size() == 4) {
+ gain = rtc::StringToNumber<int>(fields[3]).value_or(0);
+ }
+ return Turn(fields[0], fields[1],
+ rtc::StringToNumber<int>(fields[2]).value_or(0), gain);
+ };
+
+ // Init.
+ std::vector<Turn> timing;
+
+ // Parse lines.
+ std::string line;
+ std::ifstream infile(std::string{timing_filepath});
+ while (std::getline(infile, line)) {
+ if (line.empty())
+ continue;
+ timing.push_back(parse_line(line));
+ }
+ infile.close();
+
+ return timing;
+}
+
+void SaveTiming(absl::string_view timing_filepath,
+ rtc::ArrayView<const Turn> timing) {
+ std::ofstream outfile(std::string{timing_filepath});
+ RTC_CHECK(outfile.is_open());
+ for (const Turn& turn : timing) {
+ outfile << turn.speaker_name << " " << turn.audiotrack_file_name << " "
+ << turn.offset << " " << turn.gain << std::endl;
+ }
+ outfile.close();
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h
new file mode 100644
index 0000000000..9314f6fc43
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
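+// Single speaking turn as read from, or written to, a timing file: speaker
+// name, audio track file name, offset and gain.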
+struct Turn {
+ Turn(absl::string_view new_speaker_name,
+ absl::string_view new_audiotrack_file_name,
+ int new_offset,
+ int gain)
+ : speaker_name(new_speaker_name),
+ audiotrack_file_name(new_audiotrack_file_name),
+ offset(new_offset),
+ gain(gain) {}
+ bool operator==(const Turn& b) const;
+ std::string speaker_name;
+ std::string audiotrack_file_name;
+ int offset;
+ int gain;
+};
+
+// Loads a list of turns from a file.
+std::vector<Turn> LoadTiming(absl::string_view timing_filepath);
+
+// Writes a list of turns into a file.
+void SaveTiming(absl::string_view timing_filepath,
+ rtc::ArrayView<const Turn> timing);
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h
new file mode 100644
index 0000000000..14ddfc7539
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
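+// Abstract factory for WavReaderInterface instances; it lets callers swap the
+// concrete wav reader implementation (e.g., with a mock in unit tests).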
+class WavReaderAbstractFactory {
+ public:
+ virtual ~WavReaderAbstractFactory() = default;
+ virtual std::unique_ptr<WavReaderInterface> Create(
+ absl::string_view filepath) const = 0;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc
new file mode 100644
index 0000000000..99b1686484
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+
+#include <cstddef>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "common_audio/wav_file.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::WavReaderInterface;
+
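+// Adapts WavReader to the WavReaderInterface used by the conversational
+// speech generator tool.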
+class WavReaderAdaptor final : public WavReaderInterface {
+ public:
+ explicit WavReaderAdaptor(absl::string_view filepath)
+ : wav_reader_(filepath) {}
+ ~WavReaderAdaptor() override = default;
+
+ size_t ReadFloatSamples(rtc::ArrayView<float> samples) override {
+ return wav_reader_.ReadSamples(samples.size(), samples.begin());
+ }
+
+ size_t ReadInt16Samples(rtc::ArrayView<int16_t> samples) override {
+ return wav_reader_.ReadSamples(samples.size(), samples.begin());
+ }
+
+ int SampleRate() const override { return wav_reader_.sample_rate(); }
+
+ size_t NumChannels() const override { return wav_reader_.num_channels(); }
+
+ size_t NumSamples() const override { return wav_reader_.num_samples(); }
+
+ private:
+ WavReader wav_reader_;
+};
+
+} // namespace
+
+namespace conversational_speech {
+
+WavReaderFactory::WavReaderFactory() = default;
+
+WavReaderFactory::~WavReaderFactory() = default;
+
+std::unique_ptr<WavReaderInterface> WavReaderFactory::Create(
+ absl::string_view filepath) const {
+ return std::unique_ptr<WavReaderAdaptor>(new WavReaderAdaptor(filepath));
+}
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h
new file mode 100644
index 0000000000..f2e5b61055
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class WavReaderFactory : public WavReaderAbstractFactory {
+ public:
+ WavReaderFactory();
+ ~WavReaderFactory() override;
+ std::unique_ptr<WavReaderInterface> Create(
+ absl::string_view filepath) const override;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h
new file mode 100644
index 0000000000..c74f639461
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_
+
+#include <stddef.h>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class WavReaderInterface {
+ public:
+ virtual ~WavReaderInterface() = default;
+
+ // Returns the number of samples read.
+ virtual size_t ReadFloatSamples(rtc::ArrayView<float> samples) = 0;
+ virtual size_t ReadInt16Samples(rtc::ArrayView<int16_t> samples) = 0;
+
+ // Getters.
+ virtual int SampleRate() const = 0;
+ virtual size_t NumChannels() const = 0;
+ virtual size_t NumSamples() const = 0;
+};
+
+} // namespace conversational_speech
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_