summaryrefslogtreecommitdiffstats
path: root/dom/media/gtest/TestAudioInputProcessing.cpp
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree f435a8308119effd964b339f76abb83a57c29483 /dom/media/gtest/TestAudioInputProcessing.cpp
parent Initial commit. (diff)
download firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. (upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/gtest/TestAudioInputProcessing.cpp')
-rw-r--r-- dom/media/gtest/TestAudioInputProcessing.cpp 395
1 files changed, 395 insertions, 0 deletions
diff --git a/dom/media/gtest/TestAudioInputProcessing.cpp b/dom/media/gtest/TestAudioInputProcessing.cpp
new file mode 100644
index 0000000000..82c1831e84
--- /dev/null
+++ b/dom/media/gtest/TestAudioInputProcessing.cpp
@@ -0,0 +1,395 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "AudioGenerator.h"
+#include "MediaEngineWebRTCAudio.h"
+#include "MediaTrackGraphImpl.h"
+#include "PrincipalHandle.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/NullPrincipal.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+#include "nsContentUtils.h"
+#include "nsTArray.h"
+
+using namespace mozilla;
+using testing::NiceMock;
+using testing::Return;
+
+class MockGraph : public MediaTrackGraphImpl {
+ public:
+ explicit MockGraph(TrackRate aRate)
+ : MediaTrackGraphImpl(0, aRate, nullptr, AbstractThread::MainThread()) {
+ ON_CALL(*this, OnGraphThread).WillByDefault(Return(true)); // By default the mock reports being on the graph thread.
+ }
+
+ void Init(uint32_t aChannels) {
+ MediaTrackGraphImpl::Init(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aChannels);
+ // Remove this graph's driver since it holds a ref. If no AppendMessage
+ // takes place, the driver never starts, and clearing it also makes sure no
+ // one tries to use it. We are still kept alive by the self-ref, so
+ // Destroy() must be called to break that cycle.
+ SetCurrentDriver(nullptr);
+ }
+
+ MOCK_CONST_METHOD0(OnGraphThread, bool()); // Mocked; defaults to true (see constructor).
+
+ protected:
+ ~MockGraph() = default;
+};
+
+// AudioInputProcessing will put extra frames as pre-buffering data to avoid
+// glitches in non pass-through mode. The main goal of the test is to check how
+// many frames are left in AudioInputProcessing's mSegment in various
+// situations after input data has been processed.
+TEST(TestAudioInputProcessing, Buffering)
+{
+ const TrackRate rate = 8000; // So packet size is 80
+ const uint32_t channels = 1;
+ auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+ graph->Init(channels);
+
+ auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+
+ const size_t frames = 72; // Multiples of this are rounded up to audio block ends below.
+
+ AudioGenerator<AudioDataValue> generator(channels, rate);
+ GraphTime processedTime;
+ GraphTime nextTime;
+ AudioSegment output; // Shared by every round, so its duration accumulates.
+
+ // Toggle pass-through mode without starting: nothing should get buffered.
+ {
+ EXPECT_EQ(aip->PassThrough(graph), false);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+ aip->SetPassThrough(graph, true);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+ aip->SetPassThrough(graph, false);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+ aip->SetPassThrough(graph, true);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+ }
+
+ {
+ // Need (nextTime - processedTime) = 128 - 0 = 128 frames this round.
+ // aip has not been started or set to processing mode yet, so output will be
+ // filled with silence data directly.
+ processedTime = 0;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+ }
+
+ // Set aip to processing/non-pass-through mode
+ aip->SetPassThrough(graph, false);
+ {
+ // Need (nextTime - processedTime) = 256 - 128 = 128 frames this round.
+ // aip has not been started yet, so output will be filled with silence data
+ // directly.
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+ }
+
+ // Once aip is started in processing mode it will insert 80 frames (one
+ // packet) into its internal buffer as pre-buffering.
+ aip->Start(graph);
+ {
+ // Need (nextTime - processedTime) = 256 - 256 = 0 frames this round.
+ // Process() will take 0 frames from input, packetize and process these
+ // frames into 0 80-frame packets (0 frames left in the packetizer), insert
+ // the packets into aip's internal buffer, then move 0 frames from the
+ // internal buffer to output, leaving 80 + 0 - 0 = 80 frames in aip's
+ // internal buffer.
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 80);
+ }
+
+ {
+ // Need (nextTime - processedTime) = 384 - 256 = 128 frames this round.
+ // Process() will take 128 frames from input, packetize and process these
+ // frames into floor(128/80) = 1 80-frame packet (48 frames left in the
+ // packetizer), insert the packet into aip's internal buffer, then move 128
+ // frames from the internal buffer to output, leaving 80 + 80 - 128 = 32
+ // frames in aip's internal buffer.
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
+ }
+
+ {
+ // Need (nextTime - processedTime) = 384 - 384 = 0 frames this round.
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(5 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
+ }
+
+ {
+ // Need (nextTime - processedTime) = 512 - 384 = 128 frames this round.
+ // Process() will take 128 frames from input, packetize and process these
+ // frames into floor((128+48)/80) = 2 80-frame packets (16 frames left in
+ // the packetizer), insert the packets into aip's internal buffer, then move
+ // 128 frames from the internal buffer to output, leaving
+ // 32 + 2*80 - 128 = 64 frames in aip's internal buffer.
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(6 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 64);
+ }
+
+ aip->SetPassThrough(graph, true);
+ {
+ // Need (nextTime - processedTime) = 512 - 512 = 0 frames this round.
+ // No buffering in pass-through mode
+ processedTime = nextTime;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(7 * frames);
+
+ AudioSegment input;
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), processedTime); // Same as nextTime: 0 frames this round.
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+ }
+
+ aip->Stop(graph);
+ graph->Destroy();
+}
+
+TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals)
+{
+ const TrackRate rate = 48000; // so # of output frames from packetizer is 480
+ const uint32_t channels = 2;
+ auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+ graph->Init(channels);
+
+ auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+ AudioGenerator<AudioDataValue> generator(channels, rate);
+
+ RefPtr<nsIPrincipal> dummy_principal =
+ NullPrincipal::CreateWithoutOriginAttributes();
+ const PrincipalHandle principal1 = MakePrincipalHandle(dummy_principal.get());
+ const PrincipalHandle principal2 =
+ MakePrincipalHandle(nsContentUtils::GetSystemPrincipal());
+
+ // Total 4800 frames. It's easier to test with a multiple of 480 frames.
+ nsTArray<std::pair<TrackTime, PrincipalHandle>> framesWithPrincipal = {
+ {100, principal1},
+ {200, PRINCIPAL_HANDLE_NONE},
+ {300, principal2},
+ {400, principal1},
+ {440, PRINCIPAL_HANDLE_NONE},
+ // The entries above add up to 3 packets (1440 frames).
+ {480, principal1},
+ {480, principal2},
+ {480, PRINCIPAL_HANDLE_NONE},
+ // The entries above add up to 3 packets (1440 frames).
+ {500, principal2},
+ {490, principal1},
+ {600, principal1},
+ {330, principal1}
+ // The entries above add up to 4 packets (1920 frames).
+ };
+
+ // Generate 4800 frames of data, tagging each span with its principal.
+ AudioSegment input;
+ {
+ for (const auto& [duration, principal] : framesWithPrincipal) {
+ AudioSegment data;
+ generator.Generate(data, duration);
+ for (AudioSegment::ChunkIterator it(data); !it.IsEnded(); it.Next()) {
+ it->mPrincipalHandle = principal;
+ }
+
+ input.AppendFrom(&data);
+ }
+ }
+
+ auto verifyPrincipals = [&](const AudioSegment& data) { // Each span's chunks must carry its principal.
+ TrackTime start = 0;
+ for (const auto& [duration, principal] : framesWithPrincipal) {
+ const TrackTime end = start + duration;
+
+ AudioSegment slice;
+ slice.AppendSlice(data, start, end);
+ start = end;
+
+ for (AudioSegment::ChunkIterator it(slice); !it.IsEnded(); it.Next()) {
+ EXPECT_EQ(it->mPrincipalHandle, principal);
+ }
+ }
+ };
+
+ // Check the principals in audio-processing mode.
+ EXPECT_EQ(aip->PassThrough(graph), false);
+ aip->Start(graph);
+ {
+ EXPECT_EQ(aip->NumBufferedFrames(graph), 480);
+ AudioSegment output;
+ {
+ // Trim the prebuffering silence.
+ // NOTE(review): the 480 null frames fed below presumably flush the input tail.
+ AudioSegment data;
+ aip->Process(graph, 0, 4800, &input, &data);
+ EXPECT_EQ(input.GetDuration(), 4800);
+ EXPECT_EQ(data.GetDuration(), 4800);
+
+ AudioSegment dummy;
+ dummy.AppendNullData(480);
+ aip->Process(graph, 0, 480, &dummy, &data);
+ EXPECT_EQ(dummy.GetDuration(), 480);
+ EXPECT_EQ(data.GetDuration(), 480 + 4800);
+
+ // Ignore the first 480 frames, i.e. the pre-buffering data.
+ output.AppendSlice(data, 480, 480 + 4800);
+ }
+
+ verifyPrincipals(output);
+ }
+
+ // Check the principals in pass-through mode.
+ aip->SetPassThrough(graph, true);
+ {
+ AudioSegment output;
+ aip->Process(graph, 0, 4800, &input, &output);
+ EXPECT_EQ(input.GetDuration(), 4800);
+ EXPECT_EQ(output.GetDuration(), 4800);
+
+ verifyPrincipals(output);
+ }
+
+ aip->Stop(graph);
+ graph->Destroy();
+}
+
+TEST(TestAudioInputProcessing, Downmixing)
+{
+ const TrackRate rate = 44100;
+ const uint32_t channels = 4;
+ auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+ graph->Init(channels);
+
+ auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+
+ const size_t frames = 44100;
+
+ AudioGenerator<AudioDataValue> generator(channels, rate);
+ GraphTime processedTime;
+ GraphTime nextTime;
+
+ aip->SetPassThrough(graph, false);
+ aip->Start(graph);
+
+ processedTime = 0;
+ nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+
+ {
+ AudioSegment input;
+ AudioSegment output;
+ generator.Generate(input, nextTime - processedTime);
+
+ // Intentionally reduce the amplitude of the generated sine wave so there's
+ // no chance the max amplitude reaches 1.0, but not enough so that 4
+ // channels summed together won't clip.
+ input.ApplyVolume(0.9);
+
+ // Process is going to see that it has 4 channels of input, and is going to
+ // downmix to mono, scaling the input by 1/4 in the process.
+ // We can't compare the input and output signal because the sine is going to
+ // be mangled.
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime);
+ EXPECT_EQ(output.MaxChannelCount(), 1u);
+
+ // Verify that it doesn't clip: the input signal has likely been mangled by
+ // the various processing passes, but at least it shouldn't clip. We know we
+ // always have floating point audio here, regardless of the sample-type used
+ // by Gecko.
+ for (AudioSegment::ChunkIterator iterOutput(output); !iterOutput.IsEnded();
+ iterOutput.Next()) {
+ const float* const output = iterOutput->ChannelData<float>()[0]; // NB: shadows the outer |output| segment.
+ for (uint32_t i = 0; i < iterOutput->GetDuration(); i++) {
+ // Very conservative here, it's likely that the AGC lowers the volume a
+ // lot.
+ EXPECT_LE(std::abs(output[i]), 0.95);
+ }
+ }
+ }
+
+ // Now, repeat the test, checking we get the unmodified 4 channels.
+ aip->SetPassThrough(graph, true);
+
+ AudioSegment input, output;
+ processedTime = nextTime;
+ nextTime += MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+ generator.Generate(input, nextTime - processedTime);
+
+ aip->Process(graph, processedTime, nextTime, &input, &output);
+ EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+ EXPECT_EQ(output.GetDuration(), nextTime - processedTime);
+ // This time, no downmix: 4 channels of input, 4 channels of output
+ EXPECT_EQ(output.MaxChannelCount(), 4u);
+
+ nsTArray<AudioDataValue> inputLinearized, outputLinearized;
+ input.WriteToInterleavedBuffer(inputLinearized, input.MaxChannelCount());
+ output.WriteToInterleavedBuffer(outputLinearized, output.MaxChannelCount());
+
+ // Pass-through: the first frames * channels samples must be exactly equal.
+ for (uint32_t i = 0; i < frames * channels; i++) {
+ EXPECT_EQ(inputLinearized[i], outputLinearized[i]);
+ }
+
+ aip->Stop(graph);
+ graph->Destroy();
+}