1 files changed, 395 insertions, 0 deletions
diff --git a/dom/media/gtest/TestAudioInputProcessing.cpp b/dom/media/gtest/TestAudioInputProcessing.cpp
new file mode 100644
index 0000000000..82c1831e84
--- /dev/null
+++ b/dom/media/gtest/TestAudioInputProcessing.cpp
@@ -0,0 +1,395 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "AudioGenerator.h"
+#include "MediaEngineWebRTCAudio.h"
+#include "MediaTrackGraphImpl.h"
+#include "PrincipalHandle.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/NullPrincipal.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+#include "nsContentUtils.h"
+#include "nsTArray.h"
+
+using namespace mozilla;
+using testing::NiceMock;
+using testing::Return;
+
+// MediaTrackGraphImpl test double whose OnGraphThread() is mocked.
+class MockGraph : public MediaTrackGraphImpl {
+ public:
+  MOCK_CONST_METHOD0(OnGraphThread, bool());
+
+  explicit MockGraph(TrackRate aRate)
+      : MediaTrackGraphImpl(0, aRate, nullptr, AbstractThread::MainThread()) {
+    ON_CALL(*this, OnGraphThread).WillByDefault(Return(true));
+  }
+
+  void Init(uint32_t aChannels) {
+    MediaTrackGraphImpl::Init(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aChannels);
+    // Drop this graph's driver: it holds a ref, and it never starts unless
+    // AppendMessage is called. This also ensures nobody uses it. The self-ref
+    // still keeps us alive, so Destroy() must be called to break that cycle.
+    SetCurrentDriver(nullptr);
+  }
+
+ protected:
+  ~MockGraph() = default;
+};
+
+// AudioInputProcessing inserts extra frames as pre-buffering data to avoid
+// glitches in non-pass-through mode. The main goal of this test is to check
+// how many frames are left in AudioInputProcessing's mSegment in various
+// situations after input data has been processed.
+TEST(TestAudioInputProcessing, Buffering)
+{
+  const TrackRate rate = 8000;  // So the packet size is 80 frames
+  const uint32_t channels = 1;
+  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+  graph->Init(channels);
+
+  auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+
+  const size_t frames = 72;
+
+  AudioGenerator<AudioDataValue> generator(channels, rate);
+  GraphTime processedTime;
+  GraphTime nextTime;
+  AudioSegment output;
+
+  // Toggling pass-through mode before starting must not buffer anything.
+  {
+    EXPECT_EQ(aip->PassThrough(graph), false);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+    aip->SetPassThrough(graph, true);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+    aip->SetPassThrough(graph, false);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+
+    aip->SetPassThrough(graph, true);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+  }
+
+  {
+    // Need (nextTime - processedTime) = 128 - 0 = 128 frames this round.
+    // aip has not been started or set to processing mode yet, so the output
+    // will be filled with silence directly.
+    processedTime = 0;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+  }
+
+  // Set aip to processing/non-pass-through mode
+  aip->SetPassThrough(graph, false);
+  {
+    // Need (nextTime - processedTime) = 256 - 128 = 128 frames this round.
+    // aip has not been started yet, so the output will be filled with silence
+    // directly.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+  }
+
+  // Once aip is started while in processing mode, it inserts 80 frames into
+  // its internal buffer as pre-buffering.
+  aip->Start(graph);
+  {
+    // Need (nextTime - processedTime) = 256 - 256 = 0 frames this round.
+    // aip's Process() will take 0 frames from input, packetize and process
+    // them into 0 80-frame packets (0 frames left in the packetizer), insert
+    // the packets into aip's internal buffer, then move 0 frames from the
+    // internal buffer to output, leaving 80 + 0 - 0 = 80 frames in aip's
+    // internal buffer.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 80);
+  }
+
+  {
+    // Need (nextTime - processedTime) = 384 - 256 = 128 frames this round.
+    // aip's Process() will take 128 frames from input, packetize and process
+    // them into floor(128 / 80) = 1 80-frame packet (48 frames left in the
+    // packetizer), insert the packet into aip's internal buffer, then move
+    // 128 frames from the internal buffer to output, leaving
+    // 80 + 80 - 128 = 32 frames in aip's internal buffer.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
+  }
+
+  {
+    // Need (nextTime - processedTime) = 384 - 384 = 0 frames this round.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(5 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
+  }
+
+  {
+    // Need (nextTime - processedTime) = 512 - 384 = 128 frames this round.
+    // aip's Process() will take 128 frames from input, packetize and process
+    // them into floor((128 + 48) / 80) = 2 80-frame packets (16 frames left in
+    // the packetizer), insert the packets into aip's internal buffer, then
+    // move 128 frames from the internal buffer to output, leaving
+    // 32 + 2 * 80 - 128 = 64 frames in aip's internal buffer.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(6 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 64);
+  }
+
+  aip->SetPassThrough(graph, true);
+  {
+    // Need (nextTime - processedTime) = 512 - 512 = 0 frames this round.
+    // Nothing is expected to stay buffered in pass-through mode.
+    processedTime = nextTime;
+    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(7 * frames);
+
+    AudioSegment input;
+    generator.Generate(input, nextTime - processedTime);
+
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), processedTime);  // == nextTime here
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
+  }
+
+  aip->Stop(graph);
+  graph->Destroy();
+}
+
+TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals)
+{
+  const TrackRate rate = 48000;  // packetizer then emits 480-frame packets
+  const uint32_t channels = 2;
+  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+  graph->Init(channels);
+
+  auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+  AudioGenerator<AudioDataValue> generator(channels, rate);
+
+  RefPtr<nsIPrincipal> nullPrincipal =
+      NullPrincipal::CreateWithoutOriginAttributes();
+  const PrincipalHandle principal1 = MakePrincipalHandle(nullPrincipal.get());
+  const PrincipalHandle principal2 =
+      MakePrincipalHandle(nsContentUtils::GetSystemPrincipal());
+
+  // 4800 frames in total, grouped so each group is a multiple of 480 frames.
+  nsTArray<std::pair<TrackTime, PrincipalHandle>> principalRuns = {
+      {100, principal1},
+      {200, PRINCIPAL_HANDLE_NONE},
+      {300, principal2},
+      {400, principal1},
+      {440, PRINCIPAL_HANDLE_NONE},
+      // The 5 runs above add up to 3 packets (1440 frames).
+      {480, principal1},
+      {480, principal2},
+      {480, PRINCIPAL_HANDLE_NONE},
+      // The 3 runs above add up to 3 packets (1440 frames).
+      {500, principal2},
+      {490, principal1},
+      {600, principal1},
+      {330, principal1}
+      // The 4 runs above add up to 4 packets (1920 frames).
+  };
+
+  // Build the 4800-frame input, tagging each run's chunks with its principal.
+  AudioSegment input;
+  for (const auto& [length, owner] : principalRuns) {
+    AudioSegment run;
+    generator.Generate(run, length);
+
+    // Stamp every chunk generated for this run before appending it.
+    for (AudioSegment::ChunkIterator chunk(run); !chunk.IsEnded();
+         chunk.Next()) {
+      chunk->mPrincipalHandle = owner;
+    }
+    input.AppendFrom(&run);
+  }
+
+  // Checks that each chunk of `data` carries the principal of its run.
+  auto verifyPrincipals = [&](const AudioSegment& data) {
+    TrackTime offset = 0;
+    for (const auto& [length, owner] : principalRuns) {
+      AudioSegment window;
+      window.AppendSlice(data, offset, offset + length);
+      offset += length;
+
+      for (AudioSegment::ChunkIterator chunk(window); !chunk.IsEnded();
+           chunk.Next()) {
+        EXPECT_EQ(chunk->mPrincipalHandle, owner);
+      }
+    }
+  };
+
+  // First pass: verify the principals in audio-processing mode.
+  EXPECT_EQ(aip->PassThrough(graph), false);
+  aip->Start(graph);
+  {
+    EXPECT_EQ(aip->NumBufferedFrames(graph), 480);
+    AudioSegment aligned;
+    {
+      // Process the input followed by 480 frames of silence: the output is
+      // expected to start with 480 pre-buffered frames, skipped below.
+      AudioSegment processed;
+      aip->Process(graph, 0, 4800, &input, &processed);
+      EXPECT_EQ(input.GetDuration(), 4800);
+      EXPECT_EQ(processed.GetDuration(), 4800);
+
+      AudioSegment silence;
+      silence.AppendNullData(480);
+      aip->Process(graph, 0, 480, &silence, &processed);
+      EXPECT_EQ(silence.GetDuration(), 480);
+      EXPECT_EQ(processed.GetDuration(), 480 + 4800);
+
+      // Skip the leading pre-buffering frames.
+      aligned.AppendSlice(processed, 480, 480 + 4800);
+    }
+
+    verifyPrincipals(aligned);
+  }
+
+  // Second pass: verify the principals in pass-through mode.
+  aip->SetPassThrough(graph, true);
+  {
+    AudioSegment passed;
+    aip->Process(graph, 0, 4800, &input, &passed);
+    EXPECT_EQ(input.GetDuration(), 4800);
+    EXPECT_EQ(passed.GetDuration(), 4800);
+
+    verifyPrincipals(passed);
+  }
+
+  aip->Stop(graph);
+  graph->Destroy();
+}
+
+// Checks that 4-channel input is downmixed to mono without clipping in
+// processing mode, and comes out unmodified, still with 4 channels, in
+// pass-through mode.
+TEST(TestAudioInputProcessing, Downmixing)
+{
+  const TrackRate rate = 44100;
+  const uint32_t channels = 4;
+  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);
+  graph->Init(channels);
+
+  auto aip = MakeRefPtr<AudioInputProcessing>(channels);
+
+  const size_t frames = 44100;
+
+  AudioGenerator<AudioDataValue> generator(channels, rate);
+  GraphTime processedTime = 0;
+  GraphTime nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+
+  aip->SetPassThrough(graph, false);
+  aip->Start(graph);
+
+  {
+    AudioSegment input;
+    AudioSegment output;
+    generator.Generate(input, nextTime - processedTime);
+
+    // Intentionally reduce the amplitude of the generated sine wave so there's
+    // no chance the max amplitude reaches 1.0, but only slightly, so that 4
+    // channels summed together without scaling would still clip.
+    input.ApplyVolume(0.9);
+
+    // Process is going to see that it has 4 channels of input, and is going to
+    // downmix to mono, scaling the input by 1/4 in the process.
+    // We can't compare the input and output signal because the sine is going to
+    // be mangled.
+    aip->Process(graph, processedTime, nextTime, &input, &output);
+    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+    EXPECT_EQ(output.GetDuration(), nextTime);
+    EXPECT_EQ(output.MaxChannelCount(), 1u);
+
+    // Verify that it doesn't clip: the processing passes have likely mangled
+    // the signal, but it must stay in range. We know we always have floating
+    // point audio here, regardless of the sample-type used by Gecko.
+    for (AudioSegment::ChunkIterator iterOutput(output); !iterOutput.IsEnded();
+         iterOutput.Next()) {
+      const float* const samples = iterOutput->ChannelData<float>()[0];
+      for (uint32_t i = 0; i < iterOutput->GetDuration(); i++) {
+        // Very conservative: the AGC likely lowers the volume a lot.
+        EXPECT_LE(std::abs(samples[i]), 0.95);
+      }
+    }
+  }
+
+  // Now, repeat the test, checking we get the unmodified 4 channels.
+  aip->SetPassThrough(graph, true);
+
+  AudioSegment input, output;
+  processedTime = nextTime;
+  nextTime += MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
+  generator.Generate(input, nextTime - processedTime);
+
+  aip->Process(graph, processedTime, nextTime, &input, &output);
+  EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
+  EXPECT_EQ(output.GetDuration(), nextTime - processedTime);
+  // This time, no downmix: 4 channels of input, 4 channels of output
+  EXPECT_EQ(output.MaxChannelCount(), 4u);
+
+  nsTArray<AudioDataValue> inputLinearized, outputLinearized;
+  input.WriteToInterleavedBuffer(inputLinearized, input.MaxChannelCount());
+  output.WriteToInterleavedBuffer(outputLinearized, output.MaxChannelCount());
+
+  // The data should be passed through, and exactly equal over its full length.
+  EXPECT_EQ(inputLinearized.Length(), outputLinearized.Length());
+  for (size_t i = 0;
+       i < inputLinearized.Length() && i < outputLinearized.Length(); i++) {
+    EXPECT_EQ(inputLinearized[i], outputLinearized[i]);
+  }
+
+  aip->Stop(graph);
+  graph->Destroy();
+}