diff options
Diffstat (limited to 'dom/media/webrtc/MediaEngineWebRTCAudio.cpp')
-rw-r--r-- | dom/media/webrtc/MediaEngineWebRTCAudio.cpp | 1329 |
1 files changed, 1329 insertions, 0 deletions
diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp new file mode 100644 index 0000000000..64ed88c625 --- /dev/null +++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp @@ -0,0 +1,1329 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MediaEngineWebRTCAudio.h" + +#include <stdio.h> +#include <algorithm> + +#include "AudioConverter.h" +#include "MediaManager.h" +#include "MediaTrackGraphImpl.h" +#include "MediaTrackConstraints.h" +#include "mozilla/Assertions.h" +#include "mozilla/ErrorNames.h" +#include "nsIDUtils.h" +#include "transport/runnable_utils.h" +#include "Tracing.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Logging.h" + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/include/audio_processing.h" + +using namespace webrtc; + +// These are restrictions from the webrtc.org code +#define MAX_CHANNELS 2 +#define MONO 1 +#define MAX_SAMPLING_FREQ 48000 // Hz - multiple of 100 + +namespace mozilla { + +using dom::MediaSourceEnum; + +extern LazyLogModule gMediaManagerLog; +#define LOG(...) MOZ_LOG(gMediaManagerLog, LogLevel::Debug, (__VA_ARGS__)) +#define LOG_FRAME(...) \ + MOZ_LOG(gMediaManagerLog, LogLevel::Verbose, (__VA_ARGS__)) +#define LOG_ERROR(...) MOZ_LOG(gMediaManagerLog, LogLevel::Error, (__VA_ARGS__)) + +/** + * WebRTC Microphone MediaEngineSource. 
+ */ + +MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource( + const MediaDevice* aMediaDevice) + : mPrincipal(PRINCIPAL_HANDLE_NONE), + mDeviceInfo(aMediaDevice->mAudioDeviceInfo), + mDeviceMaxChannelCount(mDeviceInfo->MaxChannels()), + mSettings(new nsMainThreadPtrHolder< + media::Refcountable<dom::MediaTrackSettings>>( + "MediaEngineWebRTCMicrophoneSource::mSettings", + new media::Refcountable<dom::MediaTrackSettings>(), + // Non-strict means it won't assert main thread for us. + // It would be great if it did but we're already on the media thread. + /* aStrict = */ false)) { + MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::Microphone); +#ifndef ANDROID + MOZ_ASSERT(mDeviceInfo->DeviceID()); +#endif + + // We'll init lazily as needed + mSettings->mEchoCancellation.Construct(0); + mSettings->mAutoGainControl.Construct(0); + mSettings->mNoiseSuppression.Construct(0); + mSettings->mChannelCount.Construct(0); + + mState = kReleased; +} + +nsresult MediaEngineWebRTCMicrophoneSource::EvaluateSettings( + const NormalizedConstraints& aConstraintsUpdate, + const MediaEnginePrefs& aInPrefs, MediaEnginePrefs* aOutPrefs, + const char** aOutBadConstraint) { + AssertIsOnOwningThread(); + + FlattenedConstraints c(aConstraintsUpdate); + MediaEnginePrefs prefs = aInPrefs; + + prefs.mAecOn = c.mEchoCancellation.Get(aInPrefs.mAecOn); + prefs.mAgcOn = c.mAutoGainControl.Get(aInPrefs.mAgcOn && prefs.mAecOn); + prefs.mNoiseOn = c.mNoiseSuppression.Get(aInPrefs.mNoiseOn && prefs.mAecOn); + + // Determine an actual channel count to use for this source. Three factors at + // play here: the device capabilities, the constraints passed in by content, + // and a pref that can force things (for testing) + int32_t maxChannels = static_cast<int32_t>(mDeviceInfo->MaxChannels()); + + // First, check channelCount violation wrt constraints. This fails in case of + // error. 
+ if (c.mChannelCount.mMin > maxChannels) { + *aOutBadConstraint = "channelCount"; + return NS_ERROR_FAILURE; + } + // A pref can force the channel count to use. If the pref has a value of zero + // or lower, it has no effect. + if (aInPrefs.mChannels <= 0) { + prefs.mChannels = maxChannels; + } + + // Get the number of channels asked for by content, and clamp it between the + // pref and the maximum number of channels that the device supports. + prefs.mChannels = c.mChannelCount.Get(std::min(prefs.mChannels, maxChannels)); + prefs.mChannels = std::max(1, std::min(prefs.mChannels, maxChannels)); + + LOG("Audio config: agc: %d, noise: %d, channels: %d", + prefs.mAgcOn ? prefs.mAgc : -1, prefs.mNoiseOn ? prefs.mNoise : -1, + prefs.mChannels); + + *aOutPrefs = prefs; + + return NS_OK; +} + +nsresult MediaEngineWebRTCMicrophoneSource::Reconfigure( + const dom::MediaTrackConstraints& aConstraints, + const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) { + AssertIsOnOwningThread(); + MOZ_ASSERT(mTrack); + + LOG("Mic source %p Reconfigure ", this); + + NormalizedConstraints constraints(aConstraints); + MediaEnginePrefs outputPrefs; + nsresult rv = + EvaluateSettings(constraints, aPrefs, &outputPrefs, aOutBadConstraint); + if (NS_FAILED(rv)) { + if (aOutBadConstraint) { + return NS_ERROR_INVALID_ARG; + } + + nsAutoCString name; + GetErrorName(rv, name); + LOG("Mic source %p Reconfigure() failed unexpectedly. 
rv=%s", this, + name.Data()); + Stop(); + return NS_ERROR_UNEXPECTED; + } + + ApplySettings(outputPrefs); + + mCurrentPrefs = outputPrefs; + + return NS_OK; +} + +void MediaEngineWebRTCMicrophoneSource::ApplySettings( + const MediaEnginePrefs& aPrefs) { + AssertIsOnOwningThread(); + + TRACE("ApplySettings"); + MOZ_ASSERT( + mTrack, + "ApplySetting is to be called only after SetTrack has been called"); + + mAudioProcessingConfig.pipeline.multi_channel_render = true; + mAudioProcessingConfig.pipeline.multi_channel_capture = true; + + mAudioProcessingConfig.echo_canceller.enabled = aPrefs.mAecOn; + mAudioProcessingConfig.echo_canceller.mobile_mode = aPrefs.mUseAecMobile; + + if ((mAudioProcessingConfig.gain_controller1.enabled = + aPrefs.mAgcOn && !aPrefs.mAgc2Forced)) { + auto mode = static_cast<AudioProcessing::Config::GainController1::Mode>( + aPrefs.mAgc); + if (mode != AudioProcessing::Config::GainController1::kAdaptiveAnalog && + mode != AudioProcessing::Config::GainController1::kAdaptiveDigital && + mode != AudioProcessing::Config::GainController1::kFixedDigital) { + LOG_ERROR("AudioInputProcessing %p Attempt to set invalid AGC mode %d", + mInputProcessing.get(), static_cast<int>(mode)); + mode = AudioProcessing::Config::GainController1::kAdaptiveDigital; + } +#if defined(WEBRTC_IOS) || defined(ATA) || defined(WEBRTC_ANDROID) + if (mode == AudioProcessing::Config::GainController1::kAdaptiveAnalog) { + LOG_ERROR( + "AudioInputProcessing %p Invalid AGC mode kAdaptiveAnalog on " + "mobile", + mInputProcessing.get()); + MOZ_ASSERT_UNREACHABLE( + "Bad pref set in all.js or in about:config" + " for the auto gain, on mobile."); + mode = AudioProcessing::Config::GainController1::kFixedDigital; + } +#endif + mAudioProcessingConfig.gain_controller1.mode = mode; + } + mAudioProcessingConfig.gain_controller2.enabled = + mAudioProcessingConfig.gain_controller2.adaptive_digital.enabled = + aPrefs.mAgcOn && aPrefs.mAgc2Forced; + + if 
((mAudioProcessingConfig.noise_suppression.enabled = aPrefs.mNoiseOn)) { + auto level = static_cast<AudioProcessing::Config::NoiseSuppression::Level>( + aPrefs.mNoise); + if (level != AudioProcessing::Config::NoiseSuppression::kLow && + level != AudioProcessing::Config::NoiseSuppression::kModerate && + level != AudioProcessing::Config::NoiseSuppression::kHigh && + level != AudioProcessing::Config::NoiseSuppression::kVeryHigh) { + LOG_ERROR( + "AudioInputProcessing %p Attempt to set invalid noise suppression " + "level %d", + mInputProcessing.get(), static_cast<int>(level)); + + level = AudioProcessing::Config::NoiseSuppression::kModerate; + } + mAudioProcessingConfig.noise_suppression.level = level; + } + + mAudioProcessingConfig.transient_suppression.enabled = aPrefs.mTransientOn; + + mAudioProcessingConfig.high_pass_filter.enabled = aPrefs.mHPFOn; + + // See https://bugs.chromium.org/p/webrtc/issues/detail?id=11539 for more + // info. Our pref defaults to false, and if this is truly as unhelpful + // as the upstream bug claim, we could delete the pref that drive this: + // media.getusermedia.residual_echo_enabled. See Bug 1779498. 
+ // mAudioProcessingConfig.residual_echo_detector.enabled = + // aPrefs.mResidualEchoOn; + + RefPtr<MediaEngineWebRTCMicrophoneSource> that = this; + CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID(); + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, [this, that, deviceID, track = mTrack, prefs = aPrefs, + audioProcessingConfig = mAudioProcessingConfig] { + mSettings->mEchoCancellation.Value() = prefs.mAecOn; + mSettings->mAutoGainControl.Value() = prefs.mAgcOn; + mSettings->mNoiseSuppression.Value() = prefs.mNoiseOn; + mSettings->mChannelCount.Value() = prefs.mChannels; + + class Message : public ControlMessage { + CubebUtils::AudioDeviceID mDeviceID; + const RefPtr<AudioInputProcessing> mInputProcessing; + const AudioProcessing::Config mAudioProcessingConfig; + const bool mPassThrough; + const uint32_t mRequestedInputChannelCount; + + public: + Message(MediaTrack* aTrack, CubebUtils::AudioDeviceID aDeviceID, + AudioInputProcessing* aInputProcessing, + const AudioProcessing::Config& aAudioProcessingConfig, + bool aPassThrough, uint32_t aRequestedInputChannelCount) + : ControlMessage(aTrack), + mDeviceID(aDeviceID), + mInputProcessing(aInputProcessing), + mAudioProcessingConfig(aAudioProcessingConfig), + mPassThrough(aPassThrough), + mRequestedInputChannelCount(aRequestedInputChannelCount) {} + + void Run() override { + mInputProcessing->ApplyConfig(mTrack->GraphImpl(), + mAudioProcessingConfig); + { + TRACE("SetRequestedInputChannelCount"); + mInputProcessing->SetRequestedInputChannelCount( + mTrack->GraphImpl(), mDeviceID, mRequestedInputChannelCount); + } + { + TRACE("SetPassThrough") + mInputProcessing->SetPassThrough(mTrack->GraphImpl(), + mPassThrough); + } + } + }; + + // The high-pass filter is not taken into account when activating the + // pass through, since it's not controllable from content. 
+ bool passThrough = !(prefs.mAecOn || prefs.mAgcOn || prefs.mNoiseOn); + + if (track->IsDestroyed()) { + return; + } + track->GraphImpl()->AppendMessage(MakeUnique<Message>( + track, deviceID, mInputProcessing, audioProcessingConfig, + passThrough, prefs.mChannels)); + })); +} + +nsresult MediaEngineWebRTCMicrophoneSource::Allocate( + const dom::MediaTrackConstraints& aConstraints, + const MediaEnginePrefs& aPrefs, uint64_t aWindowID, + const char** aOutBadConstraint) { + AssertIsOnOwningThread(); + + mState = kAllocated; + + NormalizedConstraints normalized(aConstraints); + MediaEnginePrefs outputPrefs; + nsresult rv = + EvaluateSettings(normalized, aPrefs, &outputPrefs, aOutBadConstraint); + if (NS_FAILED(rv)) { + return rv; + } + + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, [settings = mSettings, prefs = outputPrefs] { + settings->mEchoCancellation.Value() = prefs.mAecOn; + settings->mAutoGainControl.Value() = prefs.mAgcOn; + settings->mNoiseSuppression.Value() = prefs.mNoiseOn; + settings->mChannelCount.Value() = prefs.mChannels; + })); + + mCurrentPrefs = outputPrefs; + + return rv; +} + +nsresult MediaEngineWebRTCMicrophoneSource::Deallocate() { + AssertIsOnOwningThread(); + + MOZ_ASSERT(mState == kStopped || mState == kAllocated); + + class EndTrackMessage : public ControlMessage { + const RefPtr<AudioInputProcessing> mInputProcessing; + + public: + explicit EndTrackMessage(AudioInputProcessing* aAudioInputProcessing) + : ControlMessage(nullptr), mInputProcessing(aAudioInputProcessing) {} + + void Run() override { + TRACE("mInputProcessing::End"); + mInputProcessing->End(); + } + }; + + if (mTrack) { + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, + [track = std::move(mTrack), inputProcessing = mInputProcessing] { + if (track->IsDestroyed()) { + // This track has already been destroyed on main thread by its + // DOMMediaStream. No cleanup left to do. 
+ return; + } + track->GraphImpl()->AppendMessage( + MakeUnique<EndTrackMessage>(inputProcessing)); + })); + } + + // Reset all state. This is not strictly necessary, this instance will get + // destroyed soon. + mTrack = nullptr; + mPrincipal = PRINCIPAL_HANDLE_NONE; + + // If empty, no callbacks to deliver data should be occuring + MOZ_ASSERT(mState != kReleased, "Source not allocated"); + MOZ_ASSERT(mState != kStarted, "Source not stopped"); + + mState = kReleased; + LOG("Mic source %p Audio device %s deallocated", this, + NS_ConvertUTF16toUTF8(mDeviceInfo->Name()).get()); + return NS_OK; +} + +void MediaEngineWebRTCMicrophoneSource::SetTrack( + const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipal) { + AssertIsOnOwningThread(); + MOZ_ASSERT(aTrack); + MOZ_ASSERT(aTrack->AsAudioProcessingTrack()); + + MOZ_ASSERT(!mTrack); + MOZ_ASSERT(mPrincipal == PRINCIPAL_HANDLE_NONE); + mTrack = aTrack->AsAudioProcessingTrack(); + mPrincipal = aPrincipal; + + mInputProcessing = + MakeAndAddRef<AudioInputProcessing>(mDeviceMaxChannelCount); + + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, [track = mTrack, processing = mInputProcessing]() mutable { + track->SetInputProcessing(std::move(processing)); + track->Resume(); // Suspended by MediaManager + })); + + LOG("Mic source %p Track %p registered for microphone capture", this, + aTrack.get()); +} + +class StartStopMessage : public ControlMessage { + public: + enum StartStop { Start, Stop }; + + StartStopMessage(MediaTrack* aTrack, AudioInputProcessing* aInputProcessing, + StartStop aAction) + : ControlMessage(aTrack), + mInputProcessing(aInputProcessing), + mAction(aAction) {} + + void Run() override { + if (mAction == StartStopMessage::Start) { + TRACE("InputProcessing::Start") + mInputProcessing->Start(mTrack->GraphImpl()); + } else if (mAction == StartStopMessage::Stop) { + TRACE("InputProcessing::Stop") + mInputProcessing->Stop(mTrack->GraphImpl()); + } else { + MOZ_CRASH("Invalid enum value"); 
+ } + } + + protected: + const RefPtr<AudioInputProcessing> mInputProcessing; + const StartStop mAction; +}; + +nsresult MediaEngineWebRTCMicrophoneSource::Start() { + AssertIsOnOwningThread(); + + // This spans setting both the enabled state and mState. + if (mState == kStarted) { + return NS_OK; + } + + MOZ_ASSERT(mState == kAllocated || mState == kStopped); + + ApplySettings(mCurrentPrefs); + + CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID(); + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, [inputProcessing = mInputProcessing, deviceID, track = mTrack, + principal = mPrincipal] { + if (track->IsDestroyed()) { + return; + } + + track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>( + track, inputProcessing, StartStopMessage::Start)); + track->ConnectDeviceInput(deviceID, inputProcessing.get(), principal); + })); + + MOZ_ASSERT(mState != kReleased); + mState = kStarted; + + return NS_OK; +} + +nsresult MediaEngineWebRTCMicrophoneSource::Stop() { + AssertIsOnOwningThread(); + + LOG("Mic source %p Stop()", this); + MOZ_ASSERT(mTrack, "SetTrack must have been called before ::Stop"); + + if (mState == kStopped) { + // Already stopped - this is allowed + return NS_OK; + } + + NS_DispatchToMainThread(NS_NewRunnableFunction( + __func__, [inputProcessing = mInputProcessing, deviceInfo = mDeviceInfo, + track = mTrack] { + if (track->IsDestroyed()) { + return; + } + + MOZ_ASSERT(track->DeviceId().value() == deviceInfo->DeviceID()); + track->DisconnectDeviceInput(); + track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>( + track, inputProcessing, StartStopMessage::Stop)); + })); + + MOZ_ASSERT(mState == kStarted, "Should be started when stopping"); + mState = kStopped; + + return NS_OK; +} + +void MediaEngineWebRTCMicrophoneSource::GetSettings( + dom::MediaTrackSettings& aOutSettings) const { + MOZ_ASSERT(NS_IsMainThread()); + aOutSettings = *mSettings; +} + +AudioInputProcessing::AudioInputProcessing(uint32_t aMaxChannelCount) + 
: mAudioProcessing(AudioProcessingBuilder().Create().release()), + mRequestedInputChannelCount(aMaxChannelCount), + mSkipProcessing(false), + mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100), + mEnabled(false), + mEnded(false), + mPacketCount(0) {} + +void AudioInputProcessing::Disconnect(MediaTrackGraphImpl* aGraph) { + // This method is just for asserts. + MOZ_ASSERT(aGraph->OnGraphThread()); +} + +bool AudioInputProcessing::PassThrough(MediaTrackGraphImpl* aGraph) const { + MOZ_ASSERT(aGraph->OnGraphThread()); + return mSkipProcessing; +} + +void AudioInputProcessing::SetPassThrough(MediaTrackGraphImpl* aGraph, + bool aPassThrough) { + MOZ_ASSERT(aGraph->OnGraphThread()); + + if (aPassThrough == mSkipProcessing) { + return; + } + mSkipProcessing = aPassThrough; + + if (!mEnabled) { + MOZ_ASSERT(!mPacketizerInput); + return; + } + + if (aPassThrough) { + // Turn on pass-through + ResetAudioProcessing(aGraph); + } else { + // Turn off pass-through + MOZ_ASSERT(!mPacketizerInput); + EnsureAudioProcessing(aGraph, mRequestedInputChannelCount); + } +} + +uint32_t AudioInputProcessing::GetRequestedInputChannelCount() { + return mRequestedInputChannelCount; +} + +void AudioInputProcessing::SetRequestedInputChannelCount( + MediaTrackGraphImpl* aGraph, CubebUtils::AudioDeviceID aDeviceId, + uint32_t aRequestedInputChannelCount) { + mRequestedInputChannelCount = aRequestedInputChannelCount; + + aGraph->ReevaluateInputDevice(aDeviceId); +} + +void AudioInputProcessing::Start(MediaTrackGraphImpl* aGraph) { + MOZ_ASSERT(aGraph->OnGraphThread()); + + if (mEnabled) { + return; + } + mEnabled = true; + + if (mSkipProcessing) { + return; + } + + MOZ_ASSERT(!mPacketizerInput); + EnsureAudioProcessing(aGraph, mRequestedInputChannelCount); +} + +void AudioInputProcessing::Stop(MediaTrackGraphImpl* aGraph) { + MOZ_ASSERT(aGraph->OnGraphThread()); + + if (!mEnabled) { + return; + } + + mEnabled = false; + + if (mSkipProcessing) { + return; + } + + // Packetizer is active 
and we were just stopped. Stop the packetizer and + // processing. + ResetAudioProcessing(aGraph); +} + +// The following is how how Process() works in pass-through and non-pass-through +// mode. In both mode, Process() outputs the same amount of the frames as its +// input data. +// +// I. In non-pass-through mode: +// +// We will use webrtc::AudioProcessing to process the input audio data in this +// mode. The data input in webrtc::AudioProcessing needs to be a 10ms chunk, +// while the input data passed to Process() is not necessary to have times of +// 10ms-chunk length. To divide the input data into 10ms chunks, +// mPacketizerInput is introduced. +// +// We will add one 10ms-chunk silence into the internal buffer before Process() +// works. Those extra frames is called pre-buffering. It aims to avoid glitches +// we may have when producing data in mPacketizerInput. Without pre-buffering, +// when the input data length is not 10ms-times, we could end up having no +// enough output needs since mPacketizerInput would keep some input data, which +// is the remainder of the 10ms-chunk length. To force processing those data +// left in mPacketizerInput, we would need to add some extra frames to make +// mPacketizerInput produce a 10ms-chunk. For example, if the sample rate is +// 44100 Hz, then the packet-size is 441 frames. When we only have 384 input +// frames, we would need to put additional 57 frames to mPacketizerInput to +// produce a packet. However, those extra 57 frames result in a glitch sound. +// +// By adding one 10ms-chunk silence in advance to the internal buffer, we won't +// need to add extra frames between the input data no matter what data length it +// is. The only drawback is the input data won't be processed and send to output +// immediately. Process() will consume pre-buffering data for its output first. 
+// The below describes how it works: +// +// +// Process() +// +-----------------------------+ +// input D(N) | +--------+ +--------+ | output D(N) +// --------------|-->| P(N) |-->| S(N) |---|--------------> +// | +--------+ +--------+ | +// | packetizer mSegment | +// +-----------------------------+ +// <------ internal buffer ------> +// +// +// D(N): number of frames from the input and the output needs in the N round +// Z: number of frames of a 10ms chunk(packet) in mPacketizerInput, Z >= 1 +// (if Z = 1, packetizer has no effect) +// P(N): number of frames left in mPacketizerInput after the N round. Once the +// frames in packetizer >= Z, packetizer will produce a packet to +// mSegment, so P(N) = (P(N-1) + D(N)) % Z, 0 <= P(N) <= Z-1 +// S(N): number of frames left in mSegment after the N round. The input D(N) +// frames will be passed to mPacketizerInput first, and then +// mPacketizerInput may append some packets to mSegment, so +// S(N) = S(N-1) + Z * floor((P(N-1) + D(N)) / Z) - D(N) +// +// At first, we set P(0) = 0, S(0) = X, where X >= Z-1. X is the +// pre-buffering put in the internal buffer. With these settings, P(K) + S(K) = X +// always holds. +// +// Intuitively, this seems true: We put X frames in the internal buffer at +// first. If the data won't be blocked in packetizer, after the Process(), the +// internal buffer should still hold X frames since the number of frames coming +// from input is the same as the output needs. The key of having enough data for +// output needs, while the input data is piled up in packetizer, is by putting +// at least Z-1 frames as pre-buffering, since the maximum number of frames +// stuck in the packetizer before it can emit a packet is packet-size - 1. +// Otherwise, we don't have enough data for output if the new input data plus +// the data left in packetizer produces a smaller-than-10ms chunk, which will be +// left in packetizer. 
Thus we must have some pre-buffering frames in the +// mSegment to make up the length of the left chunk we need for output. This can +// also be shown by induction: +// (1) This holds when K = 0 +// (2) Assume this holds when K = N: so P(N) + S(N) = X +// => P(N) + S(N) = X >= Z-1 => S(N) >= Z-1-P(N) +// (3) When K = N+1, we have D(N+1) input frames coming in +// a. if P(N) + D(N+1) < Z, then packetizer does not have enough data for one +// packet. No data produced by packetizer, so the mSegment now has +// S(N) >= Z-1-P(N) frames. Output needs D(N+1) < Z-P(N) frames. So it +// needs at most Z-P(N)-1 frames, and mSegment has enough frames for +// output. Then, P(N+1) = P(N) + D(N+1) and S(N+1) = S(N) - D(N+1) +// => P(N+1) + S(N+1) = P(N) + S(N) = X +// b. if P(N) + D(N+1) = Z, then packetizer will produce one packet for +// mSegment, so mSegment now has S(N) + Z frames. Output needs D(N+1) +// = Z-P(N) frames. mSegment has at least Z-1-P(N)+Z >= Z-P(N) frames, since +// Z >= 1. So mSegment has enough frames for output. Then, P(N+1) = 0 and +// S(N+1) = S(N) + Z - D(N+1) = S(N) + P(N) +// => P(N+1) + S(N+1) = P(N) + S(N) = X +// c. if P(N) + D(N+1) > Z, and let P(N) + D(N+1) = q * Z + r, where q >= 1 +// and 0 <= r <= Z-1, then packetizer can produce q packets +// for mSegment. Output needs D(N+1) = q * Z - P(N) + r frames and +// mSegment has S(N) + q * Z >= q * Z - P(N) + Z-1 >= q * Z - P(N) + r, +// since r <= Z-1. So mSegment has enough frames for output. Then, +// P(N+1) = r and S(N+1) = S(N) + q * Z - D(N+1) +// => P(N+1) + S(N+1) = S(N) + (q * Z + r - D(N+1)) = S(N) + P(N) = X +// => P(K) + S(K) = X always holds +// +// Since P(K) + S(K) = X and P(K) is in [0, Z-1], the S(K) is in [X-Z+1, X] +// range. In our implementation, X is set to Z so S(K) is in [1, Z]. +// By the above workflow, we always have enough data for output and no extra +// frames put into packetizer. It means we don't have any glitch! +// +// II. 
In pass-through mode: +// +// Process() +// +--------+ +// input D(N) | | output D(N) +// -------------->-------->---------------> +// | | +// +--------+ +// +// The D(N) frames of data are just forwarded from input to output without any +// processing +void AudioInputProcessing::Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom, + GraphTime aTo, AudioSegment* aInput, + AudioSegment* aOutput) { + MOZ_ASSERT(aGraph->OnGraphThread()); + MOZ_ASSERT(aFrom <= aTo); + MOZ_ASSERT(!mEnded); + + TrackTime need = aTo - aFrom; + if (need == 0) { + return; + } + + if (!mEnabled) { + LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64 + " frames of silence to output (disabled)", + aGraph, aGraph->CurrentDriver(), this, need); + aOutput->AppendNullData(need); + return; + } + + MOZ_ASSERT(aInput->GetDuration() == need, + "Wrong data length from input port source"); + + if (PassThrough(aGraph)) { + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64 + " frames of input data to output directly (PassThrough)", + aGraph, aGraph->CurrentDriver(), this, aInput->GetDuration()); + aOutput->AppendSegment(aInput); + return; + } + + // SetPassThrough(false) must be called before reaching here. + MOZ_ASSERT(mPacketizerInput); + // If mRequestedInputChannelCount is updated, create a new packetizer. No + // need to change the pre-buffering since the rate is always the same. The + // frames left in the packetizer would be replaced by null data and then + // transferred to mSegment. + EnsureAudioProcessing(aGraph, mRequestedInputChannelCount); + + // Preconditions of the audio-processing logic. + MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) + + mPacketizerInput->FramesAvailable() == + mPacketizerInput->mPacketSize); + // We pre-buffer mPacketSize frames, but the maximum number of frames stuck in + // the packetizer before it can emit a packet is mPacketSize-1. Thus that + // remaining 1 frame will always be present in mSegment. 
+ MOZ_ASSERT(mSegment.GetDuration() >= 1); + MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize); + + PacketizeAndProcess(aGraph, *aInput); + LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64 + " frames of data now, after packetizing and processing", + aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration()); + + // By setting pre-buffering to the number of frames of one packet, and + // because the maximum number of frames stuck in the packetizer before + // it can emit a packet is the mPacketSize-1, we always have at least + // one more frame than output needs. + MOZ_ASSERT(mSegment.GetDuration() > need); + aOutput->AppendSlice(mSegment, 0, need); + mSegment.RemoveLeading(need); + LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64 + " frames of data to output, leaving %" PRId64 " frames in buffer", + aGraph, aGraph->CurrentDriver(), this, need, + mSegment.GetDuration()); + + // Postconditions of the audio-processing logic. + MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) + + mPacketizerInput->FramesAvailable() == + mPacketizerInput->mPacketSize); + MOZ_ASSERT(mSegment.GetDuration() >= 1); + MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize); +} + +void AudioInputProcessing::ProcessOutputData(MediaTrackGraphImpl* aGraph, + AudioDataValue* aBuffer, + size_t aFrames, TrackRate aRate, + uint32_t aChannels) { + MOZ_ASSERT(aGraph->OnGraphThread()); + + if (!mEnabled || PassThrough(aGraph)) { + return; + } + + if (!mPacketizerOutput || + mPacketizerOutput->mPacketSize != GetPacketSize(aRate) || + mPacketizerOutput->mChannels != aChannels) { + // It's ok to drop the audio still in the packetizer here: if this changes, + // we changed devices or something. 
+ mPacketizerOutput = Nothing(); + mPacketizerOutput.emplace(GetPacketSize(aRate), aChannels); + } + + mPacketizerOutput->Input(aBuffer, aFrames); + + while (mPacketizerOutput->PacketsAvailable()) { + uint32_t samplesPerPacket = + mPacketizerOutput->mPacketSize * mPacketizerOutput->mChannels; + if (mOutputBuffer.Length() < samplesPerPacket) { + mOutputBuffer.SetLength(samplesPerPacket); + } + if (mDeinterleavedBuffer.Length() < samplesPerPacket) { + mDeinterleavedBuffer.SetLength(samplesPerPacket); + } + float* packet = mOutputBuffer.Data(); + mPacketizerOutput->Output(packet); + + AutoTArray<float*, MAX_CHANNELS> deinterleavedPacketDataChannelPointers; + float* interleavedFarend = nullptr; + uint32_t channelCountFarend = 0; + uint32_t framesPerPacketFarend = 0; + + // Downmix from aChannels to MAX_CHANNELS if needed. We always have + // floats here, the packetized performed the conversion. + if (aChannels > MAX_CHANNELS) { + AudioConverter converter( + AudioConfig(aChannels, 0, AudioConfig::FORMAT_FLT), + AudioConfig(MAX_CHANNELS, 0, AudioConfig::FORMAT_FLT)); + framesPerPacketFarend = mPacketizerOutput->mPacketSize; + framesPerPacketFarend = + converter.Process(mInputDownmixBuffer, packet, framesPerPacketFarend); + interleavedFarend = mInputDownmixBuffer.Data(); + channelCountFarend = MAX_CHANNELS; + deinterleavedPacketDataChannelPointers.SetLength(MAX_CHANNELS); + } else { + interleavedFarend = packet; + channelCountFarend = aChannels; + framesPerPacketFarend = mPacketizerOutput->mPacketSize; + deinterleavedPacketDataChannelPointers.SetLength(aChannels); + } + + MOZ_ASSERT(interleavedFarend && + (channelCountFarend == 1 || channelCountFarend == 2) && + framesPerPacketFarend); + + if (mInputBuffer.Length() < framesPerPacketFarend * channelCountFarend) { + mInputBuffer.SetLength(framesPerPacketFarend * channelCountFarend); + } + + size_t offset = 0; + for (size_t i = 0; i < deinterleavedPacketDataChannelPointers.Length(); + ++i) { + 
deinterleavedPacketDataChannelPointers[i] = mInputBuffer.Data() + offset; + offset += framesPerPacketFarend; + } + + // Deinterleave, prepare a channel pointers array, with enough storage for + // the frames. + DeinterleaveAndConvertBuffer( + interleavedFarend, framesPerPacketFarend, channelCountFarend, + deinterleavedPacketDataChannelPointers.Elements()); + + // Having the same config for input and output means we potentially save + // some CPU. + StreamConfig inputConfig(aRate, channelCountFarend); + StreamConfig outputConfig = inputConfig; + + // Passing the same pointers here saves a copy inside this function. + DebugOnly<int> err = mAudioProcessing->ProcessReverseStream( + deinterleavedPacketDataChannelPointers.Elements(), inputConfig, + outputConfig, deinterleavedPacketDataChannelPointers.Elements()); + + MOZ_ASSERT(!err, "Could not process the reverse stream."); + } +} + +// Only called if we're not in passthrough mode +void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph, + const AudioSegment& aSegment) { + MOZ_ASSERT(!PassThrough(aGraph), + "This should be bypassed when in PassThrough mode."); + MOZ_ASSERT(mEnabled); + MOZ_ASSERT(mPacketizerInput); + MOZ_ASSERT(mPacketizerInput->mPacketSize == + GetPacketSize(aGraph->GraphRate())); + + // Calculate number of the pending frames in mChunksInPacketizer. + auto pendingFrames = [&]() { + TrackTime frames = 0; + for (const auto& p : mChunksInPacketizer) { + frames += p.first; + } + return frames; + }; + + // Precondition of the Principal-labelling logic below. + MOZ_ASSERT(mPacketizerInput->FramesAvailable() == + static_cast<uint32_t>(pendingFrames())); + + // The WriteToInterleavedBuffer will do upmix or downmix if the channel-count + // in aSegment's chunks is different from mPacketizerInput->mChannels + // WriteToInterleavedBuffer could be avoided once Bug 1729041 is done. 
+ size_t sampleCount = aSegment.WriteToInterleavedBuffer( + mInterleavedBuffer, mPacketizerInput->mChannels); + size_t frameCount = + sampleCount / static_cast<size_t>(mPacketizerInput->mChannels); + + // Packetize our input data into 10ms chunks, deinterleave into planar channel + // buffers, process, and append to the right MediaStreamTrack. + mPacketizerInput->Input(mInterleavedBuffer.Elements(), + static_cast<uint32_t>(frameCount)); + + // Update mChunksInPacketizer and make sure the precondition for the + // Principal-labelling logic still holds. + for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + MOZ_ASSERT(iter->mDuration > 0); + mChunksInPacketizer.emplace_back( + std::make_pair(iter->mDuration, iter->mPrincipalHandle)); + } + MOZ_ASSERT(mPacketizerInput->FramesAvailable() == + static_cast<uint32_t>(pendingFrames())); + + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. " + "Packetizer has %u frames (enough for %u packets) now", + aGraph, aGraph->CurrentDriver(), this, frameCount, + mPacketizerInput->FramesAvailable(), + mPacketizerInput->PacketsAvailable()); + + size_t offset = 0; + + while (mPacketizerInput->PacketsAvailable()) { + mPacketCount++; + uint32_t samplesPerPacket = + mPacketizerInput->mPacketSize * mPacketizerInput->mChannels; + if (mInputBuffer.Length() < samplesPerPacket) { + mInputBuffer.SetLength(samplesPerPacket); + } + if (mDeinterleavedBuffer.Length() < samplesPerPacket) { + mDeinterleavedBuffer.SetLength(samplesPerPacket); + } + float* packet = mInputBuffer.Data(); + mPacketizerInput->Output(packet); + + // Downmix from mPacketizerInput->mChannels to mono if needed. We always + // have floats here, the packetizer performed the conversion. 
+ AutoTArray<float*, 8> deinterleavedPacketizedInputDataChannelPointers; + uint32_t channelCountInput = 0; + if (mPacketizerInput->mChannels > MAX_CHANNELS) { + channelCountInput = MONO; + deinterleavedPacketizedInputDataChannelPointers.SetLength( + channelCountInput); + deinterleavedPacketizedInputDataChannelPointers[0] = + mDeinterleavedBuffer.Data(); + // Downmix to mono (and effectively have a planar buffer) by summing all + // channels in the first channel, and scaling by the number of channels to + // avoid clipping. + float gain = 1.f / mPacketizerInput->mChannels; + size_t readIndex = 0; + for (size_t i = 0; i < mPacketizerInput->mPacketSize; i++) { + mDeinterleavedBuffer.Data()[i] = 0.; + for (size_t j = 0; j < mPacketizerInput->mChannels; j++) { + mDeinterleavedBuffer.Data()[i] += gain * packet[readIndex++]; + } + } + } else { + channelCountInput = mPacketizerInput->mChannels; + // Deinterleave the input data + // Prepare an array pointing to deinterleaved channels. + deinterleavedPacketizedInputDataChannelPointers.SetLength( + channelCountInput); + offset = 0; + for (size_t i = 0; + i < deinterleavedPacketizedInputDataChannelPointers.Length(); ++i) { + deinterleavedPacketizedInputDataChannelPointers[i] = + mDeinterleavedBuffer.Data() + offset; + offset += mPacketizerInput->mPacketSize; + } + // Deinterleave to mInputBuffer, pointed to by inputBufferChannelPointers. + Deinterleave(packet, mPacketizerInput->mPacketSize, channelCountInput, + deinterleavedPacketizedInputDataChannelPointers.Elements()); + } + + StreamConfig inputConfig(aGraph->GraphRate(), channelCountInput); + StreamConfig outputConfig = inputConfig; + + // Bug 1404965: Get the right delay here, it saves some work down the line. + mAudioProcessing->set_stream_delay_ms(0); + + // Bug 1414837: find a way to not allocate here. 
+ CheckedInt<size_t> bufferSize(sizeof(float)); + bufferSize *= mPacketizerInput->mPacketSize; + bufferSize *= channelCountInput; + RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); + + // Prepare channel pointers to the SharedBuffer created above. + AutoTArray<float*, 8> processedOutputChannelPointers; + AutoTArray<const float*, 8> processedOutputChannelPointersConst; + processedOutputChannelPointers.SetLength(channelCountInput); + processedOutputChannelPointersConst.SetLength(channelCountInput); + + offset = 0; + for (size_t i = 0; i < processedOutputChannelPointers.Length(); ++i) { + processedOutputChannelPointers[i] = + static_cast<float*>(buffer->Data()) + offset; + processedOutputChannelPointersConst[i] = + static_cast<float*>(buffer->Data()) + offset; + offset += mPacketizerInput->mPacketSize; + } + + mAudioProcessing->ProcessStream( + deinterleavedPacketizedInputDataChannelPointers.Elements(), inputConfig, + outputConfig, processedOutputChannelPointers.Elements()); + + // If logging is enabled, dump the audio processing stats twice a second + if (MOZ_LOG_TEST(gMediaManagerLog, LogLevel::Debug) && + !(mPacketCount % 50)) { + AudioProcessingStats stats = mAudioProcessing->GetStatistics(); + char msg[1024]; + size_t offset = 0; +#define AddIfValue(format, member) \ + if (stats.member.has_value()) { \ + offset += SprintfBuf(msg + offset, sizeof(msg) - offset, \ + #member ":" format ", ", stats.member.value()); \ + } + AddIfValue("%d", voice_detected); + AddIfValue("%lf", echo_return_loss); + AddIfValue("%lf", echo_return_loss_enhancement); + AddIfValue("%lf", divergent_filter_fraction); + AddIfValue("%d", delay_median_ms); + AddIfValue("%d", delay_standard_deviation_ms); + AddIfValue("%lf", residual_echo_likelihood); + AddIfValue("%lf", residual_echo_likelihood_recent_max); + AddIfValue("%d", delay_ms); +#undef AddIfValue + LOG("AudioProcessing statistics: %s", msg); + } + + if (mEnded) { + continue; + } + + // We already have planar audio data of 
the right format. Insert into the + // MTG. + MOZ_ASSERT(processedOutputChannelPointers.Length() == channelCountInput); + + // Insert the processed data chunk by chunk to mSegment with the paired + // PrincipalHandle value. The chunks are tracked in mChunksInPacketizer. + + auto getAudioChunk = [&](TrackTime aStart, TrackTime aEnd, + const PrincipalHandle& aPrincipalHandle) { + if (aStart == aEnd) { + return AudioChunk(); + } + RefPtr<SharedBuffer> other = buffer; + AudioChunk c = + AudioChunk(other.forget(), processedOutputChannelPointersConst, + static_cast<TrackTime>(mPacketizerInput->mPacketSize), + aPrincipalHandle); + c.SliceTo(aStart, aEnd); + return c; + }; + + // The number of frames of data that needs to be labelled with Principal + // values. + TrackTime len = static_cast<TrackTime>(mPacketizerInput->mPacketSize); + // The start offset of the unlabelled chunk. + TrackTime start = 0; + // By mChunksInPacketizer's information, we can keep labelling the + // unlabelled frames chunk by chunk. + while (!mChunksInPacketizer.empty()) { + auto& [frames, principal] = mChunksInPacketizer.front(); + const TrackTime end = start + frames; + if (end > len) { + // If the left unlabelled frames are part of this chunk, then we need to + // adjust the number of frames in the chunk. + if (len > start) { + mSegment.AppendAndConsumeChunk(getAudioChunk(start, len, principal)); + frames -= len - start; + } + break; + } + // Otherwise, the number of unlabelled frames is larger than or equal to + // this chunk. We can label the whole chunk directly. 
+ mSegment.AppendAndConsumeChunk(getAudioChunk(start, end, principal)); + start = end; + mChunksInPacketizer.pop_front(); + } + + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of " + "packetized audio, leaving %u frames in packetizer (%" PRId64 + " frames in mChunksInPacketizer)", + aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize, + mPacketizerInput->FramesAvailable(), pendingFrames()); + + // Postcondition of the Principal-labelling logic. + MOZ_ASSERT(mPacketizerInput->FramesAvailable() == + static_cast<uint32_t>(pendingFrames())); + } +} + +void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) { + MOZ_ASSERT(aGraph->OnGraphThread()); + + // Reset some processing + mAudioProcessing->Initialize(); + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Reinitializing audio " + "processing", + aGraph, aGraph->CurrentDriver(), this); +} + +void AudioInputProcessing::ApplyConfig(MediaTrackGraphImpl* aGraph, + const AudioProcessing::Config& aConfig) { + MOZ_ASSERT(aGraph->OnGraphThread()); + mAudioProcessing->ApplyConfig(aConfig); +} + +void AudioInputProcessing::End() { + mEnded = true; + mSegment.Clear(); +} + +TrackTime AudioInputProcessing::NumBufferedFrames( + MediaTrackGraphImpl* aGraph) const { + MOZ_ASSERT(aGraph->OnGraphThread()); + return mSegment.GetDuration(); +} + +void AudioInputProcessing::EnsureAudioProcessing(MediaTrackGraphImpl* aGraph, + uint32_t aChannels) { + MOZ_ASSERT(aGraph->OnGraphThread()); + MOZ_ASSERT(aChannels > 0); + MOZ_ASSERT(mEnabled); + MOZ_ASSERT(!mSkipProcessing); + + if (mPacketizerInput && mPacketizerInput->mChannels == aChannels) { + return; + } + + // If mPacketizerInput exists but with different channel-count, there is no + // need to change pre-buffering since the packet size is the same as the old + // one, since the rate is a constant. 
+ MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize == + GetPacketSize(aGraph->GraphRate())); + bool needPreBuffering = !mPacketizerInput; + if (mPacketizerInput) { + const TrackTime numBufferedFrames = + static_cast<TrackTime>(mPacketizerInput->FramesAvailable()); + mSegment.AppendNullData(numBufferedFrames); + mPacketizerInput = Nothing(); + mChunksInPacketizer.clear(); + } + + mPacketizerInput.emplace(GetPacketSize(aGraph->GraphRate()), aChannels); + + if (needPreBuffering) { + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of " + "silence as pre-buffering", + aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize); + + AudioSegment buffering; + buffering.AppendNullData( + static_cast<TrackTime>(mPacketizerInput->mPacketSize)); + PacketizeAndProcess(aGraph, buffering); + } +} + +void AudioInputProcessing::ResetAudioProcessing(MediaTrackGraphImpl* aGraph) { + MOZ_ASSERT(aGraph->OnGraphThread()); + MOZ_ASSERT(mSkipProcessing || !mEnabled); + MOZ_ASSERT(mPacketizerInput); + + LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Resetting audio " + "processing", + aGraph, aGraph->CurrentDriver(), this); + + // Reset AudioProcessing so that if we resume processing in the future it + // doesn't depend on old state. + mAudioProcessing->Initialize(); + + MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) + + mPacketizerInput->FramesAvailable() == + mPacketizerInput->mPacketSize); + + // It's ok to clear all the internal buffer here since we won't use mSegment + // in pass-through mode or when audio processing is disabled. 
+ LOG_FRAME( + "(Graph %p, Driver %p) AudioInputProcessing %p Emptying out %" PRId64 + " frames of data", + aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration()); + mSegment.Clear(); + + mPacketizerInput = Nothing(); + mChunksInPacketizer.clear(); +} + +void AudioProcessingTrack::Destroy() { + MOZ_ASSERT(NS_IsMainThread()); + DisconnectDeviceInput(); + + MediaTrack::Destroy(); +} + +void AudioProcessingTrack::SetInputProcessing( + RefPtr<AudioInputProcessing> aInputProcessing) { + class Message : public ControlMessage { + const RefPtr<AudioProcessingTrack> mTrack; + const RefPtr<AudioInputProcessing> mProcessing; + + public: + Message(RefPtr<AudioProcessingTrack> aTrack, + RefPtr<AudioInputProcessing> aProcessing) + : ControlMessage(aTrack), + mTrack(std::move(aTrack)), + mProcessing(std::move(aProcessing)) {} + void Run() override { + TRACE("AudioProcessingTrack::SetInputProcessingImpl"); + mTrack->SetInputProcessingImpl(mProcessing); + } + }; + + if (IsDestroyed()) { + return; + } + GraphImpl()->AppendMessage( + MakeUnique<Message>(std::move(this), std::move(aInputProcessing))); +} + +AudioProcessingTrack* AudioProcessingTrack::Create(MediaTrackGraph* aGraph) { + MOZ_ASSERT(NS_IsMainThread()); + AudioProcessingTrack* track = new AudioProcessingTrack(aGraph->GraphRate()); + aGraph->AddTrack(track); + return track; +} + +void AudioProcessingTrack::DestroyImpl() { + ProcessedMediaTrack::DestroyImpl(); + if (mInputProcessing) { + mInputProcessing->End(); + } +} + +void AudioProcessingTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, + uint32_t aFlags) { + TRACE_COMMENT("AudioProcessingTrack::ProcessInput", "AudioProcessingTrack %p", + this); + MOZ_ASSERT(mInputProcessing); + + LOG_FRAME( + "(Graph %p, Driver %p) AudioProcessingTrack %p ProcessInput from %" PRId64 + " to %" PRId64 ", needs %" PRId64 " frames", + mGraph, mGraph->CurrentDriver(), this, aFrom, aTo, aTo - aFrom); + + if (aFrom >= aTo) { + return; + } + + if (!mInputProcessing->IsEnded()) { + 
MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aFrom); + if (mInputs.IsEmpty()) { + GetData<AudioSegment>()->AppendNullData(aTo - aFrom); + LOG_FRAME("(Graph %p, Driver %p) AudioProcessingTrack %p Filling %" PRId64 + " frames of null data (no input source)", + mGraph, mGraph->CurrentDriver(), this, aTo - aFrom); + } else { + MOZ_ASSERT(mInputs.Length() == 1); + AudioSegment data; + DeviceInputConsumerTrack::GetInputSourceData(data, mInputs[0], aFrom, + aTo); + mInputProcessing->Process(GraphImpl(), aFrom, aTo, &data, + GetData<AudioSegment>()); + } + MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo); + + ApplyTrackDisabling(mSegment.get()); + } else if (aFlags & ALLOW_END) { + mEnded = true; + } +} + +void AudioProcessingTrack::NotifyOutputData(MediaTrackGraphImpl* aGraph, + AudioDataValue* aBuffer, + size_t aFrames, TrackRate aRate, + uint32_t aChannels) { + MOZ_ASSERT(mGraph == aGraph, "Cannot feed audio output to another graph"); + MOZ_ASSERT(mGraph->OnGraphThread()); + if (mInputProcessing) { + mInputProcessing->ProcessOutputData(aGraph, aBuffer, aFrames, aRate, + aChannels); + } +} + +void AudioProcessingTrack::SetInputProcessingImpl( + RefPtr<AudioInputProcessing> aInputProcessing) { + MOZ_ASSERT(GraphImpl()->OnGraphThread()); + mInputProcessing = std::move(aInputProcessing); +} + +MediaEngineWebRTCAudioCaptureSource::MediaEngineWebRTCAudioCaptureSource( + const MediaDevice* aMediaDevice) { + MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::AudioCapture); +} + +/* static */ +nsString MediaEngineWebRTCAudioCaptureSource::GetUUID() { + nsID uuid{}; + char uuidBuffer[NSID_LENGTH]; + nsCString asciiString; + ErrorResult rv; + + rv = nsID::GenerateUUIDInPlace(uuid); + if (rv.Failed()) { + return u""_ns; + } + + uuid.ToProvidedString(uuidBuffer); + asciiString.AssignASCII(uuidBuffer); + + // Remove {} and the null terminator + return NS_ConvertASCIItoUTF16(Substring(asciiString, 1, NSID_LENGTH - 3)); +} + +/* static */ +nsString 
MediaEngineWebRTCAudioCaptureSource::GetGroupId() { + return u"AudioCaptureGroup"_ns; +} + +void MediaEngineWebRTCAudioCaptureSource::SetTrack( + const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipalHandle) { + AssertIsOnOwningThread(); + // Nothing to do here. aTrack is a placeholder dummy and not exposed. +} + +nsresult MediaEngineWebRTCAudioCaptureSource::Start() { + AssertIsOnOwningThread(); + return NS_OK; +} + +nsresult MediaEngineWebRTCAudioCaptureSource::Stop() { + AssertIsOnOwningThread(); + return NS_OK; +} + +nsresult MediaEngineWebRTCAudioCaptureSource::Reconfigure( + const dom::MediaTrackConstraints& aConstraints, + const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) { + return NS_OK; +} + +void MediaEngineWebRTCAudioCaptureSource::GetSettings( + dom::MediaTrackSettings& aOutSettings) const { + aOutSettings.mAutoGainControl.Construct(false); + aOutSettings.mEchoCancellation.Construct(false); + aOutSettings.mNoiseSuppression.Construct(false); + aOutSettings.mChannelCount.Construct(1); +} + +} // namespace mozilla + +// Don't allow our macros to leak into other cpps in our unified build unit. +#undef MAX_CHANNELS +#undef MONO +#undef MAX_SAMPLING_FREQ |