summaryrefslogtreecommitdiffstats
path: root/dom/media/mediasink/AudioDecoderInputTrack.h
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/mediasink/AudioDecoderInputTrack.h
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/mediasink/AudioDecoderInputTrack.h')
-rw-r--r--dom/media/mediasink/AudioDecoderInputTrack.h242
1 files changed, 242 insertions, 0 deletions
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h
new file mode 100644
index 0000000000..8c82d7bed6
--- /dev/null
+++ b/dom/media/mediasink/AudioDecoderInputTrack.h
@@ -0,0 +1,242 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AudioDecoderInputTrack_h
+#define AudioDecoderInputTrack_h
+
+#include "AudioSegment.h"
+#include "MediaEventSource.h"
+#include "MediaTimer.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackGraphImpl.h"
+#include "MediaSegment.h"
+#include "mozilla/SPSCQueue.h"
+#include "mozilla/StateMirroring.h"
+#include "nsISerialEventTarget.h"
+
+namespace soundtouch {
+class MOZ_EXPORT SoundTouch;  // forward declaration; used below via pointer only
+}  // namespace soundtouch
+
+namespace mozilla {
+
+class AudioData;  // forward declaration; used below via pointer/RefPtr only
+
+/**
+ * AudioDecoderInputTrack is used as a source for the audio decoder data, which
+ * supports adjusting the playback rate and preserving pitch.
+ * The owner of this track is responsible for pushing audio data via
+ * `AppendData()` into an SPSC queue, which is a thread-safe queue between the
+ * decoder thread (producer) and the graph thread (consumer). MediaTrackGraph
+ * requests data via `ProcessInput()`, then AudioDecoderInputTrack converts
+ * (based on sample rate and playback rate) and appends the needed amount of
+ * audio frames onto the output segment that is used by MediaTrackGraph.
+ */
+class AudioDecoderInputTrack final : public ProcessedMediaTrack {
+ public:
+ static AudioDecoderInputTrack* Create(MediaTrackGraph* aGraph,
+ nsISerialEventTarget* aDecoderThread,
+ const AudioInfo& aInfo,
+ float aPlaybackRate, float aVolume,
+ bool aPreservesPitch);  // factory; the constructor itself is private
+
+ // SPSCData holds exactly one of the variant alternatives declared below, and
+ // is the element type of `mSPSCQueue`, which provides a thread-safe
+ // information exchange between the decoder thread and the graph thread.
+ struct SPSCData final {
+ struct Empty {};  // default alternative: no data (see HasData())
+ struct ClearFutureData {};  // payload-free tag alternative
+ struct DecodedData {  // an audio segment plus its start and end times
+ DecodedData()  // both times start out as Invalid()
+ : mStartTime(media::TimeUnit::Invalid()),
+ mEndTime(media::TimeUnit::Invalid()) {}
+ DecodedData(DecodedData&& aDecodedData)  // moves segment, copies times
+ : mSegment(std::move(aDecodedData.mSegment)) {
+ mStartTime = aDecodedData.mStartTime;
+ mEndTime = aDecodedData.mEndTime;
+ aDecodedData.Clear();  // leave the source cleared with Invalid() times
+ }
+ DecodedData(media::TimeUnit aStartTime, media::TimeUnit aEndTime)
+ : mStartTime(aStartTime), mEndTime(aEndTime) {}
+ DecodedData(const DecodedData&) = delete;  // not copyable
+ DecodedData& operator=(const DecodedData&) = delete;
+ void Clear() {  // drops the segment and resets both times to Invalid()
+ mSegment.Clear();
+ mStartTime = media::TimeUnit::Invalid();
+ mEndTime = media::TimeUnit::Invalid();
+ }
+ AudioSegment mSegment;
+ media::TimeUnit mStartTime;
+ media::TimeUnit mEndTime;
+ };
+ struct EOS {};  // payload-free tag alternative (presumably end-of-stream)
+
+ SPSCData() : mData(Empty()){};  // default-constructed data holds Empty
+ explicit SPSCData(ClearFutureData&& aArg) : mData(std::move(aArg)){};
+ explicit SPSCData(DecodedData&& aArg) : mData(std::move(aArg)){};
+ explicit SPSCData(EOS&& aArg) : mData(std::move(aArg)){};
+
+ bool HasData() const { return !mData.is<Empty>(); }  // anything but Empty
+ bool IsClearFutureData() const { return mData.is<ClearFutureData>(); }
+ bool IsDecodedData() const { return mData.is<DecodedData>(); }
+ bool IsEOS() const { return mData.is<EOS>(); }
+
+ DecodedData* AsDecodedData() {  // nullptr unless holding DecodedData
+ return IsDecodedData() ? &mData.as<DecodedData>() : nullptr;
+ }
+
+ Variant<Empty, ClearFutureData, DecodedData, EOS> mData;
+ };
+
+ // Decoder thread API
+ void AppendData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+ void AppendData(nsTArray<RefPtr<AudioData>>& aAudioArray,
+ const PrincipalHandle& aPrincipalHandle);
+ void NotifyEndOfStream();
+ void ClearFutureData();
+ void SetVolume(float aVolume);
+ void SetPlaybackRate(float aPlaybackRate);
+ void SetPreservesPitch(bool aPreservesPitch);
+ // After calling this, the track is not expected to receive any new data.
+ void Close();
+ bool HasBatchedData() const;
+
+ MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }  // see mOnOutput
+ MediaEventSource<void>& OnEnd() { return mOnEnd; }  // see mOnEnd
+
+ // Graph Thread API
+ void DestroyImpl() override;
+ void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
+ uint32_t NumberOfChannels() const override;
+
+ // The functions below are only used for testing; each asserts that it is
+ TrackTime WrittenFrames() const {  // called on the graph thread.
+ AssertOnGraphThread();
+ return mWrittenFrames;
+ }
+ float Volume() const {
+ AssertOnGraphThread();
+ return mVolume;
+ }
+ float PlaybackRate() const {
+ AssertOnGraphThread();
+ return mPlaybackRate;
+ }
+
+ protected:
+ ~AudioDecoderInputTrack();
+
+ private:
+ AudioDecoderInputTrack(nsISerialEventTarget* aDecoderThread,
+ TrackRate aGraphRate, const AudioInfo& aInfo,
+ float aPlaybackRate, float aVolume,
+ bool aPreservesPitch);
+
+ // Returns false if the converted segment has zero duration.
+ bool ConvertAudioDataToSegment(AudioData* aAudio, AudioSegment& aSegment,
+ const PrincipalHandle& aPrincipalHandle);
+
+ void HandleSPSCData(SPSCData& aData);
+
+ // These methods return the total number of frames that were consumed from
+ // `mBufferedData`.
+ TrackTime AppendBufferedDataToOutput(TrackTime aExpectedDuration);
+ TrackTime FillDataToTimeStretcher(TrackTime aExpectedDuration);
+ TrackTime AppendTimeStretchedDataToSegment(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ TrackTime AppendUnstretchedDataToSegment(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+
+ // Return the total number of frames retrieved from the time stretcher.
+ TrackTime DrainStretchedDataIfNeeded(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ TrackTime GetDataFromTimeStretcher(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ void NotifyInTheEndOfProcessInput(TrackTime aFillDuration);
+
+ bool HasSentAllData() const;
+
+ bool ShouldBatchData() const;
+ void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+ void DispatchPushBatchedDataIfNeeded();
+ void PushBatchedDataIfNeeded();
+ void PushDataToSPSCQueue(SPSCData& data);
+
+ void SetVolumeImpl(float aVolume);
+ void SetPlaybackRateImpl(float aPlaybackRate);
+ void SetPreservesPitchImpl(bool aPreservesPitch);
+
+ void EnsureTimeStretcher();
+ void SetTempoAndRateForTimeStretcher();
+ uint32_t GetChannelCountForTimeStretcher() const;
+
+ inline void AssertOnDecoderThread() const {  // checks mDecoderThread
+ MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
+ }
+ inline void AssertOnGraphThread() const {
+ MOZ_ASSERT(GraphImpl()->OnGraphThread());
+ }
+ inline void AssertOnGraphThreadOrNotRunning() const {
+ MOZ_ASSERT(GraphImpl()->OnGraphThreadOrNotRunning());
+ }
+
+ const RefPtr<nsISerialEventTarget> mDecoderThread;
+
+ // Notifies the amount of audio frames which have been sent to the track.
+ MediaEventProducer<int64_t> mOnOutput;
+ // Notifies when the track is ended.
+ MediaEventProducer<void> mOnEnd;
+
+ // These variables are ONLY used on the decoder thread.
+ nsAutoRef<SpeexResamplerState> mResampler;
+ uint32_t mResamplerChannelCount;
+ const uint32_t mInitialInputChannels;
+ TrackRate mInputSampleRate;
+ DelayedScheduler mDelayedScheduler;
+ bool mShutdownSPSCQueue = false;
+
+ // These attributes are ONLY used on the graph thread.
+ bool mReceivedEOS = false;
+ TrackTime mWrittenFrames = 0;
+ float mPlaybackRate;
+ float mVolume;
+ bool mPreservesPitch;
+
+ // A thread-safe queue shared by the decoder thread and the graph thread.
+ // The decoder thread is the producer side, and the graph thread is the
+ // consumer side. This queue should NEVER get full. In order to achieve that,
+ // we batch input samples when the SPSC queue doesn't have much available
+ // capacity.
+ // In addition, as the media track isn't guaranteed to be destroyed on the
+ // graph thread (it could be destroyed on the main thread as well), we might
+ // not clear all data in the SPSC queue when the track's `DestroyImpl()` gets
+ // called. Destroying the queue is left until the track itself is destroyed.
+ SPSCQueue<SPSCData> mSPSCQueue{40};  // NOTE(review): 40 is presumably the capacity — confirm
+
+ // When the graph requires fewer audio frames than a piece of audio data
+ // contains, the remaining frames are stored here and consumed in the next
+ // iteration (see the helpers that report frames consumed from this member).
+ // This is ONLY used on the graph thread.
+ AudioSegment mBufferedData;
+
+ // In order to prevent the SPSC queue from being full, we batch multiple
+ // data into one to control the density of the SPSC queue; the length of the
+ // batched data is dynamically adjusted by the queue's available capacity.
+ // This is ONLY used on the decoder thread.
+ SPSCData::DecodedData mBatchedData;
+
+ // True once we've sent all data to the graph; the track will then be marked
+ // as ended in the next iteration.
+ bool mSentAllData = false;
+
+ // This is used to adjust the playback rate and pitch.
+ soundtouch::SoundTouch* mTimeStretcher = nullptr;  // NOTE(review): raw pointer; ownership/teardown not visible in this header
+
+ // Buffer that is used for the time stretching.
+ AutoTArray<AudioDataValue, 2> mInterleavedBuffer;
+};
+
+} // namespace mozilla
+
+#endif // AudioDecoderInputTrack_h