diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /dom/media/AudioSegment.h | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | dom/media/AudioSegment.h | 539 |
1 files changed, 539 insertions, 0 deletions
diff --git a/dom/media/AudioSegment.h b/dom/media/AudioSegment.h new file mode 100644 index 0000000000..a13db6fec6 --- /dev/null +++ b/dom/media/AudioSegment.h @@ -0,0 +1,539 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZILLA_AUDIOSEGMENT_H_ +#define MOZILLA_AUDIOSEGMENT_H_ + +#include <speex/speex_resampler.h> +#include "MediaTrackGraph.h" +#include "MediaSegment.h" +#include "AudioSampleFormat.h" +#include "AudioChannelFormat.h" +#include "SharedBuffer.h" +#include "WebAudioUtils.h" +#include "mozilla/ScopeExit.h" +#include "nsAutoRef.h" +#ifdef MOZILLA_INTERNAL_API +# include "mozilla/TimeStamp.h" +#endif +#include <float.h> + +namespace mozilla { +struct AudioChunk; +class AudioSegment; +} // namespace mozilla +MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk) + +/** + * This allows compilation of nsTArray<AudioSegment> and + * AutoTArray<AudioSegment> since without it, static analysis fails on the + * mChunks member being a non-memmovable AutoTArray. + * + * Note that AudioSegment(const AudioSegment&) is deleted, so this should + * never come into effect. + */ +MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment) + +namespace mozilla { + +template <typename T> +class SharedChannelArrayBuffer : public ThreadSharedObject { + public: + explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers) + : mBuffers(std::move(aBuffers)) {} + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override { + size_t amount = 0; + amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf); + for (size_t i = 0; i < mBuffers.Length(); i++) { + amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf); + } + + return amount; + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + nsTArray<nsTArray<T> > mBuffers; +}; + +class AudioMixer; + +/** + * For auto-arrays etc, guess this as the common number of channels. + */ +const int GUESS_AUDIO_CHANNELS = 2; + +// We ensure that the graph advances in steps that are multiples of the Web +// Audio block size +const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7; +const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS; + +template <typename SrcT, typename DestT> +static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels, + uint32_t aLength, float aVolume, + uint32_t aChannels, DestT* aOutput) { + DestT* output = aOutput; + for (size_t i = 0; i < aLength; ++i) { + for (size_t channel = 0; channel < aChannels; ++channel) { + float v = AudioSampleToFloat(aSourceChannels[channel][i]) * aVolume; + *output = FloatToAudioSample<DestT>(v); + ++output; + } + } +} + +template <typename SrcT, typename DestT> +static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer, + uint32_t aFrames, uint32_t aChannels, + DestT** aOutput) { + for (size_t i = 0; i < aChannels; i++) { + size_t interleavedIndex = i; + for (size_t j = 0; j < aFrames; j++) { + ConvertAudioSample(aSourceBuffer[interleavedIndex], aOutput[i][j]); + interleavedIndex += aChannels; + } + } +} + +class SilentChannel { + public: + static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */ + static const uint8_t + gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES]; + // We take advantage of the fact that zero in float and zero in int have the + // same all-zeros bit layout. + template <typename T> + static const T* ZeroChannel(); +}; + +/** + * Given an array of input channels (aChannelData), downmix to aOutputChannels, + * interleave the channel data. A total of aOutputChannels*aDuration + * interleaved samples will be copied to a channel buffer in aOutput. + */ +template <typename SrcT, typename DestT> +void DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData, + int32_t aDuration, float aVolume, + uint32_t aOutputChannels, DestT* aOutput) { + if (aChannelData.Length() == aOutputChannels) { + InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume, + aOutputChannels, aOutput); + } else { + AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData; + AutoTArray<SrcT, + SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> + outputBuffers; + outputChannelData.SetLength(aOutputChannels); + outputBuffers.SetLength(aDuration * aOutputChannels); + for (uint32_t i = 0; i < aOutputChannels; i++) { + outputChannelData[i] = outputBuffers.Elements() + aDuration * i; + } + AudioChannelsDownMix(aChannelData, outputChannelData.Elements(), + aOutputChannels, aDuration); + InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume, + aOutputChannels, aOutput); + } +} + +/** + * An AudioChunk represents a multi-channel buffer of audio samples. + * It references an underlying ThreadSharedObject which manages the lifetime + * of the buffer. An AudioChunk maintains its own duration and channel data + * pointers so it can represent a subinterval of a buffer without copying. + * An AudioChunk can store its individual channels anywhere; it maintains + * separate pointers to each channel's buffer. + */ +struct AudioChunk { + typedef mozilla::AudioSampleFormat SampleFormat; + + AudioChunk() = default; + + template <typename T> + AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer, + const nsTArray<const T*>& aChannelData, TrackTime aDuration, + PrincipalHandle aPrincipalHandle) + : mDuration(aDuration), + mBuffer(aBuffer), + mBufferFormat(AudioSampleTypeToFormat<T>::Format), + mPrincipalHandle(std::move(aPrincipalHandle)) { + MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?"); + for (const T* data : aChannelData) { + mChannelData.AppendElement(data); + } + } + + // Generic methods + void SliceTo(TrackTime aStart, TrackTime aEnd) { + MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start"); + MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range"); + MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end"); + + if (mBuffer) { + MOZ_ASSERT(aStart < INT32_MAX, + "Can't slice beyond 32-bit sample lengths"); + for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) { + mChannelData[channel] = AddAudioSampleOffset( + mChannelData[channel], mBufferFormat, int32_t(aStart)); + } + } + mDuration = aEnd - aStart; + } + TrackTime GetDuration() const { return mDuration; } + bool CanCombineWithFollowing(const AudioChunk& aOther) const { + if (aOther.mBuffer != mBuffer) { + return false; + } + if (!mBuffer) { + return true; + } + if (aOther.mVolume != mVolume) { + return false; + } + if (aOther.mPrincipalHandle != mPrincipalHandle) { + return false; + } + NS_ASSERTION(aOther.mBufferFormat == mBufferFormat, + "Wrong metadata about buffer"); + NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(), + "Mismatched channel count"); + if (mDuration > INT32_MAX) { + return false; + } + for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) { + if (aOther.mChannelData[channel] != + AddAudioSampleOffset(mChannelData[channel], mBufferFormat, + int32_t(mDuration))) { + return false; + } + } + return true; + } + bool IsNull() const { return mBuffer == nullptr; } + void SetNull(TrackTime aDuration) { + mBuffer = nullptr; + mChannelData.Clear(); + mDuration = aDuration; + mVolume = 1.0f; + mBufferFormat = AUDIO_FORMAT_SILENCE; + mPrincipalHandle = PRINCIPAL_HANDLE_NONE; + } + + uint32_t ChannelCount() const { return mChannelData.Length(); } + + bool IsMuted() const { return mVolume == 0.0f; } + + size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const { + return SizeOfExcludingThis(aMallocSizeOf, true); + } + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const { + size_t amount = 0; + + // Possibly owned: + // - mBuffer - Can hold data that is also in the decoded audio queue. If it + // is not shared, or unshared == false it gets counted. + if (mBuffer && (!aUnshared || !mBuffer->IsShared())) { + amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf); + } + + // Memory in the array is owned by mBuffer. + amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf); + return amount; + } + + template <typename T> + const nsTArray<const T*>& ChannelData() const { + MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat); + return *reinterpret_cast<const AutoTArray<const T*, GUESS_AUDIO_CHANNELS>*>( + &mChannelData); + } + + /** + * ChannelFloatsForWrite() should be used only when mBuffer is owned solely + * by the calling thread. + */ + template <typename T> + T* ChannelDataForWrite(size_t aChannel) { + MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat); + MOZ_ASSERT(!mBuffer->IsShared()); + return static_cast<T*>(const_cast<void*>(mChannelData[aChannel])); + } + + template <typename T> + static AudioChunk FromInterleavedBuffer( + const T* aBuffer, size_t aFrames, uint32_t aChannels, + const PrincipalHandle& aPrincipalHandle) { + CheckedInt<size_t> bufferSize(sizeof(T)); + bufferSize *= aFrames; + bufferSize *= aChannels; + RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); + + AutoTArray<T*, 8> deinterleaved; + if (aChannels == 1) { + PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames); + deinterleaved.AppendElement(static_cast<T*>(buffer->Data())); + } else { + deinterleaved.SetLength(aChannels); + T* samples = static_cast<T*>(buffer->Data()); + + size_t offset = 0; + for (uint32_t i = 0; i < aChannels; ++i) { + deinterleaved[i] = samples + offset; + offset += aFrames; + } + + DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames), + aChannels, deinterleaved.Elements()); + } + + AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData; + channelData.AppendElements(deinterleaved); + return AudioChunk(buffer.forget(), channelData, + static_cast<TrackTime>(aFrames), aPrincipalHandle); + } + + const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; } + + TrackTime mDuration = 0; // in frames within the buffer + RefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is + // managed; null means data is all zeroes + // one pointer per channel; empty if and only if mBuffer is null + CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData; + float mVolume = 1.0f; // volume multiplier to apply + // format of frames in mBuffer (or silence if mBuffer is null) + SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE; + // principalHandle for the data in this chunk. + // This can be compared to an nsIPrincipal* when back on main thread. + PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE; +}; + +/** + * A list of audio samples consisting of a sequence of slices of SharedBuffers. + * The audio rate is determined by the track, not stored in this class. + */ +class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> { + // The channel count that MaxChannelCount() returned last time it was called. + uint32_t mMemoizedMaxChannelCount = 0; + + public: + typedef mozilla::AudioSampleFormat SampleFormat; + + AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {} + + AudioSegment(AudioSegment&& aSegment) = default; + + AudioSegment(const AudioSegment&) = delete; + AudioSegment& operator=(const AudioSegment&) = delete; + + ~AudioSegment() = default; + + // Resample the whole segment in place. `aResampler` is an instance of a + // resampler, initialized with `aResamplerChannelCount` channels. If this + // function finds a chunk with more channels, `aResampler` is destroyed and a + // new resampler is created, and `aResamplerChannelCount` is updated with the + // new channel count value. + template <typename T> + void Resample(nsAutoRef<SpeexResamplerState>& aResampler, + uint32_t* aResamplerChannelCount, uint32_t aInRate, + uint32_t aOutRate) { + mDuration = 0; + + for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { + AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output; + AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs; + AudioChunk& c = *ci; + // If this chunk is null, don't bother resampling, just alter its duration + if (c.IsNull()) { + c.mDuration = (c.mDuration * aOutRate) / aInRate; + mDuration += c.mDuration; + continue; + } + uint32_t channels = c.mChannelData.Length(); + // This might introduce a discontinuity, but a channel count change in the + // middle of a stream is not that common. This also initializes the + // resampler as late as possible. + if (channels != *aResamplerChannelCount) { + SpeexResamplerState* state = + speex_resampler_init(channels, aInRate, aOutRate, + SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr); + MOZ_ASSERT(state); + aResampler.own(state); + *aResamplerChannelCount = channels; + } + output.SetLength(channels); + bufferPtrs.SetLength(channels); + uint32_t inFrames = c.mDuration; + // Round up to allocate; the last frame may not be used. + NS_ASSERTION((UINT64_MAX - aInRate + 1) / c.mDuration >= aOutRate, + "Dropping samples"); + uint32_t outSize = + (static_cast<uint64_t>(c.mDuration) * aOutRate + aInRate - 1) / + aInRate; + for (uint32_t i = 0; i < channels; i++) { + T* out = output[i].AppendElements(outSize); + uint32_t outFrames = outSize; + + const T* in = static_cast<const T*>(c.mChannelData[i]); + dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), i, in, + &inFrames, out, &outFrames); + MOZ_ASSERT(inFrames == c.mDuration); + + bufferPtrs[i] = out; + output[i].SetLength(outFrames); + } + MOZ_ASSERT(channels > 0); + c.mDuration = output[0].Length(); + c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(std::move(output)); + for (uint32_t i = 0; i < channels; i++) { + c.mChannelData[i] = bufferPtrs[i]; + } + mDuration += c.mDuration; + } + } + + void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler, + uint32_t* aResamplerChannelCount, uint32_t aInRate, + uint32_t aOutRate); + + template <typename T> + void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer, + const nsTArray<const T*>& aChannelData, TrackTime aDuration, + const PrincipalHandle& aPrincipalHandle) { + AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData, + aDuration, aPrincipalHandle)); + } + void AppendSegment(const AudioSegment* aSegment) { + MOZ_ASSERT(aSegment); + + for (const AudioChunk& c : aSegment->mChunks) { + AudioChunk* chunk = AppendChunk(c.GetDuration()); + chunk->mBuffer = c.mBuffer; + chunk->mChannelData = c.mChannelData; + chunk->mBufferFormat = c.mBufferFormat; + chunk->mPrincipalHandle = c.mPrincipalHandle; + } + } + template <typename T> + void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames, + uint32_t aChannels, + const PrincipalHandle& aPrincipalHandle) { + AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>( + aBuffer, aFrames, aChannels, aPrincipalHandle)); + } + // Write the segement data into an interleaved buffer. Do mixing if the + // AudioChunk's channel count in the segment is different from aChannels. + // Returns sample count of the converted audio data. The converted data will + // be stored into aBuffer. + size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer, + uint32_t aChannels) const; + // Consumes aChunk, and append it to the segment if its duration is not zero. + void AppendAndConsumeChunk(AudioChunk&& aChunk) { + AudioChunk unused; + AudioChunk* chunk = &unused; + + // Always consume aChunk. The chunk's mBuffer can be non-null even if its + // duration is 0. + auto consume = MakeScopeExit([&] { + chunk->mBuffer = std::move(aChunk.mBuffer); + chunk->mChannelData = std::move(aChunk.mChannelData); + + MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(), + "Appending invalid data ?"); + + chunk->mVolume = aChunk.mVolume; + chunk->mBufferFormat = aChunk.mBufferFormat; + chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle); + }); + + if (aChunk.GetDuration() == 0) { + return; + } + + if (!mChunks.IsEmpty() && + mChunks.LastElement().CanCombineWithFollowing(aChunk)) { + mChunks.LastElement().mDuration += aChunk.GetDuration(); + mDuration += aChunk.GetDuration(); + return; + } + + chunk = AppendChunk(aChunk.mDuration); + } + void ApplyVolume(float aVolume); + // Mix the segment into a mixer, interleaved. This is useful to output a + // segment to a system audio callback. It up or down mixes to aChannelCount + // channels. + void WriteTo(AudioMixer& aMixer, uint32_t aChannelCount, + uint32_t aSampleRate); + // Mix the segment into a mixer, keeping it planar, up or down mixing to + // aChannelCount channels. + void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate); + + // Returns the maximum channel count across all chunks in this segment. + // Should there be no chunk with a channel count we return the memoized return + // value from last time this method was called. + uint32_t MaxChannelCount() { + uint32_t channelCount = 0; + for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { + if (ci->ChannelCount()) { + channelCount = std::max(channelCount, ci->ChannelCount()); + } + } + if (channelCount == 0) { + return mMemoizedMaxChannelCount; + } + return mMemoizedMaxChannelCount = channelCount; + } + + static Type StaticType() { return AUDIO; } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + PrincipalHandle GetOldestPrinciple() const { + const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0]; + return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE; + } + + // Iterate on each chunks until the input function returns true. + template <typename Function> + void IterateOnChunks(const Function&& aFunction) { + for (uint32_t idx = 0; idx < mChunks.Length(); idx++) { + if (aFunction(&mChunks[idx])) { + return; + } + } + } +}; + +template <typename SrcT> +void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels, + float aVolume, AudioDataValue* aOutputBuffer) { + AutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData; + + channelData = aChunk.ChannelData<SrcT>().Clone(); + + if (channelData.Length() < aOutputChannels) { + // Up-mix. Note that this might actually make channelData have more + // than aOutputChannels temporarily. + AudioChannelsUpMix(&channelData, aOutputChannels, + SilentChannel::ZeroChannel<SrcT>()); + } + if (channelData.Length() > aOutputChannels) { + // Down-mix. + DownmixAndInterleave(channelData, aChunk.mDuration, aVolume, + aOutputChannels, aOutputBuffer); + } else { + InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration, + aVolume, aOutputChannels, aOutputBuffer); + } +} + +} // namespace mozilla + +#endif /* MOZILLA_AUDIOSEGMENT_H_ */ |