Adding upstream version 124.0.1.upstream/124.0.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /dom/media/AudioSegment.cpp
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 292 insertions, 0 deletions
diff --git a/dom/media/AudioSegment.cpp b/dom/media/AudioSegment.cpp
new file mode 100644
index 0000000000..243cdffd0e
--- /dev/null
+++ b/dom/media/AudioSegment.cpp
@@ -0,0 +1,292 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSegment.h"
+#include "AudioMixer.h"
+#include "AudioChannelFormat.h"
+#include "MediaTrackGraph.h"  // for nsAutoRefTraits<SpeexResamplerState>
+#include <speex/speex_resampler.h>
+
+namespace mozilla {
+
+// Zero-filled storage that ZeroChannel<T>() hands out as a silent channel.
+const uint8_t SilentChannel::gZeroChannel[MAX_AUDIO_SAMPLE_SIZE *
+                                          AUDIO_PROCESSING_FRAMES] = {};
+
+// Typed views onto the shared zero buffer; both alias the same storage.
+template <>
+const float* SilentChannel::ZeroChannel<float>() {
+  return reinterpret_cast<const float*>(gZeroChannel);
+}
+template <>
+const int16_t* SilentChannel::ZeroChannel<int16_t>() {
+  return reinterpret_cast<const int16_t*>(gZeroChannel);
+}
+
+// Multiplies the volume of every chunk in this segment by aVolume.
+void AudioSegment::ApplyVolume(float aVolume) {
+  ChunkIterator chunk(*this);
+  for (; !chunk.IsEnded(); chunk.Next()) { chunk->mVolume *= aVolume; }
+}
+
+// Resamples every chunk in place from aInRate to aOutRate, where T is the
+// chunks' sample type, and recomputes mDuration. The resampler is re-created,
+// and *aResamplerChannelCount updated, when a chunk's channel count differs.
+template <typename T>
+void AudioSegment::Resample(nsAutoRef<SpeexResamplerState>& aResampler,
+                            uint32_t* aResamplerChannelCount, uint32_t aInRate,
+                            uint32_t aOutRate) {
+  mDuration = 0;
+  for (ChunkIterator iter(*this); !iter.IsEnded(); iter.Next()) {
+    AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> resampled;
+    AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelPtrs;
+    AudioChunk& chunk = *iter;
+    if (chunk.IsNull()) {
+      // A null chunk has nothing to resample: only its duration is rescaled.
+      chunk.mDuration = (chunk.mDuration * aOutRate) / aInRate;
+      mDuration += chunk.mDuration;
+      continue;
+    }
+    const uint32_t channelCount = chunk.mChannelData.Length();
+    if (*aResamplerChannelCount != channelCount) {
+      // This might introduce a discontinuity, but a mid-stream channel count
+      // change is uncommon. It also creates the resampler as late as possible.
+      SpeexResamplerState* state =
+          speex_resampler_init(channelCount, aInRate, aOutRate,
+                               SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
+      MOZ_ASSERT(state);
+      aResampler.own(state);
+      *aResamplerChannelCount = channelCount;
+    }
+    resampled.SetLength(channelCount);
+    channelPtrs.SetLength(channelCount);
+    uint32_t inFrames = chunk.mDuration;
+    NS_ASSERTION((UINT64_MAX - aInRate + 1) / chunk.mDuration >= aOutRate,
+                 "Dropping samples");
+    // Round up to allocate; the last frame may not be used.
+    const uint64_t scaled = static_cast<uint64_t>(chunk.mDuration) * aOutRate;
+    const uint32_t maxOutFrames = (scaled + aInRate - 1) / aInRate;
+    for (uint32_t ch = 0; ch < channelCount; ch++) {
+      T* dest = resampled[ch].AppendElements(maxOutFrames);
+      uint32_t outFrames = maxOutFrames;
+      const T* src = static_cast<const T*>(chunk.mChannelData[ch]);
+      dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), ch, src,
+                                                &inFrames, dest, &outFrames);
+      MOZ_ASSERT(inFrames == chunk.mDuration);
+      channelPtrs[ch] = dest;
+      resampled[ch].SetLength(outFrames);
+    }
+    MOZ_ASSERT(channelCount > 0);
+    chunk.mDuration = resampled[0].Length();
+    chunk.mBuffer =
+        new mozilla::SharedChannelArrayBuffer<T>(std::move(resampled));
+    for (uint32_t ch = 0; ch < channelCount; ch++) {
+      chunk.mChannelData[ch] = channelPtrs[ch];
+    }
+    mDuration += chunk.mDuration;
+  }
+}
+
+// Resamples all chunks from aInRate to aOutRate by calling Resample<T>() with
+// the sample type matching the segment's chunk format. No-op when the segment
+// has no chunks.
+void AudioSegment::ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
+                                  uint32_t* aResamplerChannelCount,
+                                  uint32_t aInRate, uint32_t aOutRate) {
+  if (mChunks.IsEmpty()) {
+    return;
+  }
+
+  // Use the format of the last non-silent chunk; SILENCE is kept only when
+  // every chunk is silent.
+  AudioSampleFormat sampleFormat = AUDIO_FORMAT_SILENCE;
+  for (ChunkIterator iter(*this); !iter.IsEnded(); iter.Next()) {
+    if (iter->mBufferFormat != AUDIO_FORMAT_SILENCE) {
+      sampleFormat = iter->mBufferFormat;
+    }
+  }
+
+  if (sampleFormat == AUDIO_FORMAT_S16) {
+    Resample<int16_t>(aResampler, aResamplerChannelCount, aInRate, aOutRate);
+  } else if (sampleFormat == AUDIO_FORMAT_FLOAT32 ||
+             sampleFormat == AUDIO_FORMAT_SILENCE) {
+    // When all chunks are silent the sample type does not matter: resampling
+    // then only changes the chunks' durations.
+    Resample<float>(aResampler, aResamplerChannelCount, aInRate, aOutRate);
+  } else {
+    MOZ_ASSERT(false);
+  }
+}
+
+// Interleaves all chunks into aBuffer as aChannels-channel audio and returns
+// the number of samples written (aBuffer's final length). Returns 0 without
+// touching aBuffer for an empty segment or if the sample count overflows.
+size_t AudioSegment::WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
+                                              uint32_t aChannels) const {
+  if (GetDuration() <= 0) {
+    MOZ_ASSERT(GetDuration() == 0);
+    return 0;
+  }
+
+  CheckedInt<size_t> totalSamples(static_cast<size_t>(GetDuration()));
+  totalSamples *= static_cast<size_t>(aChannels);
+  MOZ_ASSERT(totalSamples.isValid());
+  if (!totalSamples.isValid()) {
+    return 0;
+  }
+
+  // Enlarge buffer space if needed
+  if (aBuffer.Capacity() < totalSamples.value()) {
+    aBuffer.SetCapacity(totalSamples.value());
+  }
+  aBuffer.SetLengthAndRetainStorage(totalSamples.value());
+  aBuffer.ClearAndRetainStorage();
+
+  // Convert the de-interleaved chunks into an interleaved buffer. A chunk may
+  // be upmixed or downmixed if its channel count does not match aChannels.
+  size_t written = 0;
+  for (ConstChunkIterator iter(*this); !iter.IsEnded(); iter.Next()) {
+    const AudioChunk& chunk = *iter;
+    size_t chunkSamples = static_cast<size_t>(chunk.mDuration) * aChannels;
+    AudioDataValue* const dest = aBuffer.Elements() + written;
+    switch (chunk.mBufferFormat) {
+      case AUDIO_FORMAT_FLOAT32:
+        WriteChunk<float>(chunk, aChannels, chunk.mVolume, dest);
+        break;
+      case AUDIO_FORMAT_S16:
+        WriteChunk<int16_t>(chunk, aChannels, chunk.mVolume, dest);
+        break;
+      case AUDIO_FORMAT_SILENCE:
+        PodZero(dest, chunkSamples);
+        break;
+      default:
+        MOZ_ASSERT_UNREACHABLE("Unknown format");
+        PodZero(dest, chunkSamples);
+        break;
+    }
+    written += chunkSamples;
+  }
+  MOZ_DIAGNOSTIC_ASSERT(totalSamples.value() == written,
+                        "Segment's duration is incorrect");
+  aBuffer.SetLengthAndRetainStorage(written);
+  return written;
+}
+
+// Returns the address, within the planar buffer aData, of the sample at
+// aOffsetSamples in channel aChannel. The buffer holds aLengthSamples samples
+// divided evenly between aChannelCount channels. Asserts that the requested
+// position lies inside the buffer.
+static AudioDataValue* PointerForOffsetInChannel(AudioDataValue* aData,
+                                                 size_t aLengthSamples,
+                                                 uint32_t aChannelCount,
+                                                 uint32_t aChannel,
+                                                 uint32_t aOffsetSamples) {
+  const size_t channelLength = aLengthSamples / aChannelCount;
+  const size_t position = channelLength * aChannel + aOffsetSamples;
+  MOZ_ASSERT(position < aLengthSamples, "Offset request out of bounds.");
+  return aData + position;
+}
+
+// Writes aChunk's samples to aOutputChannels with the chunk's volume applied,
+// down-mixing first if the chunk has more channels than there are outputs.
+template <typename SrcT>
+static void DownMixChunk(const AudioChunk& aChunk,
+                         Span<AudioDataValue* const> aOutputChannels) {
+  Span<const SrcT* const> input = aChunk.ChannelData<SrcT>();
+  uint32_t frames = aChunk.mDuration;
+  if (input.Length() <= aOutputChannels.Length()) {
+    // The channel count is already what we want: convert channel by channel.
+    for (uint32_t i = 0; i < aOutputChannels.Length(); i++) {
+      ConvertAudioSamplesWithScale(input[i], aOutputChannels[i], frames,
+                                   aChunk.mVolume);
+    }
+    return;
+  }
+  AudioChannelsDownMix(input, aOutputChannels, frames);
+  for (AudioDataValue* output : aOutputChannels) {
+    ScaleAudioSamples(output, frames, aChunk.mVolume);
+  }
+}
+
+// Writes this chunk's audio to aOutputChannelPtrs, dispatching on its format.
+void AudioChunk::DownMixTo(
+    Span<AudioDataValue* const> aOutputChannelPtrs) const {
+  // No `default:` label, so that `-Wswitch` flags enumerators missing here.
+  switch (mBufferFormat) {
+    case AUDIO_FORMAT_SILENCE:
+      for (AudioDataValue* channel : aOutputChannelPtrs) {
+        std::fill_n(channel, mDuration, static_cast<AudioDataValue>(0));
+      }
+      return;
+    case AUDIO_FORMAT_S16:
+      DownMixChunk<int16_t>(*this, aOutputChannelPtrs);
+      return;
+    case AUDIO_FORMAT_FLOAT32:
+      DownMixChunk<float>(*this, aOutputChannelPtrs);
+      return;
+  }
+  MOZ_ASSERT_UNREACHABLE("buffer format");
+}
+
+// Mixes this segment into aMixer as aOutputChannels planar channels, up-mixing
+// or down-mixing chunks as needed; aSampleRate is forwarded to the mixer. Does
+// nothing for an empty segment or if the buffer length overflows uint32_t.
+void AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
+                       uint32_t aSampleRate) {
+  // Validate the duration before narrowing it: stored in a uint32_t, a
+  // negative or oversized duration would silently wrap to a bogus length.
+  CheckedInt<uint32_t> checkedLength(GetDuration());
+  checkedLength *= aOutputChannels;
+  MOZ_ASSERT(GetDuration() >= 0 && checkedLength.isValid());
+  if (GetDuration() <= 0 || !checkedLength.isValid()) {
+    return;
+  }
+  const uint32_t outBufferLength = checkedLength.value();
+
+  AutoTArray<AudioDataValue,
+             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
+      buf;
+  buf.SetLength(outBufferLength);
+  AutoTArray<AudioDataValue*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
+  outChannelPtrs.SetLength(aOutputChannels);
+
+  AudioChunk upMixChunk;
+  uint32_t offsetSamples = 0;
+  uint32_t frames;
+  for (ChunkIterator ci(*this); !ci.IsEnded();
+       ci.Next(), offsetSamples += frames) {
+    const AudioChunk& c = *ci;
+    frames = c.mDuration;
+    for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
+      outChannelPtrs[channel] =
+          PointerForOffsetInChannel(buf.Elements(), outBufferLength,
+                                    aOutputChannels, channel, offsetSamples);
+    }
+    // A silent chunk only needs the right number of zeros in each channel.
+    if (c.mBufferFormat == AUDIO_FORMAT_SILENCE) {
+      for (AudioDataValue* outChannel : outChannelPtrs) {
+        PodZero(outChannel, frames);
+      }
+      continue;
+    }
+    // Up-mix first when the chunk has fewer channels than the output needs.
+    const AudioChunk* downMixInput = &c;
+    if (c.ChannelCount() < aOutputChannels) {
+      upMixChunk = c;
+      AudioChannelsUpMix<void>(&upMixChunk.mChannelData, aOutputChannels,
+                               SilentChannel::gZeroChannel);
+      downMixInput = &upMixChunk;
+    }
+    downMixInput->DownMixTo(outChannelPtrs);
+  }
+
+  if (offsetSamples) {
+    MOZ_ASSERT(offsetSamples == outBufferLength / aOutputChannels,
+               "We forgot to write some samples?");
+    aMixer.Mix(buf.Elements(), aOutputChannels, offsetSamples, aSampleRate);
+  }
+}
+
+}  // namespace mozilla
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /dom/media/AudioSegment.cpp
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip