1 files changed, 485 insertions, 0 deletions
diff --git a/dom/media/AudioConverter.cpp b/dom/media/AudioConverter.cpp
new file mode 100644
index 0000000000..9f2c32ceed
--- /dev/null
+++ b/dom/media/AudioConverter.cpp
@@ -0,0 +1,485 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioConverter.h"
+#include <speex/speex_resampler.h>
+#include <string.h>
+#include <cmath>
+
+/*
+ *  Parts derived from MythTV AudioConvert Class
+ *  Created by Jean-Yves Avenard.
+ *
+ *  Copyright (C) Bubblestuff Pty Ltd 2013
+ *  Copyright (C) foobum@gmail.com 2010
+ */
+
+namespace mozilla {
+
+// Builds a converter going from aIn to aOut. The pair is expected to satisfy
+// CanConvert() (enforced by a diagnostic assert). The input layout is asked for
+// its mapping table towards the output layout (stored in mChannelOrderMap) and
+// a resampler is created only when the two sample rates differ.
+AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
+    : mIn(aIn), mOut(aOut), mResampler(nullptr) {
+  MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut),
+                        "The conversion is not supported");
+  mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap);
+
+  const bool ratesDiffer = aIn.Rate() != aOut.Rate();
+  if (!ratesDiffer) {
+    // Same rate on both sides: no resampler needed.
+    return;
+  }
+  RecreateResampler();
+}
+
+// Releases the speex resampler, if one was ever created.
+AudioConverter::~AudioConverter() {
+  if (!mResampler) {
+    return;
+  }
+  speex_resampler_destroy(mResampler);
+  mResampler = nullptr;
+}
+
+// Returns true when this class is able to convert audio from aIn to aOut.
+// The requirements are checked in this order, each failure emitting a warning:
+//  - both sides use the same sample format and the same interleaving;
+//  - if the channel counts differ, the output has at most two channels;
+//  - the output is interleaved.
+bool AudioConverter::CanConvert(const AudioConfig& aIn,
+                                const AudioConfig& aOut) {
+  const bool sameSampleLayout = aIn.Format() == aOut.Format() &&
+                                aIn.Interleaved() == aOut.Interleaved();
+  if (!sameSampleLayout) {
+    NS_WARNING("No format conversion is supported at this stage");
+    return false;
+  }
+
+  const bool channelCountChanges = aIn.Channels() != aOut.Channels();
+  if (channelCountChanges && aOut.Channels() > 2) {
+    NS_WARNING(
+        "Only down/upmixing to mono or stereo is supported at this stage");
+    return false;
+  }
+
+  if (aOut.Interleaved()) {
+    return true;
+  }
+  NS_WARNING("planar audio format not supported");
+  return false;
+}
+
+// Tells whether the conversion may write its output over its input buffer.
+// This is only allowed when no stage makes the data grow:
+//  - no upmix is required;
+//  - if a downmix is required, an input frame takes at least as many bytes as
+//    an output frame;
+//  - if resampling is required, the input rate is at least the output rate
+//    (i.e. no upsampling).
+bool AudioConverter::CanWorkInPlace() const {
+  if (mIn.Channels() < mOut.Channels()) {
+    // Upmixing is never done in place.
+    return false;
+  }
+
+  if (mIn.Channels() > mOut.Channels()) {
+    const auto inFrameBytes =
+        mIn.Channels() * AudioConfig::SampleSize(mIn.Format());
+    const auto outFrameBytes =
+        mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
+    if (inFrameBytes < outFrameBytes) {
+      return false;
+    }
+  }
+
+  // Upsampling produces more frames than it consumes.
+  if (mIn.Rate() < mOut.Rate()) {
+    return false;
+  }
+  return true;
+}
+
+// Channel stage of the conversion: handles aFrames frames from aIn, writing
+// the result to aOut, and returns the number of frames written. Depending on
+// the channel counts this downmixes, upmixes, reorders the channels, or simply
+// copies the data across when the two buffers differ.
+size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn,
+                                       size_t aFrames) {
+  if (aFrames == 0) {
+    return 0;
+  }
+
+  const auto inChannels = mIn.Channels();
+  const auto outChannels = mOut.Channels();
+  if (inChannels > outChannels) {
+    return DownmixAudio(aOut, aIn, aFrames);
+  }
+  if (inChannels < outChannels) {
+    return UpmixAudio(aOut, aIn, aFrames);
+  }
+
+  // Same channel count from here on.
+  const bool reorder = mIn.Layout() != mOut.Layout() && CanReorderAudio();
+  if (reorder) {
+    ReOrderInterleavedChannels(aOut, aIn, aFrames);
+    return aFrames;
+  }
+  if (aIn != aOut) {
+    memmove(aOut, aIn, FramesOutToBytes(aFrames));
+  }
+  return aFrames;
+}
+
+// Reorders the channels of interleaved audio: within every frame, output
+// channel j receives input channel aChannelOrderMap[j].
+// A frame is fully gathered into a temporary before being written out, so this
+// can work in place (e.g. aOut == aIn).
+template <class AudioDataType>
+void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
+                                 uint32_t aFrames, uint32_t aChannels,
+                                 const uint8_t* aChannelOrderMap) {
+  MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS);
+  AudioDataType frame[AudioConfig::ChannelLayout::MAX_CHANNELS];
+  const AudioDataType* src = aIn;
+  AudioDataType* dst = aOut;
+  for (uint32_t remaining = aFrames; remaining > 0; --remaining) {
+    // Gather the reordered frame first...
+    for (uint32_t ch = 0; ch < aChannels; ++ch) {
+      frame[ch] = src[aChannelOrderMap[ch]];
+    }
+    // ...then write it out.
+    for (uint32_t ch = 0; ch < aChannels; ++ch) {
+      dst[ch] = frame[ch];
+    }
+    src += aChannels;
+    dst += aChannels;
+  }
+}
+
+// Reorders the channels of aFrames interleaved frames from the input layout to
+// the output layout, using mChannelOrderMap. Input and output must have the
+// same channel count. When there is nothing to reorder (empty map, mono, or
+// identical layouts) the data is copied unchanged if the buffers differ.
+void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
+                                                size_t aFrames) const {
+  MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
+  MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());
+
+  const bool nothingToReorder = mChannelOrderMap.IsEmpty() ||
+                                mOut.Channels() == 1 ||
+                                mOut.Layout() == mIn.Layout();
+  if (nothingToReorder) {
+    // With a single channel planar and non-planar are the same thing; the
+    // other cases are "already in order" and "no known mapping".
+    if (aOut != aIn) {
+      memmove(aOut, aIn, FramesOutToBytes(aFrames));
+    }
+    return;
+  }
+
+  // Dispatch on the sample width; the reordering only moves samples around so
+  // any integer type of the right size will do.
+  const uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
+  if (bits == 8) {
+    _ReOrderInterleavedChannels(static_cast<uint8_t*>(aOut),
+                                static_cast<const uint8_t*>(aIn), aFrames,
+                                mIn.Channels(), mChannelOrderMap.Elements());
+    return;
+  }
+  if (bits == 16) {
+    _ReOrderInterleavedChannels(static_cast<int16_t*>(aOut),
+                                static_cast<const int16_t*>(aIn), aFrames,
+                                mIn.Channels(), mChannelOrderMap.Elements());
+    return;
+  }
+  // Everything else is expected to be stored on 4 bytes.
+  MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
+  _ReOrderInterleavedChannels(static_cast<int32_t*>(aOut),
+                              static_cast<const int32_t*>(aIn), aFrames,
+                              mIn.Channels(), mChannelOrderMap.Elements());
+}
+
+// Saturates a 32-bit value to the signed 16-bit range [-32768, 32767].
+static inline int16_t clipTo15(int32_t aX) {
+  if (aX < -32768) {
+    return -32768;
+  }
+  if (aX > 32767) {
+    return 32767;
+  }
+  return aX;
+}
+
+// Converts interleaved audio between two channel counts without any mixing:
+// the channels common to both sides are copied as is, surplus input channels
+// are dropped and surplus output channels are filled with silence.
+//
+// aOut must have room for aFrames * aOutChannels samples and aIn must hold
+// aFrames * aInChannels samples.
+//
+// In-place use (aIn == aOut) is supported when frames shrink or keep their
+// size. Growing frames in place is not possible with a forward pass (it would
+// overwrite input that has not been read yet), so such a call is left as a
+// no-op.
+template <typename TYPE>
+static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
+                          int32_t aInChannels, int32_t aFrames) {
+  if (aIn == aOut && aOutChannels >= aInChannels) {
+    // Either the data is already where it should be (equal channel counts) or
+    // this would be an in-place upmix, see above.
+    return;
+  }
+  const int32_t commonChannels = std::min(aInChannels, aOutChannels);
+
+  for (int32_t i = 0; i < aFrames; i++) {
+    // Index with size_t so that large buffers can't overflow 32-bit math.
+    const TYPE* inFrame = aIn + static_cast<size_t>(i) * aInChannels;
+    TYPE* outFrame = aOut + static_cast<size_t>(i) * aOutChannels;
+    // When working in place outFrame never runs ahead of inFrame, so a sample
+    // is always read before its slot can be overwritten.
+    for (int32_t j = 0; j < commonChannels; j++) {
+      outFrame[j] = inFrame[j];
+    }
+    // Silence the output channels that have no input counterpart.
+    for (int32_t j = commonChannels; j < aOutChannels; j++) {
+      outFrame[j] = 0;
+    }
+  }
+}
+
+// Downmixes aFrames interleaved frames from aIn (mIn.Channels() channels) into
+// aOut (mOut.Channels() channels) and returns aFrames.
+//
+// Only S16 and float samples are handled, and the output is expected to be
+// mono or stereo (both checked by diagnostic asserts). Paths, in order:
+//  - equal channel counts: the data is copied across unchanged;
+//  - either layout invalid: dumbUpDownMix() is used, no actual mixing;
+//  - more than 2 input channels: the input is mixed to stereo through a
+//    per-channel-count coefficient table, then averaged to mono if the output
+//    has a single channel;
+//  - otherwise (stereo to mono): the two channels are averaged.
+size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn,
+                                    size_t aFrames) const {
+  MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
+                        mIn.Format() == AudioConfig::FORMAT_FLT);
+  MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels());
+  MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
+                        mOut.Layout() == AudioConfig::ChannelLayout(1));
+
+  uint32_t inChannels = mIn.Channels();
+  uint32_t outChannels = mOut.Channels();
+
+  // Nothing to mix: just make sure the data ends up in aOut.
+  if (inChannels == outChannels) {
+    if (aOut != aIn) {
+      memmove(aOut, aIn, FramesOutToBytes(aFrames));
+    }
+    return aFrames;
+  }
+
+  if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) {
+    // Dumb copy dropping extra channels.
+    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
+      dumbUpDownMix(static_cast<float*>(aOut), outChannels,
+                    static_cast<const float*>(aIn), inChannels, aFrames);
+    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
+      dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels,
+                    static_cast<const int16_t*>(aIn), inChannels, aFrames);
+    } else {
+      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+    }
+    return aFrames;
+  }
+
+  MOZ_ASSERT(
+      mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()),
+      "Can only downmix input data in SMPTE layout");
+  if (inChannels > 2) {
+    // In both tables below, dmatrix[n - 3][j] holds the {left, right} gains
+    // applied to input channel j when the input has n channels; the tables
+    // cover n = 3..8.
+    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
+      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
+      // 5-8.
+      static const float dmatrix[6][8][2] = {
+          /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}},
+          /*4*/
+          {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}},
+          /*5*/
+          {{0.6510f, 0},
+           {0, 0.6510f},
+           {0.4600f, 0.4600f},
+           {0.5636f, 0.3254f},
+           {0.3254f, 0.5636f}},
+          /*6*/
+          {{0.5290f, 0},
+           {0, 0.5290f},
+           {0.3741f, 0.3741f},
+           {0.3741f, 0.3741f},
+           {0.4582f, 0.2645f},
+           {0.2645f, 0.4582f}},
+          /*7*/
+          {{0.4553f, 0},
+           {0, 0.4553f},
+           {0.3220f, 0.3220f},
+           {0.3220f, 0.3220f},
+           {0.2788f, 0.2788f},
+           {0.3943f, 0.2277f},
+           {0.2277f, 0.3943f}},
+          /*8*/
+          {{0.3886f, 0},
+           {0, 0.3886f},
+           {0.2748f, 0.2748f},
+           {0.2748f, 0.2748f},
+           {0.3366f, 0.1943f},
+           {0.1943f, 0.3366f},
+           {0.3366f, 0.1943f},
+           {0.1943f, 0.3366f}},
+      };
+      // Re-write the buffer with downmixed data
+      const float* in = static_cast<const float*>(aIn);
+      float* out = static_cast<float*>(aOut);
+      for (uint32_t i = 0; i < aFrames; i++) {
+        float sampL = 0.0;
+        float sampR = 0.0;
+        // Accumulate the weighted contribution of every input channel.
+        for (uint32_t j = 0; j < inChannels; j++) {
+          sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
+          sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
+        }
+        if (outChannels == 2) {
+          *out++ = sampL;
+          *out++ = sampR;
+        } else {
+          // Mono output: average of the two downmixed channels.
+          *out++ = (sampL + sampR) * 0.5;
+        }
+      }
+    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
+      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
+      // 5-8. Coefficients in Q14.
+      static const int16_t dmatrix[6][8][2] = {
+          /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
+          /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
+          /*5*/
+          {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
+          /*6*/
+          {{8668, 0},
+           {0, 8668},
+           {6129, 6129},
+           {6129, 6129},
+           {7507, 4335},
+           {4335, 7507}},
+          /*7*/
+          {{7459, 0},
+           {0, 7459},
+           {5275, 5275},
+           {5275, 5275},
+           {4568, 4568},
+           {6460, 3731},
+           {3731, 6460}},
+          /*8*/
+          {{6368, 0},
+           {0, 6368},
+           {4502, 4502},
+           {4502, 4502},
+           {5514, 3184},
+           {3184, 5514},
+           {5514, 3184},
+           {3184, 5514}}};
+      // Re-write the buffer with downmixed data
+      const int16_t* in = static_cast<const int16_t*>(aIn);
+      int16_t* out = static_cast<int16_t*>(aOut);
+      for (uint32_t i = 0; i < aFrames; i++) {
+        // 32-bit accumulators: each term is a 16-bit sample times a Q14 gain.
+        int32_t sampL = 0;
+        int32_t sampR = 0;
+        for (uint32_t j = 0; j < inChannels; j++) {
+          sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
+          sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
+        }
+        // Drop the 14 fractional bits, adding half an LSB (8192) first so the
+        // shift rounds to nearest, then saturate to the 16-bit range.
+        sampL = clipTo15((sampL + 8192) >> 14);
+        sampR = clipTo15((sampR + 8192) >> 14);
+        if (outChannels == 2) {
+          *out++ = sampL;
+          *out++ = sampR;
+        } else {
+          // Mono output: average of the two downmixed channels.
+          *out++ = (sampL + sampR) * 0.5;
+        }
+      }
+    } else {
+      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+    }
+    return aFrames;
+  }
+
+  // The only case left is stereo to mono: average left and right.
+  MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1);
+  if (mIn.Format() == AudioConfig::FORMAT_FLT) {
+    const float* in = static_cast<const float*>(aIn);
+    float* out = static_cast<float*>(aOut);
+    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
+      float sample = 0.0;
+      // The sample of the buffer would be interleaved.
+      sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
+      *out++ = sample;
+    }
+  } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
+    const int16_t* in = static_cast<const int16_t*>(aIn);
+    int16_t* out = static_cast<int16_t*>(aOut);
+    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
+      int32_t sample = 0.0;
+      // The sample of the buffer would be interleaved.
+      sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
+      *out++ = sample;
+    }
+  } else {
+    MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+  }
+  return aFrames;
+}
+
+// Runs aFrames frames from aIn through the speex resampler, writing the result
+// to aOut. Returns the number of frames produced, or 0 when there is no
+// resampler or when resampling failed; in the latter case the resampler is
+// destroyed.
+size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn,
+                                     size_t aFrames) {
+  if (!mResampler) {
+    return 0;
+  }
+  // speex takes both counts by pointer: on entry the frames available / the
+  // room in the output, on return the frames consumed / produced.
+  uint32_t consumed = aFrames;
+  uint32_t produced = ResampleRecipientFrames(aFrames);
+
+  int status;
+  const auto format = mOut.Format();
+  if (format == AudioConfig::FORMAT_FLT) {
+    status = speex_resampler_process_interleaved_float(
+        mResampler, static_cast<const float*>(aIn), &consumed,
+        static_cast<float*>(aOut), &produced);
+  } else if (format == AudioConfig::FORMAT_S16) {
+    status = speex_resampler_process_interleaved_int(
+        mResampler, static_cast<const int16_t*>(aIn), &consumed,
+        static_cast<int16_t*>(aOut), &produced);
+  } else {
+    MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+    // Any error code will do to take the failure path below.
+    status = RESAMPLER_ERR_ALLOC_FAILED;
+  }
+
+  MOZ_ASSERT(status == RESAMPLER_ERR_SUCCESS);
+  if (status == RESAMPLER_ERR_SUCCESS) {
+    MOZ_ASSERT(consumed == aFrames, "Some frames will be dropped");
+    return produced;
+  }
+
+  // Failure: drop the resampler so later calls bail out early.
+  speex_resampler_destroy(mResampler);
+  mResampler = nullptr;
+  return 0;
+}
+
+// Destroys the current resampler, if any, and creates a fresh one converting
+// mOut.Channels() channels from the input rate to the output rate at speex's
+// default quality. On failure a warning is emitted and mResampler is left
+// null.
+void AudioConverter::RecreateResampler() {
+  if (mResampler) {
+    speex_resampler_destroy(mResampler);
+  }
+
+  int status;
+  mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
+                                    SPEEX_RESAMPLER_QUALITY_DEFAULT, &status);
+  if (status != RESAMPLER_ERR_SUCCESS) {
+    NS_WARNING("Failed to initialize resampler.");
+    mResampler = nullptr;
+    return;
+  }
+  // Have speex skip the leading zeros its filter would otherwise output.
+  speex_resampler_skip_zeros(mResampler);
+}
+
+// Flushes what is still held inside the resampler into aOut, by resampling a
+// scratch buffer of get_input_latency() frames, then recreates the resampler.
+// Returns the number of frames written to aOut, or 0 when there is no
+// resampler or the scratch buffer could not be allocated.
+size_t AudioConverter::DrainResampler(void* aOut) {
+  if (!mResampler) {
+    return 0;
+  }
+  const int latencyFrames = speex_resampler_get_input_latency(mResampler);
+  // NOTE(review): presumably AlignedByteBuffer hands back zeroed memory, i.e.
+  // silence is what gets pushed through -- confirm.
+  AlignedByteBuffer scratch(FramesOutToBytes(latencyFrames));
+  if (!scratch) {
+    // OOM
+    return 0;
+  }
+  const int drained = ResampleAudio(aOut, scratch.Data(), latencyFrames);
+  // Start over with a new resampler, it's easier than handling follow-up.
+  RecreateResampler();
+  return drained;
+}
+
+// Upmixes aFrames interleaved frames from aIn into aOut and returns aFrames.
+// Only S16 and float samples are handled. When both layouts are valid and the
+// output is stereo, the (mono) input is duplicated on both channels with a
+// -3dB gain; otherwise the work is handed over to dumbUpDownMix().
+size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn,
+                                  size_t aFrames) const {
+  MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
+             mIn.Format() == AudioConfig::FORMAT_FLT);
+  MOZ_ASSERT(mIn.Channels() < mOut.Channels());
+  MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
+  MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
+
+  const bool isFloat = mIn.Format() == AudioConfig::FORMAT_FLT;
+  const bool isS16 = mIn.Format() == AudioConfig::FORMAT_S16;
+
+  const bool layoutsKnown = mIn.Layout().IsValid() && mOut.Layout().IsValid();
+  if (!layoutsKnown || mOut.Channels() != 2) {
+    // No proper upmix available for this case: channel-wise copy.
+    if (isFloat) {
+      dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
+                    static_cast<const float*>(aIn), mIn.Channels(), aFrames);
+    } else if (isS16) {
+      dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
+                    static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
+    } else {
+      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+    }
+    return aFrames;
+  }
+
+  // Mono to stereo. Nothing clever here: power levels are preserved by using
+  // left = right = -3dB * mono.
+  if (isFloat) {
+    const float m3db = std::sqrt(0.5);  // -3dB = sqrt(1/2)
+    const float* src = static_cast<const float*>(aIn);
+    float* dst = static_cast<float*>(aOut);
+    for (size_t frame = 0; frame < aFrames; ++frame) {
+      const float scaled = src[frame] * m3db;
+      // Interleaved output: left then right.
+      dst[2 * frame] = scaled;
+      dst[2 * frame + 1] = scaled;
+    }
+    return aFrames;
+  }
+
+  if (isS16) {
+    const int16_t* src = static_cast<const int16_t*>(aIn);
+    int16_t* dst = static_cast<int16_t*>(aOut);
+    for (size_t frame = 0; frame < aFrames; ++frame) {
+      // 11585 / 2^14 is close enough to sqrt(0.5).
+      const int16_t scaled = (static_cast<int32_t>(src[frame]) * 11585) >> 14;
+      // Interleaved output: left then right.
+      dst[2 * frame] = scaled;
+      dst[2 * frame + 1] = scaled;
+    }
+    return aFrames;
+  }
+
+  MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+  return aFrames;
+}
+
+// Returns the number of frames to provision in the output for resampling
+// aFrames input frames: aFrames scaled by out rate / in rate, plus one.
+// aFrames == 0 with differing rates is the drain case; the input size is then
+// the resampler's input latency (0 is returned if there is no resampler).
+size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const {
+  size_t inputFrames = aFrames;
+  const bool draining = inputFrames == 0 && mIn.Rate() != mOut.Rate();
+  if (draining) {
+    if (!mResampler) {
+      return 0;
+    }
+    // Draining is done by pushing get_input_latency() samples of 0.
+    inputFrames = speex_resampler_get_input_latency(mResampler);
+  }
+  // 64-bit intermediate so that frames * rate can't wrap on 32-bit size_t.
+  const uint64_t scaled = static_cast<uint64_t>(inputFrames) * mOut.Rate();
+  return scaled / mIn.Rate() + 1;
+}
+
+// Number of samples making up aFrames frames at the output channel count.
+size_t AudioConverter::FramesOutToSamples(size_t aFrames) const {
+  const size_t channels = mOut.Channels();
+  return aFrames * channels;
+}
+
+// Number of whole frames contained in aSamples samples at the input channel
+// count.
+size_t AudioConverter::SamplesInToFrames(size_t aSamples) const {
+  const size_t channels = mIn.Channels();
+  return aSamples / channels;
+}
+
+// Size in bytes of aFrames frames in the output channel count and format.
+size_t AudioConverter::FramesOutToBytes(size_t aFrames) const {
+  const size_t samples = FramesOutToSamples(aFrames);
+  return samples * AudioConfig::SampleSize(mOut.Format());
+}
+}  // namespace mozilla