summaryrefslogtreecommitdiffstats
path: root/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp')
-rw-r--r--dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp458
1 files changed, 458 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
new file mode 100644
index 0000000000..28db667732
--- /dev/null
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
@@ -0,0 +1,458 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FFmpegAudioEncoder.h"
+
+#include "FFmpegRuntimeLinker.h"
+#include "FFmpegLog.h"
+#include "FFmpegUtils.h"
+#include "MediaData.h"
+
+#include "AudioSegment.h"
+
+namespace mozilla {
+
// Audio-specific FFmpeg/ffvpx encoder. All state shared with the video
// encoder lives in the FFmpegDataEncoder<LIBAV_VER> base class; this
// constructor only forwards its arguments there.
FFmpegAudioEncoder<LIBAV_VER>::FFmpegAudioEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}
+
+nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const {
+#ifdef USING_MOZFFVPX
+ return "ffvpx audio encoder"_ns;
+#else
+ const char* lib =
+# if defined(MOZ_FFMPEG)
+ FFmpegRuntimeLinker::LinkStatusLibraryName();
+# else
+ "no library: ffmpeg disabled during build";
+# endif
+ return nsPrintfCString("ffmpeg audio encoder (%s)", lib);
+#endif
+}
+
// Deleter used by the smart pointer holding mResampler: frees the Speex
// resampler state allocated by speex_resampler_init() in InitSpecific().
void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()(
    SpeexResamplerState* aResampler) {
  speex_resampler_destroy(aResampler);
}
+
+nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+
+ FFMPEG_LOG("FFmpegAudioEncoder::InitInternal");
+
+ // Initialize the common members of the encoder instance
+ AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
+ if (!codec) {
+ FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed");
+ return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
+ }
+
+ // Find a compatible input rate for the codec, update the encoder config, and
+ // note the rate at which this instance was configured.
+ mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate);
+ if (codec->supported_samplerates) {
+ // Ensure the sample-rate list is sorted, iterate and either find that the
+ // sample rate is supported, or pick the same rate just above the audio
+ // input sample-rate (as to not lose information). If the audio is higher
+ // than the highest supported sample-rate, down-sample to the highest
+ // sample-rate supported by the codec. This is the case when encoding high
+ // samplerate audio to opus.
+ AutoTArray<int, 16> supportedSampleRates;
+ IterateZeroTerminated(codec->supported_samplerates,
+ [&supportedSampleRates](int aRate) mutable {
+ supportedSampleRates.AppendElement(aRate);
+ });
+ supportedSampleRates.Sort();
+
+ for (const auto& rate : supportedSampleRates) {
+ if (mInputSampleRate == rate) {
+ mConfig.mSampleRate = rate;
+ break;
+ }
+ if (mInputSampleRate < rate) {
+ // This rate is the smallest supported rate above the content's rate.
+ mConfig.mSampleRate = rate;
+ break;
+ }
+ if (mInputSampleRate > rate) {
+ mConfig.mSampleRate = rate;
+ }
+ }
+ }
+
+ if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) {
+ // Need to resample to targetRate
+ int err;
+ SpeexResamplerState* resampler = speex_resampler_init(
+ mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate,
+ SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
+ if (!err) {
+ mResampler.reset(resampler);
+ } else {
+ FFMPEG_LOG(
+ "Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)",
+ mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels);
+ }
+ }
+
+ // And now the audio-specific part
+ mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
+ mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
+
+#if LIBAVCODEC_VERSION_MAJOR >= 60
+ // Gecko's ordering intentionnally matches ffmepg's ordering
+ mLib->av_channel_layout_default(&mCodecContext->ch_layout,
+ AssertedCast<int>(mCodecContext->channels));
+#endif
+
+ switch (mConfig.mCodec) {
+ case CodecType::Opus:
+ // When using libopus, ffmpeg supports interleaved float and s16 input.
+ mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT;
+ break;
+ case CodecType::Vorbis:
+ // When using libvorbis, ffmpeg only supports planar f32 input.
+ mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
+ break;
+ default:
+ MOZ_ASSERT_UNREACHABLE("Not supported");
+ }
+
+ if (mConfig.mCodec == CodecType::Opus) {
+ // Default is VBR
+ if (mConfig.mBitrateMode == BitrateMode::Constant) {
+ mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0);
+ }
+ if (mConfig.mCodecSpecific.isSome()) {
+ MOZ_ASSERT(mConfig.mCodecSpecific->is<OpusSpecific>());
+ const OpusSpecific& specific = mConfig.mCodecSpecific->as<OpusSpecific>();
+ // This attribute maps directly to complexity
+ mCodecContext->compression_level = specific.mComplexity;
+ FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity);
+ float frameDurationMs =
+ AssertedCast<float>(specific.mFrameDuration) / 1000.f;
+ if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration",
+ frameDurationMs, 0)) {
+ FFMPEG_LOG("Error setting the frame duration on Opus encoder");
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs);
+ if (specific.mPacketLossPerc) {
+ if (mLib->av_opt_set_int(
+ mCodecContext->priv_data, "packet_loss",
+ AssertedCast<int64_t>(specific.mPacketLossPerc), 0)) {
+ FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64
+ " on Opus encoder",
+ specific.mPacketLossPerc);
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder",
+ AssertedCast<int>(specific.mPacketLossPerc));
+ }
+ if (specific.mUseInBandFEC) {
+ if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) {
+ FFMPEG_LOG("Error %s FEC on Opus encoder",
+ specific.mUseInBandFEC ? "enabling" : "disabling");
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOGV("In-band FEC enabled for Opus encoder.");
+ }
+ if (specific.mUseDTX) {
+ if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) {
+ FFMPEG_LOG("Error %s DTX on Opus encoder",
+ specific.mUseDTX ? "enabling" : "disabling");
+ return NS_ERROR_FAILURE;
+ }
+ // DTX packets are a TOC byte, and possibly one byte of length, packets
+ // 3 bytes and larger are to be returned.
+ mDtxThreshold = 3;
+ }
+ // TODO: format
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1876066
+ }
+ }
+ // Override the time base: always the sample-rate the encoder is running at
+ mCodecContext->time_base =
+ AVRational{.num = 1, .den = mCodecContext->sample_rate};
+
+ MediaResult rv = FinishInitCommon(codec);
+ if (NS_FAILED(rv)) {
+ FFMPEG_LOG("FFmpeg encode initialization failure.");
+ return rv.Code();
+ }
+
+ return NS_OK;
+}
+
+// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
+#if LIBAVCODEC_VERSION_MAJOR >= 58
+
+Result<MediaDataEncoder::EncodedData, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
+ media::TimeUnit aPts) {
+ // Allocate AVFrame.
+ if (!PrepareFrame()) {
+ FFMPEG_LOG("failed to allocate frame");
+ return Err(NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels;
+
+ // This method assumes that the audio has been packetized appropriately --
+ // packets smaller than the packet size are allowed when draining.
+ MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size);
+
+ mFrame->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
+
+# if LIBAVCODEC_VERSION_MAJOR >= 60
+ int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout,
+ &mCodecContext->ch_layout);
+ if (rv < 0) {
+ FFMPEG_LOG("channel layout copy error: %s",
+ MakeErrorString(mLib, rv).get());
+ return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
+ }
+# endif
+
+ mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
+ // Not a mistake, nb_samples is per channel in ffmpeg
+ mFrame->nb_samples = AssertedCast<int>(frameCount);
+ // Audio is converted below if needed
+ mFrame->format = mCodecContext->sample_fmt;
+ // Set presentation timestamp and duration of the AVFrame.
+# if LIBAVCODEC_VERSION_MAJOR >= 59
+ mFrame->time_base =
+ AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)};
+# endif
+ mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate);
+ mFrame->pkt_duration = frameCount;
+# if LIBAVCODEC_VERSION_MAJOR >= 60
+ mFrame->duration = frameCount;
+# else
+ // Save duration in the time_base unit.
+ mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration);
+# endif
+
+ if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
+ FFMPEG_LOG("failed to allocate frame data: %s",
+ MakeErrorString(mLib, ret).get());
+ return Err(NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ // Make sure AVFrame is writable.
+ if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
+ FFMPEG_LOG("failed to make frame writable: %s",
+ MakeErrorString(mLib, ret).get());
+ return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
+ }
+
+ // The input is always in f32 interleaved for now
+ if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ PodCopy(reinterpret_cast<float*>(mFrame->data[0]), aSamples.data(),
+ aSamples.Length());
+ } else {
+ MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP);
+ for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) {
+ DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples,
+ mFrame->channels, mFrame->data);
+ }
+ }
+
+ // Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
+ return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
+}
+
+Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder<
+ LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+ MOZ_ASSERT(mCodecContext);
+ MOZ_ASSERT(aSample);
+
+ RefPtr<const AudioData> sample(aSample->As<AudioData>());
+
+ FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s",
+ sample->Frames(), sample->mTime.ToString().get());
+
+ if ((!mResampler && sample->mRate != mConfig.mSampleRate) ||
+ (mResampler &&
+ sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) ||
+ sample->mChannels != mConfig.mNumberOfChannels) {
+ FFMPEG_LOG(
+ "Rate or sample-rate at the inputof the encoder different from what "
+ "has been configured initially, erroring out");
+ return Result<MediaDataEncoder::EncodedData, nsresult>(
+ NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
+ }
+
+ // ffmpeg expects exactly sized input audio packets most of the time.
+ // Packetization is performed if needed, and audio packets of the correct size
+ // are fed to ffmpeg, with timestamps extrapolated the timestamp found on
+ // the input MediaData.
+
+ if (!mPacketizer) {
+ media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate);
+ basePts += sample->mTime;
+ mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels,
+ basePts.ToTicksAtRate(mConfig.mSampleRate),
+ mConfig.mSampleRate);
+ }
+
+ if (!mFirstPacketPts.IsValid()) {
+ mFirstPacketPts = sample->mTime;
+ }
+
+ Span<float> audio = sample->Data();
+
+ if (mResampler) {
+ // Ensure that all input frames are consumed each time by oversizing the
+ // output buffer.
+ int bufferLengthGuess = std::ceil(2. * static_cast<float>(audio.size()) *
+ mConfig.mSampleRate / mInputSampleRate);
+ mTempBuffer.SetLength(bufferLengthGuess);
+ uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels;
+ uint32_t inputFramesProcessed = inputFrames;
+ uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels;
+ DebugOnly<int> rv = speex_resampler_process_interleaved_float(
+ mResampler.get(), audio.data(), &inputFramesProcessed,
+ mTempBuffer.Elements(), &outputFrames);
+ audio = Span<float>(mTempBuffer.Elements(),
+ outputFrames * mConfig.mNumberOfChannels);
+ MOZ_ASSERT(inputFrames == inputFramesProcessed,
+ "increate the buffer to consume all input each time");
+ MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
+ }
+
+ EncodedData output;
+ MediaResult rv = NS_OK;
+
+ mPacketizer->Input(audio.data(), audio.Length() / mConfig.mNumberOfChannels);
+
+ // Dequeue and encode each packet
+ while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) {
+ mTempBuffer.SetLength(mCodecContext->frame_size *
+ mConfig.mNumberOfChannels);
+ media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements());
+ auto audio = Span(mTempBuffer.Elements(), mTempBuffer.Length());
+ FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s",
+ mPacketizer->PacketSize(), pts.ToString().get());
+ auto encodeResult = EncodeOnePacket(audio, pts);
+ if (encodeResult.isOk()) {
+ output.AppendElements(std::move(encodeResult.unwrap()));
+ } else {
+ return encodeResult;
+ }
+ pts += media::TimeUnit(mPacketizer->PacketSize(), mConfig.mSampleRate);
+ }
+ return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
+}
+
+Result<MediaDataEncoder::EncodedData, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() {
+ // If there's no packetizer, or it's empty, we can proceed immediately.
+ if (!mPacketizer || mPacketizer->FramesAvailable() == 0) {
+ return FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
+ }
+ EncodedData output;
+ MediaResult rv = NS_OK;
+ // Dequeue and encode each packet
+ mTempBuffer.SetLength(mCodecContext->frame_size *
+ mPacketizer->ChannelCount());
+ uint32_t written;
+ media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written);
+ auto audio =
+ Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount());
+ auto encodeResult = EncodeOnePacket(audio, pts);
+ if (encodeResult.isOk()) {
+ auto array = encodeResult.unwrap();
+ output.AppendElements(std::move(array));
+ } else {
+ return encodeResult;
+ }
+ // Now, drain the encoder
+ auto drainResult = FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
+ if (drainResult.isOk()) {
+ auto array = drainResult.unwrap();
+ output.AppendElements(std::move(array));
+ } else {
+ return drainResult;
+ }
+ return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
+}
+#endif // if LIBAVCODEC_VERSION_MAJOR >= 58
+
+RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
+ AVPacket* aPacket) {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+ MOZ_ASSERT(aPacket);
+
+ if (aPacket->size < mDtxThreshold) {
+ FFMPEG_LOG(
+ "DTX enabled and packet is %d bytes (threshold %d), not returning.",
+ aPacket->size, mDtxThreshold);
+ return nullptr;
+ }
+
+ RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);
+
+ data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate);
+ data->mTimecode = data->mTime;
+ data->mDuration =
+ media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate);
+
+ // Handle encoder delay
+ // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite
+ // specced yet.
+ if (mFirstPacketPts > data->mTime) {
+ data->mOriginalPresentationWindow =
+ Some(media::TimeInterval{data->mTime, data->GetEndTime()});
+ // Duration is likely to be ajusted when the above spec issue is fixed. For
+ // now, leave it as-is
+ // data->mDuration -= (mFirstPacketPts - data->mTime);
+ // if (data->mDuration.IsNegative()) {
+ // data->mDuration = media::TimeUnit::Zero();
+ // }
+ data->mTime = mFirstPacketPts;
+ }
+
+ if (mPacketsDelivered++ == 0) {
+ // Attach extradata, and the config (including any channel / samplerate
+ // modification to fit the encoder requirements), if needed.
+ if (auto r = GetExtraData(aPacket); r.isOk()) {
+ data->mExtraData = r.unwrap();
+ }
+ data->mConfig = MakeUnique<EncoderConfig>(mConfig);
+ }
+
+ if (data->mExtraData) {
+ FFMPEG_LOG(
+ "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)",
+ data->mTime.ToString().get(), data->mDuration.ToString().get(),
+ data->Size(), data->mExtraData->Length());
+ } else {
+ FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)",
+ data->mTime.ToString().get(), data->mDuration.ToString().get(),
+ data->Size());
+ }
+
+ return data;
+}
+
+Result<already_AddRefed<MediaByteBuffer>, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) {
+ if (!mCodecContext->extradata_size) {
+ return Err(NS_ERROR_NOT_AVAILABLE);
+ }
+ // Create extra data -- they are on the context.
+ auto extraData = MakeRefPtr<MediaByteBuffer>();
+ extraData->SetLength(mCodecContext->extradata_size);
+ MOZ_ASSERT(extraData);
+ PodCopy(extraData->Elements(), mCodecContext->extradata,
+ mCodecContext->extradata_size);
+ return extraData.forget();
+}
+
+} // namespace mozilla