summaryrefslogtreecommitdiffstats
path: root/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp')
-rw-r--r--dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp458
1 files changed, 458 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
new file mode 100644
index 0000000000..28db667732
--- /dev/null
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp
@@ -0,0 +1,458 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FFmpegAudioEncoder.h"
+
+#include "FFmpegRuntimeLinker.h"
+#include "FFmpegLog.h"
+#include "FFmpegUtils.h"
+#include "MediaData.h"
+
+#include "AudioSegment.h"
+
+namespace mozilla {
+
// Audio-specific FFmpeg/ffvpx encoder. All state shared with the video
// encoder lives in the FFmpegDataEncoder<LIBAV_VER> base class; this
// constructor only forwards its arguments there.
FFmpegAudioEncoder<LIBAV_VER>::FFmpegAudioEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}
+
+nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const {
+#ifdef USING_MOZFFVPX
+ return "ffvpx audio encoder"_ns;
+#else
+ const char* lib =
+# if defined(MOZ_FFMPEG)
+ FFmpegRuntimeLinker::LinkStatusLibraryName();
+# else
+ "no library: ffmpeg disabled during build";
+# endif
+ return nsPrintfCString("ffmpeg audio encoder (%s)", lib);
+#endif
+}
+
// Deleter used by the smart pointer holding mResampler: frees the Speex
// resampler state allocated by speex_resampler_init() in InitSpecific().
void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()(
    SpeexResamplerState* aResampler) {
  speex_resampler_destroy(aResampler);
}
+
+nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+
+ FFMPEG_LOG("FFmpegAudioEncoder::InitInternal");
+
+ // Initialize the common members of the encoder instance
+ AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
+ if (!codec) {
+ FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed");
+ return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
+ }
+
+ // Find a compatible input rate for the codec, update the encoder config, and
+ // note the rate at which this instance was configured.
+ mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate);
+ if (codec->supported_samplerates) {
+ // Ensure the sample-rate list is sorted, iterate and either find that the
+ // sample rate is supported, or pick the same rate just above the audio
+ // input sample-rate (as to not lose information). If the audio is higher
+ // than the highest supported sample-rate, down-sample to the highest
+ // sample-rate supported by the codec. This is the case when encoding high
+ // samplerate audio to opus.
+ AutoTArray<int, 16> supportedSampleRates;
+ IterateZeroTerminated(codec->supported_samplerates,
+ [&supportedSampleRates](int aRate) mutable {
+ supportedSampleRates.AppendElement(aRate);
+ });
+ supportedSampleRates.Sort();
+
+ for (const auto& rate : supportedSampleRates) {
+ if (mInputSampleRate == rate) {
+ mConfig.mSampleRate = rate;
+ break;
+ }
+ if (mInputSampleRate < rate) {
+ // This rate is the smallest supported rate above the content's rate.
+ mConfig.mSampleRate = rate;
+ break;
+ }
+ if (mInputSampleRate > rate) {
+ mConfig.mSampleRate = rate;
+ }
+ }
+ }
+
+ if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) {
+ // Need to resample to targetRate
+ int err;
+ SpeexResamplerState* resampler = speex_resampler_init(
+ mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate,
+ SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
+ if (!err) {
+ mResampler.reset(resampler);
+ } else {
+ FFMPEG_LOG(
+ "Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)",
+ mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels);
+ }
+ }
+
+ // And now the audio-specific part
+ mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
+ mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
+
+#if LIBAVCODEC_VERSION_MAJOR >= 60
+ // Gecko's ordering intentionnally matches ffmepg's ordering
+ mLib->av_channel_layout_default(&mCodecContext->ch_layout,
+ AssertedCast<int>(mCodecContext->channels));
+#endif
+
+ switch (mConfig.mCodec) {
+ case CodecType::Opus:
+ // When using libopus, ffmpeg supports interleaved float and s16 input.
+ mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT;
+ break;
+ case CodecType::Vorbis:
+ // When using libvorbis, ffmpeg only supports planar f32 input.
+ mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
+ break;
+ default:
+ MOZ_ASSERT_UNREACHABLE("Not supported");
+ }
+
+ if (mConfig.mCodec == CodecType::Opus) {
+ // Default is VBR
+ if (mConfig.mBitrateMode == BitrateMode::Constant) {
+ mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0);
+ }
+ if (mConfig.mCodecSpecific.isSome()) {
+ MOZ_ASSERT(mConfig.mCodecSpecific->is<OpusSpecific>());
+ const OpusSpecific& specific = mConfig.mCodecSpecific->as<OpusSpecific>();
+ // This attribute maps directly to complexity
+ mCodecContext->compression_level = specific.mComplexity;
+ FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity);
+ float frameDurationMs =
+ AssertedCast<float>(specific.mFrameDuration) / 1000.f;
+ if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration",
+ frameDurationMs, 0)) {
+ FFMPEG_LOG("Error setting the frame duration on Opus encoder");
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs);
+ if (specific.mPacketLossPerc) {
+ if (mLib->av_opt_set_int(
+ mCodecContext->priv_data, "packet_loss",
+ AssertedCast<int64_t>(specific.mPacketLossPerc), 0)) {
+ FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64
+ " on Opus encoder",
+ specific.mPacketLossPerc);
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder",
+ AssertedCast<int>(specific.mPacketLossPerc));
+ }
+ if (specific.mUseInBandFEC) {
+ if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) {
+ FFMPEG_LOG("Error %s FEC on Opus encoder",
+ specific.mUseInBandFEC ? "enabling" : "disabling");
+ return NS_ERROR_FAILURE;
+ }
+ FFMPEG_LOGV("In-band FEC enabled for Opus encoder.");
+ }
+ if (specific.mUseDTX) {
+ if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) {
+ FFMPEG_LOG("Error %s DTX on Opus encoder",
+ specific.mUseDTX ? "enabling" : "disabling");
+ return NS_ERROR_FAILURE;
+ }
+ // DTX packets are a TOC byte, and possibly one byte of length, packets
+ // 3 bytes and larger are to be returned.
+ mDtxThreshold = 3;
+ }
+ // TODO: format
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1876066
+ }
+ }
+ // Override the time base: always the sample-rate the encoder is running at
+ mCodecContext->time_base =
+ AVRational{.num = 1, .den = mCodecContext->sample_rate};
+
+ MediaResult rv = FinishInitCommon(codec);
+ if (NS_FAILED(rv)) {
+ FFMPEG_LOG("FFmpeg encode initialization failure.");
+ return rv.Code();
+ }
+
+ return NS_OK;
+}
+
+// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
+#if LIBAVCODEC_VERSION_MAJOR >= 58
+
+Result<MediaDataEncoder::EncodedData, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
+ media::TimeUnit aPts) {
+ // Allocate AVFrame.
+ if (!PrepareFrame()) {
+ FFMPEG_LOG("failed to allocate frame");
+ return Err(NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels;
+
+ // This method assumes that the audio has been packetized appropriately --
+ // packets smaller than the packet size are allowed when draining.
+ MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size);
+
+ mFrame->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
+
+# if LIBAVCODEC_VERSION_MAJOR >= 60
+ int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout,
+ &mCodecContext->ch_layout);
+ if (rv < 0) {
+ FFMPEG_LOG("channel layout copy error: %s",
+ MakeErrorString(mLib, rv).get());
+ return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
+ }
+# endif
+
+ mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
+ // Not a mistake, nb_samples is per channel in ffmpeg
+ mFrame->nb_samples = AssertedCast<int>(frameCount);
+ // Audio is converted below if needed
+ mFrame->format = mCodecContext->sample_fmt;
+ // Set presentation timestamp and duration of the AVFrame.
+# if LIBAVCODEC_VERSION_MAJOR >= 59
+ mFrame->time_base =
+ AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)};
+# endif
+ mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate);
+ mFrame->pkt_duration = frameCount;
+# if LIBAVCODEC_VERSION_MAJOR >= 60
+ mFrame->duration = frameCount;
+# else
+ // Save duration in the time_base unit.
+ mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration);
+# endif
+
+ if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
+ FFMPEG_LOG("failed to allocate frame data: %s",
+ MakeErrorString(mLib, ret).get());
+ return Err(NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ // Make sure AVFrame is writable.
+ if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
+ FFMPEG_LOG("failed to make frame writable: %s",
+ MakeErrorString(mLib, ret).get());
+ return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
+ }
+
+ // The input is always in f32 interleaved for now
+ if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ PodCopy(reinterpret_cast<float*>(mFrame->data[0]), aSamples.data(),
+ aSamples.Length());
+ } else {
+ MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP);
+ for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) {
+ DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples,
+ mFrame->channels, mFrame->data);
+ }
+ }
+
+ // Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
+ return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
+}
+
+Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder<
+ LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+ MOZ_ASSERT(mCodecContext);
+ MOZ_ASSERT(aSample);
+
+ RefPtr<const AudioData> sample(aSample->As<AudioData>());
+
+ FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s",
+ sample->Frames(), sample->mTime.ToString().get());
+
+ if ((!mResampler && sample->mRate != mConfig.mSampleRate) ||
+ (mResampler &&
+ sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) ||
+ sample->mChannels != mConfig.mNumberOfChannels) {
+ FFMPEG_LOG(
+ "Rate or sample-rate at the inputof the encoder different from what "
+ "has been configured initially, erroring out");
+ return Result<MediaDataEncoder::EncodedData, nsresult>(
+ NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
+ }
+
+ // ffmpeg expects exactly sized input audio packets most of the time.
+ // Packetization is performed if needed, and audio packets of the correct size
+ // are fed to ffmpeg, with timestamps extrapolated the timestamp found on
+ // the input MediaData.
+
+ if (!mPacketizer) {
+ media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate);
+ basePts += sample->mTime;
+ mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels,
+ basePts.ToTicksAtRate(mConfig.mSampleRate),
+ mConfig.mSampleRate);
+ }
+
+ if (!mFirstPacketPts.IsValid()) {
+ mFirstPacketPts = sample->mTime;
+ }
+
+ Span<float> audio = sample->Data();
+
+ if (mResampler) {
+ // Ensure that all input frames are consumed each time by oversizing the
+ // output buffer.
+ int bufferLengthGuess = std::ceil(2. * static_cast<float>(audio.size()) *
+ mConfig.mSampleRate / mInputSampleRate);
+ mTempBuffer.SetLength(bufferLengthGuess);
+ uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels;
+ uint32_t inputFramesProcessed = inputFrames;
+ uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels;
+ DebugOnly<int> rv = speex_resampler_process_interleaved_float(
+ mResampler.get(), audio.data(), &inputFramesProcessed,
+ mTempBuffer.Elements(), &outputFrames);
+ audio = Span<float>(mTempBuffer.Elements(),
+ outputFrames * mConfig.mNumberOfChannels);
+ MOZ_ASSERT(inputFrames == inputFramesProcessed,
+ "increate the buffer to consume all input each time");
+ MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
+ }
+
+ EncodedData output;
+ MediaResult rv = NS_OK;
+
+ mPacketizer->Input(audio.data(), audio.Length() / mConfig.mNumberOfChannels);
+
+ // Dequeue and encode each packet
+ while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) {
+ mTempBuffer.SetLength(mCodecContext->frame_size *
+ mConfig.mNumberOfChannels);
+ media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements());
+ auto audio = Span(mTempBuffer.Elements(), mTempBuffer.Length());
+ FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s",
+ mPacketizer->PacketSize(), pts.ToString().get());
+ auto encodeResult = EncodeOnePacket(audio, pts);
+ if (encodeResult.isOk()) {
+ output.AppendElements(std::move(encodeResult.unwrap()));
+ } else {
+ return encodeResult;
+ }
+ pts += media::TimeUnit(mPacketizer->PacketSize(), mConfig.mSampleRate);
+ }
+ return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
+}
+
+Result<MediaDataEncoder::EncodedData, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() {
+ // If there's no packetizer, or it's empty, we can proceed immediately.
+ if (!mPacketizer || mPacketizer->FramesAvailable() == 0) {
+ return FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
+ }
+ EncodedData output;
+ MediaResult rv = NS_OK;
+ // Dequeue and encode each packet
+ mTempBuffer.SetLength(mCodecContext->frame_size *
+ mPacketizer->ChannelCount());
+ uint32_t written;
+ media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written);
+ auto audio =
+ Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount());
+ auto encodeResult = EncodeOnePacket(audio, pts);
+ if (encodeResult.isOk()) {
+ auto array = encodeResult.unwrap();
+ output.AppendElements(std::move(array));
+ } else {
+ return encodeResult;
+ }
+ // Now, drain the encoder
+ auto drainResult = FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
+ if (drainResult.isOk()) {
+ auto array = drainResult.unwrap();
+ output.AppendElements(std::move(array));
+ } else {
+ return drainResult;
+ }
+ return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
+}
+#endif // if LIBAVCODEC_VERSION_MAJOR >= 58
+
+RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
+ AVPacket* aPacket) {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+ MOZ_ASSERT(aPacket);
+
+ if (aPacket->size < mDtxThreshold) {
+ FFMPEG_LOG(
+ "DTX enabled and packet is %d bytes (threshold %d), not returning.",
+ aPacket->size, mDtxThreshold);
+ return nullptr;
+ }
+
+ RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);
+
+ data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate);
+ data->mTimecode = data->mTime;
+ data->mDuration =
+ media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate);
+
+ // Handle encoder delay
+ // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite
+ // specced yet.
+ if (mFirstPacketPts > data->mTime) {
+ data->mOriginalPresentationWindow =
+ Some(media::TimeInterval{data->mTime, data->GetEndTime()});
+ // Duration is likely to be ajusted when the above spec issue is fixed. For
+ // now, leave it as-is
+ // data->mDuration -= (mFirstPacketPts - data->mTime);
+ // if (data->mDuration.IsNegative()) {
+ // data->mDuration = media::TimeUnit::Zero();
+ // }
+ data->mTime = mFirstPacketPts;
+ }
+
+ if (mPacketsDelivered++ == 0) {
+ // Attach extradata, and the config (including any channel / samplerate
+ // modification to fit the encoder requirements), if needed.
+ if (auto r = GetExtraData(aPacket); r.isOk()) {
+ data->mExtraData = r.unwrap();
+ }
+ data->mConfig = MakeUnique<EncoderConfig>(mConfig);
+ }
+
+ if (data->mExtraData) {
+ FFMPEG_LOG(
+ "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)",
+ data->mTime.ToString().get(), data->mDuration.ToString().get(),
+ data->Size(), data->mExtraData->Length());
+ } else {
+ FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)",
+ data->mTime.ToString().get(), data->mDuration.ToString().get(),
+ data->Size());
+ }
+
+ return data;
+}
+
+Result<already_AddRefed<MediaByteBuffer>, nsresult>
+FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) {
+ if (!mCodecContext->extradata_size) {
+ return Err(NS_ERROR_NOT_AVAILABLE);
+ }
+ // Create extra data -- they are on the context.
+ auto extraData = MakeRefPtr<MediaByteBuffer>();
+ extraData->SetLength(mCodecContext->extradata_size);
+ MOZ_ASSERT(extraData);
+ PodCopy(extraData->Elements(), mCodecContext->extradata,
+ mCodecContext->extradata_size);
+ return extraData.forget();
+}
+
+} // namespace mozilla