diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:34:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:34:50 +0000 |
commit | def92d1b8e9d373e2f6f27c366d578d97d8960c6 (patch) | |
tree | 2ef34b9ad8bb9a9220e05d60352558b15f513894 /dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp | |
parent | Adding debian version 125.0.3-1. (diff) | |
download | firefox-def92d1b8e9d373e2f6f27c366d578d97d8960c6.tar.xz firefox-def92d1b8e9d373e2f6f27c366d578d97d8960c6.zip |
Merging upstream version 126.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp')
-rw-r--r-- | dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp | 458 |
1 file changed, 458 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp new file mode 100644 index 0000000000..28db667732 --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp @@ -0,0 +1,458 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FFmpegAudioEncoder.h" + +#include "FFmpegRuntimeLinker.h" +#include "FFmpegLog.h" +#include "FFmpegUtils.h" +#include "MediaData.h" + +#include "AudioSegment.h" + +namespace mozilla { + +FFmpegAudioEncoder<LIBAV_VER>::FFmpegAudioEncoder( + const FFmpegLibWrapper* aLib, AVCodecID aCodecID, + const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig) + : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {} + +nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const { +#ifdef USING_MOZFFVPX + return "ffvpx audio encoder"_ns; +#else + const char* lib = +# if defined(MOZ_FFMPEG) + FFmpegRuntimeLinker::LinkStatusLibraryName(); +# else + "no library: ffmpeg disabled during build"; +# endif + return nsPrintfCString("ffmpeg audio encoder (%s)", lib); +#endif +} + +void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()( + SpeexResamplerState* aResampler) { + speex_resampler_destroy(aResampler); +} + +nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("FFmpegAudioEncoder::InitInternal"); + + // Initialize the common members of the encoder instance + AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon(); + if (!codec) { + FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed"); + return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR; + } + + // Find a compatible input rate for the codec, update the 
encoder config, and + // note the rate at which this instance was configured. + mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate); + if (codec->supported_samplerates) { + // Ensure the sample-rate list is sorted, iterate and either find that the + // sample rate is supported, or pick the same rate just above the audio + // input sample-rate (as to not lose information). If the audio is higher + // than the highest supported sample-rate, down-sample to the highest + // sample-rate supported by the codec. This is the case when encoding high + // samplerate audio to opus. + AutoTArray<int, 16> supportedSampleRates; + IterateZeroTerminated(codec->supported_samplerates, + [&supportedSampleRates](int aRate) mutable { + supportedSampleRates.AppendElement(aRate); + }); + supportedSampleRates.Sort(); + + for (const auto& rate : supportedSampleRates) { + if (mInputSampleRate == rate) { + mConfig.mSampleRate = rate; + break; + } + if (mInputSampleRate < rate) { + // This rate is the smallest supported rate above the content's rate. 
+ mConfig.mSampleRate = rate; + break; + } + if (mInputSampleRate > rate) { + mConfig.mSampleRate = rate; + } + } + } + + if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) { + // Need to resample to targetRate + int err; + SpeexResamplerState* resampler = speex_resampler_init( + mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate, + SPEEX_RESAMPLER_QUALITY_DEFAULT, &err); + if (!err) { + mResampler.reset(resampler); + } else { + FFMPEG_LOG( + "Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)", + mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels); + } + } + + // And now the audio-specific part + mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate); + mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels); + +#if LIBAVCODEC_VERSION_MAJOR >= 60 + // Gecko's ordering intentionnally matches ffmepg's ordering + mLib->av_channel_layout_default(&mCodecContext->ch_layout, + AssertedCast<int>(mCodecContext->channels)); +#endif + + switch (mConfig.mCodec) { + case CodecType::Opus: + // When using libopus, ffmpeg supports interleaved float and s16 input. + mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT; + break; + case CodecType::Vorbis: + // When using libvorbis, ffmpeg only supports planar f32 input. 
+ mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP; + break; + default: + MOZ_ASSERT_UNREACHABLE("Not supported"); + } + + if (mConfig.mCodec == CodecType::Opus) { + // Default is VBR + if (mConfig.mBitrateMode == BitrateMode::Constant) { + mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0); + } + if (mConfig.mCodecSpecific.isSome()) { + MOZ_ASSERT(mConfig.mCodecSpecific->is<OpusSpecific>()); + const OpusSpecific& specific = mConfig.mCodecSpecific->as<OpusSpecific>(); + // This attribute maps directly to complexity + mCodecContext->compression_level = specific.mComplexity; + FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity); + float frameDurationMs = + AssertedCast<float>(specific.mFrameDuration) / 1000.f; + if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration", + frameDurationMs, 0)) { + FFMPEG_LOG("Error setting the frame duration on Opus encoder"); + return NS_ERROR_FAILURE; + } + FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs); + if (specific.mPacketLossPerc) { + if (mLib->av_opt_set_int( + mCodecContext->priv_data, "packet_loss", + AssertedCast<int64_t>(specific.mPacketLossPerc), 0)) { + FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64 + " on Opus encoder", + specific.mPacketLossPerc); + return NS_ERROR_FAILURE; + } + FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder", + AssertedCast<int>(specific.mPacketLossPerc)); + } + if (specific.mUseInBandFEC) { + if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) { + FFMPEG_LOG("Error %s FEC on Opus encoder", + specific.mUseInBandFEC ? "enabling" : "disabling"); + return NS_ERROR_FAILURE; + } + FFMPEG_LOGV("In-band FEC enabled for Opus encoder."); + } + if (specific.mUseDTX) { + if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) { + FFMPEG_LOG("Error %s DTX on Opus encoder", + specific.mUseDTX ? 
"enabling" : "disabling"); + return NS_ERROR_FAILURE; + } + // DTX packets are a TOC byte, and possibly one byte of length, packets + // 3 bytes and larger are to be returned. + mDtxThreshold = 3; + } + // TODO: format + // https://bugzilla.mozilla.org/show_bug.cgi?id=1876066 + } + } + // Override the time base: always the sample-rate the encoder is running at + mCodecContext->time_base = + AVRational{.num = 1, .den = mCodecContext->sample_rate}; + + MediaResult rv = FinishInitCommon(codec); + if (NS_FAILED(rv)) { + FFMPEG_LOG("FFmpeg encode initialization failure."); + return rv.Code(); + } + + return NS_OK; +} + +// avcodec_send_frame and avcodec_receive_packet were introduced in version 58. +#if LIBAVCODEC_VERSION_MAJOR >= 58 + +Result<MediaDataEncoder::EncodedData, nsresult> +FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples, + media::TimeUnit aPts) { + // Allocate AVFrame. + if (!PrepareFrame()) { + FFMPEG_LOG("failed to allocate frame"); + return Err(NS_ERROR_OUT_OF_MEMORY); + } + + uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels; + + // This method assumes that the audio has been packetized appropriately -- + // packets smaller than the packet size are allowed when draining. + MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size); + + mFrame->channels = AssertedCast<int>(mConfig.mNumberOfChannels); + +# if LIBAVCODEC_VERSION_MAJOR >= 60 + int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout, + &mCodecContext->ch_layout); + if (rv < 0) { + FFMPEG_LOG("channel layout copy error: %s", + MakeErrorString(mLib, rv).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } +# endif + + mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate); + // Not a mistake, nb_samples is per channel in ffmpeg + mFrame->nb_samples = AssertedCast<int>(frameCount); + // Audio is converted below if needed + mFrame->format = mCodecContext->sample_fmt; + // Set presentation timestamp and duration of the AVFrame. 
+# if LIBAVCODEC_VERSION_MAJOR >= 59 + mFrame->time_base = + AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)}; +# endif + mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate); + mFrame->pkt_duration = frameCount; +# if LIBAVCODEC_VERSION_MAJOR >= 60 + mFrame->duration = frameCount; +# else + // Save duration in the time_base unit. + mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration); +# endif + + if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) { + FFMPEG_LOG("failed to allocate frame data: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_OUT_OF_MEMORY); + } + + // Make sure AVFrame is writable. + if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) { + FFMPEG_LOG("failed to make frame writable: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + // The input is always in f32 interleaved for now + if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) { + PodCopy(reinterpret_cast<float*>(mFrame->data[0]), aSamples.data(), + aSamples.Length()); + } else { + MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP); + for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) { + DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples, + mFrame->channels, mFrame->data); + } + } + + // Now send the AVFrame to ffmpeg for encoding, same code for audio and video. 
+ return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs(); +} + +Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder< + LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(mCodecContext); + MOZ_ASSERT(aSample); + + RefPtr<const AudioData> sample(aSample->As<AudioData>()); + + FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s", + sample->Frames(), sample->mTime.ToString().get()); + + if ((!mResampler && sample->mRate != mConfig.mSampleRate) || + (mResampler && + sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) || + sample->mChannels != mConfig.mNumberOfChannels) { + FFMPEG_LOG( + "Rate or sample-rate at the inputof the encoder different from what " + "has been configured initially, erroring out"); + return Result<MediaDataEncoder::EncodedData, nsresult>( + NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR); + } + + // ffmpeg expects exactly sized input audio packets most of the time. + // Packetization is performed if needed, and audio packets of the correct size + // are fed to ffmpeg, with timestamps extrapolated the timestamp found on + // the input MediaData. + + if (!mPacketizer) { + media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate); + basePts += sample->mTime; + mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels, + basePts.ToTicksAtRate(mConfig.mSampleRate), + mConfig.mSampleRate); + } + + if (!mFirstPacketPts.IsValid()) { + mFirstPacketPts = sample->mTime; + } + + Span<float> audio = sample->Data(); + + if (mResampler) { + // Ensure that all input frames are consumed each time by oversizing the + // output buffer. + int bufferLengthGuess = std::ceil(2. 
* static_cast<float>(audio.size()) * + mConfig.mSampleRate / mInputSampleRate); + mTempBuffer.SetLength(bufferLengthGuess); + uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels; + uint32_t inputFramesProcessed = inputFrames; + uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels; + DebugOnly<int> rv = speex_resampler_process_interleaved_float( + mResampler.get(), audio.data(), &inputFramesProcessed, + mTempBuffer.Elements(), &outputFrames); + audio = Span<float>(mTempBuffer.Elements(), + outputFrames * mConfig.mNumberOfChannels); + MOZ_ASSERT(inputFrames == inputFramesProcessed, + "increate the buffer to consume all input each time"); + MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); + } + + EncodedData output; + MediaResult rv = NS_OK; + + mPacketizer->Input(audio.data(), audio.Length() / mConfig.mNumberOfChannels); + + // Dequeue and encode each packet + while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) { + mTempBuffer.SetLength(mCodecContext->frame_size * + mConfig.mNumberOfChannels); + media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements()); + auto audio = Span(mTempBuffer.Elements(), mTempBuffer.Length()); + FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s", + mPacketizer->PacketSize(), pts.ToString().get()); + auto encodeResult = EncodeOnePacket(audio, pts); + if (encodeResult.isOk()) { + output.AppendElements(std::move(encodeResult.unwrap())); + } else { + return encodeResult; + } + pts += media::TimeUnit(mPacketizer->PacketSize(), mConfig.mSampleRate); + } + return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output)); +} + +Result<MediaDataEncoder::EncodedData, nsresult> +FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() { + // If there's no packetizer, or it's empty, we can proceed immediately. 
+ if (!mPacketizer || mPacketizer->FramesAvailable() == 0) { + return FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs(); + } + EncodedData output; + MediaResult rv = NS_OK; + // Dequeue and encode each packet + mTempBuffer.SetLength(mCodecContext->frame_size * + mPacketizer->ChannelCount()); + uint32_t written; + media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written); + auto audio = + Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount()); + auto encodeResult = EncodeOnePacket(audio, pts); + if (encodeResult.isOk()) { + auto array = encodeResult.unwrap(); + output.AppendElements(std::move(array)); + } else { + return encodeResult; + } + // Now, drain the encoder + auto drainResult = FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs(); + if (drainResult.isOk()) { + auto array = drainResult.unwrap(); + output.AppendElements(std::move(array)); + } else { + return drainResult; + } + return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output)); +} +#endif // if LIBAVCODEC_VERSION_MAJOR >= 58 + +RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData( + AVPacket* aPacket) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(aPacket); + + if (aPacket->size < mDtxThreshold) { + FFMPEG_LOG( + "DTX enabled and packet is %d bytes (threshold %d), not returning.", + aPacket->size, mDtxThreshold); + return nullptr; + } + + RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket); + + data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate); + data->mTimecode = data->mTime; + data->mDuration = + media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate); + + // Handle encoder delay + // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite + // specced yet. + if (mFirstPacketPts > data->mTime) { + data->mOriginalPresentationWindow = + Some(media::TimeInterval{data->mTime, data->GetEndTime()}); + // Duration is likely to be ajusted when the above spec issue is fixed. 
For + // now, leave it as-is + // data->mDuration -= (mFirstPacketPts - data->mTime); + // if (data->mDuration.IsNegative()) { + // data->mDuration = media::TimeUnit::Zero(); + // } + data->mTime = mFirstPacketPts; + } + + if (mPacketsDelivered++ == 0) { + // Attach extradata, and the config (including any channel / samplerate + // modification to fit the encoder requirements), if needed. + if (auto r = GetExtraData(aPacket); r.isOk()) { + data->mExtraData = r.unwrap(); + } + data->mConfig = MakeUnique<EncoderConfig>(mConfig); + } + + if (data->mExtraData) { + FFMPEG_LOG( + "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)", + data->mTime.ToString().get(), data->mDuration.ToString().get(), + data->Size(), data->mExtraData->Length()); + } else { + FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)", + data->mTime.ToString().get(), data->mDuration.ToString().get(), + data->Size()); + } + + return data; +} + +Result<already_AddRefed<MediaByteBuffer>, nsresult> +FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) { + if (!mCodecContext->extradata_size) { + return Err(NS_ERROR_NOT_AVAILABLE); + } + // Create extra data -- they are on the context. + auto extraData = MakeRefPtr<MediaByteBuffer>(); + extraData->SetLength(mCodecContext->extradata_size); + MOZ_ASSERT(extraData); + PodCopy(extraData->Elements(), mCodecContext->extradata, + mCodecContext->extradata_size); + return extraData.forget(); +} + +} // namespace mozilla |