/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "FFmpegAudioEncoder.h"

#include "FFmpegRuntimeLinker.h"
#include "FFmpegLog.h"
#include "FFmpegUtils.h"
#include "MediaData.h"
#include "AudioSegment.h"

namespace mozilla {

FFmpegAudioEncoder<LIBAV_VER>::FFmpegAudioEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}

nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
  return "ffvpx audio encoder"_ns;
#else
  const char* lib =
#  if defined(MOZ_FFMPEG)
      FFmpegRuntimeLinker::LinkStatusLibraryName();
#  else
      "no library: ffmpeg disabled during build";
#  endif
  return nsPrintfCString("ffmpeg audio encoder (%s)", lib);
#endif
}

// Deleter for the speex resampler, so it can be held in a UniquePtr.
void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()(
    SpeexResamplerState* aResampler) {
  speex_resampler_destroy(aResampler);
}

// Audio-specific initialization: picks a sample-rate the codec supports
// (setting up a resampler if the input rate isn't supported), configures the
// channel layout, sample format and codec-private options (Opus), then
// finishes the common ffmpeg context setup.
// Returns NS_OK on success, a DOM media error code otherwise.
nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());

  FFMPEG_LOG("FFmpegAudioEncoder::InitInternal");

  // Initialize the common members of the encoder instance
  AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
  if (!codec) {
    FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed");
    return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
  }

  // Find a compatible input rate for the codec, update the encoder config,
  // and note the rate at which this instance was configured.
  mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate);
  if (codec->supported_samplerates) {
    // Ensure the sample-rate list is sorted, iterate and either find that the
    // sample rate is supported, or pick the same rate just above the audio
    // input sample-rate (as to not lose information). If the audio is higher
    // than the highest supported sample-rate, down-sample to the highest
    // sample-rate supported by the codec. This is the case when encoding high
    // samplerate audio to opus.
    AutoTArray<int, 16> supportedSampleRates;
    IterateZeroTerminated(codec->supported_samplerates,
                          [&supportedSampleRates](int aRate) mutable {
                            supportedSampleRates.AppendElement(aRate);
                          });
    supportedSampleRates.Sort();

    for (const auto& rate : supportedSampleRates) {
      if (mInputSampleRate == rate) {
        mConfig.mSampleRate = rate;
        break;
      }
      if (mInputSampleRate < rate) {
        // This rate is the smallest supported rate above the content's rate.
        mConfig.mSampleRate = rate;
        break;
      }
      if (mInputSampleRate > rate) {
        // Keep the highest rate seen so far: if the loop ends without
        // breaking, the input is down-sampled to the codec's maximum.
        mConfig.mSampleRate = rate;
      }
    }
  }

  if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) {
    // Need to resample to targetRate
    int err;
    SpeexResamplerState* resampler = speex_resampler_init(
        mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate,
        SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
    if (!err) {
      mResampler.reset(resampler);
    } else {
      FFMPEG_LOG(
          "Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)",
          mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels);
    }
  }

  // And now the audio-specific part
  mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
#if LIBAVCODEC_VERSION_MAJOR >= 60
  // Gecko's ordering intentionally matches ffmpeg's ordering
  mLib->av_channel_layout_default(
      &mCodecContext->ch_layout,
      AssertedCast<int>(mConfig.mNumberOfChannels));
#else
  mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
#endif

  switch (mConfig.mCodec) {
    case CodecType::Opus:
      // When using libopus, ffmpeg supports interleaved float and s16 input.
      mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT;
      break;
    case CodecType::Vorbis:
      // When using libvorbis, ffmpeg only supports planar f32 input.
      mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
      break;
    default:
      MOZ_ASSERT_UNREACHABLE("Not supported");
  }

  if (mConfig.mCodec == CodecType::Opus) {
    // Default is VBR
    if (mConfig.mBitrateMode == BitrateMode::Constant) {
      mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0);
    }
    if (mConfig.mCodecSpecific.isSome()) {
      MOZ_ASSERT(mConfig.mCodecSpecific->is<OpusSpecific>());
      const OpusSpecific& specific =
          mConfig.mCodecSpecific->as<OpusSpecific>();
      // This attribute maps directly to complexity
      mCodecContext->compression_level = specific.mComplexity;
      FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity);
      // mFrameDuration is in microseconds; libopus wants milliseconds.
      float frameDurationMs =
          AssertedCast<float>(specific.mFrameDuration) / 1000.f;
      if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration",
                                  frameDurationMs, 0)) {
        FFMPEG_LOG("Error setting the frame duration on Opus encoder");
        return NS_ERROR_FAILURE;
      }
      FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs);
      if (specific.mPacketLossPerc) {
        if (mLib->av_opt_set_int(
                mCodecContext->priv_data, "packet_loss",
                AssertedCast<int64_t>(specific.mPacketLossPerc), 0)) {
          FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64
                     " on Opus encoder",
                     specific.mPacketLossPerc);
          return NS_ERROR_FAILURE;
        }
        FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder",
                    AssertedCast<int>(specific.mPacketLossPerc));
      }
      if (specific.mUseInBandFEC) {
        if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) {
          FFMPEG_LOG("Error %s FEC on Opus encoder",
                     specific.mUseInBandFEC ? "enabling" : "disabling");
          return NS_ERROR_FAILURE;
        }
        FFMPEG_LOGV("In-band FEC enabled for Opus encoder.");
      }
      if (specific.mUseDTX) {
        if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) {
          FFMPEG_LOG("Error %s DTX on Opus encoder",
                     specific.mUseDTX ? "enabling" : "disabling");
          return NS_ERROR_FAILURE;
        }
        // DTX packets are a TOC byte, and possibly one byte of length,
        // packets 3 bytes and larger are to be returned.
        mDtxThreshold = 3;
      }
      // TODO: format
      // https://bugzilla.mozilla.org/show_bug.cgi?id=1876066
    }
  }
  // Override the time base: always the sample-rate the encoder is running at
  mCodecContext->time_base =
      AVRational{.num = 1, .den = mCodecContext->sample_rate};

  MediaResult rv = FinishInitCommon(codec);
  if (NS_FAILED(rv)) {
    FFMPEG_LOG("FFmpeg encode initialization failure.");
    return rv.Code();
  }

  return NS_OK;
}

// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58

// Encode exactly one packet worth of interleaved f32 audio (fewer frames are
// only allowed when draining). Fills an AVFrame (converting to planar if the
// codec requires it) and hands it to the common modern-API encode path.
Result<MediaDataEncoder::EncodedData, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
                                               media::TimeUnit aPts) {
  // Allocate AVFrame.
  if (!PrepareFrame()) {
    FFMPEG_LOG("failed to allocate frame");
    return Err(NS_ERROR_OUT_OF_MEMORY);
  }

  uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels;

  // This method assumes that the audio has been packetized appropriately --
  // packets smaller than the packet size are allowed when draining.
  MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size);

  ChannelCount(mFrame) = AssertedCast<int>(mConfig.mNumberOfChannels);

#  if LIBAVCODEC_VERSION_MAJOR >= 60
  int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout,
                                        &mCodecContext->ch_layout);
  if (rv < 0) {
    FFMPEG_LOG("channel layout copy error: %s",
               MakeErrorString(mLib, rv).get());
    return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
  }
#  endif

  mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
  // Not a mistake, nb_samples is per channel in ffmpeg
  mFrame->nb_samples = AssertedCast<int>(frameCount);
  // Audio is converted below if needed
  mFrame->format = mCodecContext->sample_fmt;

  // Set presentation timestamp and duration of the AVFrame.
#  if LIBAVCODEC_VERSION_MAJOR >= 59
  mFrame->time_base =
      AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)};
#  endif
  mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate);
#  if LIBAVCODEC_VERSION_MAJOR >= 60
  mFrame->duration = frameCount;
#  else
  mFrame->pkt_duration = frameCount;
  // Save duration in the time_base unit.
  mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration);
#  endif

  if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
    FFMPEG_LOG("failed to allocate frame data: %s",
               MakeErrorString(mLib, ret).get());
    return Err(NS_ERROR_OUT_OF_MEMORY);
  }

  // Make sure AVFrame is writable.
  if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
    FFMPEG_LOG("failed to make frame writable: %s",
               MakeErrorString(mLib, ret).get());
    return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
  }

  // The input is always in f32 interleaved for now
  if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) {
    PodCopy(reinterpret_cast<float*>(mFrame->data[0]), aSamples.data(),
            aSamples.Length());
  } else {
    MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP);
    // NOTE(review): each iteration passes the same arguments -- it looks like
    // DeinterleaveAndConvertBuffer deinterleaves all channels at once, making
    // the loop redundant (or the per-channel index should be used). Confirm
    // against the helper's definition before changing.
    for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) {
      DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples,
                                   mConfig.mNumberOfChannels, mFrame->data);
    }
  }

  // Now send the AVFrame to ffmpeg for encoding, same code for audio and
  // video.
  return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}

// Entry point for encoding one input MediaData: validates rate/channel
// configuration, resamples if needed, packetizes into codec-sized packets and
// encodes each complete packet.
Result<MediaDataEncoder::EncodedData, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::EncodeInputWithModernAPIs(
    RefPtr<const MediaData> aSample) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(mCodecContext);
  MOZ_ASSERT(aSample);

  RefPtr<const AudioData> sample(aSample->As<AudioData>());

  FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s",
             sample->Frames(), sample->mTime.ToString().get());

  if ((!mResampler && sample->mRate != mConfig.mSampleRate) ||
      (mResampler &&
       sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) ||
      sample->mChannels != mConfig.mNumberOfChannels) {
    FFMPEG_LOG(
        "Rate or sample-rate at the input of the encoder different from what "
        "has been configured initially, erroring out");
    return Result<MediaDataEncoder::EncodedData, nsresult>(
        NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
  }

  // ffmpeg expects exactly sized input audio packets most of the time.
  // Packetization is performed if needed, and audio packets of the correct
  // size are fed to ffmpeg, with timestamps extrapolated from the timestamp
  // found on the input MediaData.
  if (!mPacketizer) {
    media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate);
    basePts += sample->mTime;
    mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels,
                        basePts.ToTicksAtRate(mConfig.mSampleRate),
                        mConfig.mSampleRate);
  }

  if (!mFirstPacketPts.IsValid()) {
    mFirstPacketPts = sample->mTime;
  }

  Span<float> audio = sample->Data();

  if (mResampler) {
    // Ensure that all input frames are consumed each time by oversizing the
    // output buffer.
    int bufferLengthGuess = std::ceil(2. * static_cast<double>(audio.size()) *
                                      mConfig.mSampleRate / mInputSampleRate);
    mTempBuffer.SetLength(bufferLengthGuess);
    uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels;
    uint32_t inputFramesProcessed = inputFrames;
    uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels;
    DebugOnly<int> rv = speex_resampler_process_interleaved_float(
        mResampler.get(), audio.data(), &inputFramesProcessed,
        mTempBuffer.Elements(), &outputFrames);
    audio = Span(mTempBuffer.Elements(),
                 outputFrames * mConfig.mNumberOfChannels);
    MOZ_ASSERT(inputFrames == inputFramesProcessed,
               "increase the buffer to consume all input each time");
    MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
  }

  EncodedData output;
  MediaResult rv = NS_OK;
  mPacketizer->Input(audio.data(),
                     audio.Length() / mConfig.mNumberOfChannels);

  // Dequeue and encode each packet
  while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) {
    mTempBuffer.SetLength(mCodecContext->frame_size *
                          mConfig.mNumberOfChannels);
    media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements());
    auto audio = Span(mTempBuffer.Elements(), mTempBuffer.Length());
    FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s",
               mPacketizer->PacketSize(), pts.ToString().get());
    auto encodeResult = EncodeOnePacket(audio, pts);
    if (encodeResult.isOk()) {
      output.AppendElements(std::move(encodeResult.unwrap()));
    } else {
      return encodeResult;
    }
    pts += media::TimeUnit(mPacketizer->PacketSize(), mConfig.mSampleRate);
  }
  return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
}

// Drain: encode whatever partial packet remains in the packetizer, then drain
// the ffmpeg encoder itself.
Result<MediaDataEncoder::EncodedData, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() {
  // If there's no packetizer, or it's empty, we can proceed immediately.
  if (!mPacketizer || mPacketizer->FramesAvailable() == 0) {
    return FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
  }

  EncodedData output;
  // Dequeue and encode the last, possibly partial, packet
  mTempBuffer.SetLength(mCodecContext->frame_size *
                        mPacketizer->ChannelCount());
  uint32_t written = 0;
  media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written);
  auto audio =
      Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount());
  auto encodeResult = EncodeOnePacket(audio, pts);
  if (encodeResult.isOk()) {
    auto array = encodeResult.unwrap();
    output.AppendElements(std::move(array));
  } else {
    return encodeResult;
  }
  // Now, drain the encoder
  auto drainResult = FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
  if (drainResult.isOk()) {
    auto array = drainResult.unwrap();
    output.AppendElements(std::move(array));
  } else {
    return drainResult;
  }
  return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
}
#endif  // if LIBAVCODEC_VERSION_MAJOR >= 58

// Convert an encoded AVPacket into a MediaRawData, dropping DTX packets below
// the size threshold, attaching timing, and (on the first packet) the
// extradata and the effective encoder config. Returns nullptr for dropped
// packets.
RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
    AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  if (aPacket->size < mDtxThreshold) {
    FFMPEG_LOG(
        "DTX enabled and packet is %d bytes (threshold %d), not returning.",
        aPacket->size, mDtxThreshold);
    return nullptr;
  }

  RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);

  data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate);
  data->mTimecode = data->mTime;
  data->mDuration =
      media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate);

  // Handle encoder delay
  // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite
  // specced yet.
  if (mFirstPacketPts > data->mTime) {
    data->mOriginalPresentationWindow =
        Some(media::TimeInterval{data->mTime, data->GetEndTime()});
    // Duration is likely to be adjusted when the above spec issue is fixed.
    // For now, leave it as-is
    // data->mDuration -= (mFirstPacketPts - data->mTime);
    // if (data->mDuration.IsNegative()) {
    //   data->mDuration = media::TimeUnit::Zero();
    // }
    data->mTime = mFirstPacketPts;
  }

  if (mPacketsDelivered++ == 0) {
    // Attach extradata, and the config (including any channel / samplerate
    // modification to fit the encoder requirements), if needed.
    if (auto r = GetExtraData(aPacket); r.isOk()) {
      data->mExtraData = r.unwrap();
    }
    data->mConfig = MakeUnique<EncoderConfig>(mConfig);
  }

  if (data->mExtraData) {
    FFMPEG_LOG(
        "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)",
        data->mTime.ToString().get(), data->mDuration.ToString().get(),
        data->Size(), data->mExtraData->Length());
  } else {
    FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)",
               data->mTime.ToString().get(), data->mDuration.ToString().get(),
               data->Size());
  }

  return data;
}

// Copy the codec-level extradata (e.g. the Opus/Vorbis headers) out of the
// codec context; errs with NS_ERROR_NOT_AVAILABLE when the codec produced
// none.
Result<already_AddRefed<MediaByteBuffer>, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) {
  if (!mCodecContext->extradata_size) {
    return Err(NS_ERROR_NOT_AVAILABLE);
  }
  // Create extra data -- they are on the context.
  auto extraData = MakeRefPtr<MediaByteBuffer>();
  extraData->SetLength(mCodecContext->extradata_size);
  MOZ_ASSERT(extraData);
  PodCopy(extraData->Elements(), mCodecContext->extradata,
          mCodecContext->extradata_size);
  return extraData.forget();
}

}  // namespace mozilla