Diffstat (limited to 'dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp')
-rw-r--r-- | dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp | 421 |
1 file changed, 421 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
new file mode 100644
index 0000000000..b6b3d7687e
--- /dev/null
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
@@ -0,0 +1,421 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FFmpegAudioDecoder.h"
+#include "FFmpegLog.h"
+#include "TimeUnits.h"
+#include "VideoUtils.h"
+#include "BufferReader.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Telemetry.h"
+
+namespace mozilla {
+
+using TimeUnit = media::TimeUnit;
+
+FFmpegAudioDecoder<LIBAV_VER>::FFmpegAudioDecoder(FFmpegLibWrapper* aLib,
+                                                  const AudioInfo& aConfig)
+    : FFmpegDataDecoder(aLib, GetCodecId(aConfig.mMimeType)) {
+  MOZ_COUNT_CTOR(FFmpegAudioDecoder);
+
+  if (mCodecID == AV_CODEC_ID_AAC &&
+      aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
+    const AacCodecSpecificData& aacCodecSpecificData =
+        aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
+    mExtraData = new MediaByteBuffer;
+    // Ffmpeg expects the DecoderConfigDescriptor blob.
+    mExtraData->AppendElements(
+        *aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob);
+    mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames;
+    mEncoderPaddingOrTotalFrames = aacCodecSpecificData.mMediaFrameCount;
+    FFMPEG_LOG("FFmpegAudioDecoder (aac), found encoder delay (%" PRIu32
+               ") and total frame count (%" PRIu64
+               ") in codec-specific side data",
+               mEncoderDelay, TotalFrames());
+    return;
+  }
+
+  if (mCodecID == AV_CODEC_ID_MP3) {
+    // Downgraded from diagnostic assert due to BMO 1776524 on Android.
+    MOZ_ASSERT(aConfig.mCodecSpecificConfig.is<Mp3CodecSpecificData>());
+    // Gracefully handle bad data. If we don't hit the preceding assert once
+    // this has been shipped for a while, we can remove it and make the
+    // following code non-conditional.
+    if (aConfig.mCodecSpecificConfig.is<Mp3CodecSpecificData>()) {
+      const Mp3CodecSpecificData& mp3CodecSpecificData =
+          aConfig.mCodecSpecificConfig.as<Mp3CodecSpecificData>();
+      mEncoderDelay = mp3CodecSpecificData.mEncoderDelayFrames;
+      mEncoderPaddingOrTotalFrames = mp3CodecSpecificData.mEncoderPaddingFrames;
+      FFMPEG_LOG("FFmpegAudioDecoder (mp3), found encoder delay (%" PRIu32
+                 ") "
+                 "and padding values (%" PRIu64 ") in codec-specific side-data",
+                 mEncoderDelay, Padding());
+      return;
+    }
+  }
+
+  if (mCodecID == AV_CODEC_ID_FLAC) {
+    MOZ_DIAGNOSTIC_ASSERT(
+        aConfig.mCodecSpecificConfig.is<FlacCodecSpecificData>());
+    // Gracefully handle bad data. If we don't hit the preceding assert once
+    // this has been shipped for a while, we can remove it and make the
+    // following code non-conditional.
+    if (aConfig.mCodecSpecificConfig.is<FlacCodecSpecificData>()) {
+      const FlacCodecSpecificData& flacCodecSpecificData =
+          aConfig.mCodecSpecificConfig.as<FlacCodecSpecificData>();
+      if (flacCodecSpecificData.mStreamInfoBinaryBlob->IsEmpty()) {
+        // Flac files without headers will be missing stream info. In this case
+        // we don't want to feed ffmpeg empty extra data as it will fail, just
+        // early return.
+        return;
+      }
+      // Use a new MediaByteBuffer as the object will be modified during
+      // initialization.
+      mExtraData = new MediaByteBuffer;
+      mExtraData->AppendElements(*flacCodecSpecificData.mStreamInfoBinaryBlob);
+      return;
+    }
+  }
+
+  // Gracefully handle failure to cover all codec specific cases above. Once
+  // we're confident there is no fall through from these cases above, we should
+  // remove this code.
+  RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob =
+      GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
+  if (audioCodecSpecificBinaryBlob && audioCodecSpecificBinaryBlob->Length()) {
+    // Use a new MediaByteBuffer as the object will be modified during
+    // initialization.
+    mExtraData = new MediaByteBuffer;
+    mExtraData->AppendElements(*audioCodecSpecificBinaryBlob);
+  }
+}
+
+RefPtr<MediaDataDecoder::InitPromise> FFmpegAudioDecoder<LIBAV_VER>::Init() {
+  MediaResult rv = InitDecoder();
+
+  return NS_SUCCEEDED(rv)
+             ? InitPromise::CreateAndResolve(TrackInfo::kAudioTrack, __func__)
+             : InitPromise::CreateAndReject(rv, __func__);
+}
+
+void FFmpegAudioDecoder<LIBAV_VER>::InitCodecContext() {
+  MOZ_ASSERT(mCodecContext);
+  // We do not want to set this value to 0 as FFmpeg by default will
+  // use the number of cores, which with our mozlibavutil get_cpu_count
+  // isn't implemented.
+  mCodecContext->thread_count = 1;
+  // FFmpeg takes this as a suggestion for what format to use for audio samples.
+  // LibAV 0.8 produces rubbish float interleaved samples, request 16 bits
+  // audio.
+#ifdef MOZ_SAMPLE_TYPE_S16
+  mCodecContext->request_sample_fmt = AV_SAMPLE_FMT_S16;
+#else
+  mCodecContext->request_sample_fmt =
+      (mLib->mVersion == 53) ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLT;
+#endif
+}
+
+static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
+                                           uint32_t aNumChannels,
+                                           uint32_t aNumAFrames) {
+  AlignedAudioBuffer audio(aNumChannels * aNumAFrames);
+  if (!audio) {
+    return audio;
+  }
+
+#ifdef MOZ_SAMPLE_TYPE_S16
+  if (aFrame->format == AV_SAMPLE_FMT_FLT) {
+    // Audio data already packed. Need to convert from 32 bits Float to S16
+    AudioDataValue* tmp = audio.get();
+    float* data = reinterpret_cast<float**>(aFrame->data)[0];
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = FloatToAudioSample<int16_t>(*data++);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
+    // Planar audio data. Convert it from 32 bits float to S16
+    // and pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    float** data = reinterpret_cast<float**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = FloatToAudioSample<int16_t>(data[channel][frame]);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
+    // Audio data already packed. No need to do anything other than copy it
+    // into a buffer we own.
+    memcpy(audio.get(), aFrame->data[0],
+           aNumChannels * aNumAFrames * sizeof(AudioDataValue));
+  } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
+    // Planar audio data. Pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = data[channel][frame];
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
+    // Audio data already packed. Need to convert from S32 to S16
+    AudioDataValue* tmp = audio.get();
+    int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = *data++ / (1U << 16);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
+    // Planar audio data. Convert it from S32 to S16
+    // and pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = data[channel][frame] / (1U << 16);
+      }
+    }
+  }
+#else
+  if (aFrame->format == AV_SAMPLE_FMT_FLT) {
+    // Audio data already packed. No need to do anything other than copy it
+    // into a buffer we own.
+    memcpy(audio.get(), aFrame->data[0],
+           aNumChannels * aNumAFrames * sizeof(AudioDataValue));
+  } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
+    // Planar audio data. Pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = data[channel][frame];
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
+    // Audio data already packed. Need to convert from S16 to 32 bits Float
+    AudioDataValue* tmp = audio.get();
+    int16_t* data = reinterpret_cast<int16_t**>(aFrame->data)[0];
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = AudioSampleToFloat(*data++);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
+    // Planar audio data. Convert it from S16 to 32 bits float
+    // and pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    int16_t** data = reinterpret_cast<int16_t**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = AudioSampleToFloat(data[channel][frame]);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
+    // Audio data already packed. Need to convert from S32 to 32 bits Float
+    AudioDataValue* tmp = audio.get();
+    int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = AudioSampleToFloat(*data++);
+      }
+    }
+  } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
+    // Planar audio data. Convert it from S32 to 32 bits float
+    // and pack it into something we can understand.
+    AudioDataValue* tmp = audio.get();
+    int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
+    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+        *tmp++ = AudioSampleToFloat(data[channel][frame]);
+      }
+    }
+  }
+#endif
+
+  return audio;
+}
+
+using ChannelLayout = AudioConfig::ChannelLayout;
+
+uint64_t FFmpegAudioDecoder<LIBAV_VER>::Padding() const {
+  MOZ_ASSERT(mCodecID == AV_CODEC_ID_MP3);
+  return mEncoderPaddingOrTotalFrames;
+}
+uint64_t FFmpegAudioDecoder<LIBAV_VER>::TotalFrames() const {
+  MOZ_ASSERT(mCodecID == AV_CODEC_ID_AAC);
+  return mEncoderPaddingOrTotalFrames;
+}
+
+MediaResult FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
+                                                    uint8_t* aData, int aSize,
+                                                    bool* aGotFrame,
+                                                    DecodedData& aResults) {
+  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+  PROCESS_DECODE_LOG(aSample);
+  AVPacket packet;
+  mLib->av_init_packet(&packet);
+
+  packet.data = const_cast<uint8_t*>(aData);
+  packet.size = aSize;
+
+  if (aGotFrame) {
+    *aGotFrame = false;
+  }
+
+  if (!PrepareFrame()) {
+    FFMPEG_LOG("FFmpegAudioDecoder: OOM in PrepareFrame");
+    return MediaResult(
+        NS_ERROR_OUT_OF_MEMORY,
+        RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
+  }
+
+  int64_t samplePosition = aSample->mOffset;
+
+  while (packet.size > 0) {
+    int decoded = false;
+    int bytesConsumed = -1;
+#if LIBAVCODEC_VERSION_MAJOR < 59
+    bytesConsumed =
+        mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
+    if (bytesConsumed < 0) {
+      NS_WARNING("FFmpeg audio decoder error.");
+      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                         RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
+    }
+#else
+#  define AVRESULT_OK 0
+    int ret = mLib->avcodec_send_packet(mCodecContext, &packet);
+    switch (ret) {
+      case AVRESULT_OK:
+        bytesConsumed = packet.size;
+        break;
+      case AVERROR(EAGAIN):
+        break;
+      case AVERROR_EOF:
+        FFMPEG_LOG(" End of stream.");
+        return MediaResult(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                           RESULT_DETAIL("End of stream"));
+      default:
+        NS_WARNING("FFmpeg audio decoder error.");
+        return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                           RESULT_DETAIL("FFmpeg audio error"));
+    }
+
+    ret = mLib->avcodec_receive_frame(mCodecContext, mFrame);
+    switch (ret) {
+      case AVRESULT_OK:
+        decoded = true;
+        break;
+      case AVERROR(EAGAIN):
+        break;
+      case AVERROR_EOF: {
+        FFMPEG_LOG(" End of stream.");
+        return MediaResult(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                           RESULT_DETAIL("End of stream"));
+      }
+    }
+#endif
+
+    if (decoded) {
+      if (mFrame->format != AV_SAMPLE_FMT_FLT &&
+          mFrame->format != AV_SAMPLE_FMT_FLTP &&
+          mFrame->format != AV_SAMPLE_FMT_S16 &&
+          mFrame->format != AV_SAMPLE_FMT_S16P &&
+          mFrame->format != AV_SAMPLE_FMT_S32 &&
+          mFrame->format != AV_SAMPLE_FMT_S32P) {
+        return MediaResult(
+            NS_ERROR_DOM_MEDIA_DECODE_ERR,
+            RESULT_DETAIL(
+                "FFmpeg audio decoder outputs unsupported audio format"));
+      }
+      uint32_t numChannels = mCodecContext->channels;
+      uint32_t samplingRate = mCodecContext->sample_rate;
+
+      AlignedAudioBuffer audio =
+          CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
+      if (!audio) {
+        FFMPEG_LOG("FFmpegAudioDecoder: OOM");
+        return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
+      }
+
+      FFMPEG_LOG("Packet decoded: [%s, %s] (%" PRId64 "us, %d frames)",
+                 aSample->mTime.ToString().get(),
+                 aSample->GetEndTime().ToString().get(),
+                 aSample->mDuration.ToMicroseconds(), mFrame->nb_samples);
+
+      media::TimeUnit duration = TimeUnit(mFrame->nb_samples, samplingRate);
+      if (!duration.IsValid()) {
+        FFMPEG_LOG("FFmpegAudioDecoder: invalid duration");
+        return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+                           RESULT_DETAIL("Invalid sample duration"));
+      }
+
+      media::TimeUnit pts = aSample->mTime;
+      media::TimeUnit newpts = pts + duration;
+      if (!newpts.IsValid()) {
+        FFMPEG_LOG("FFmpegAudioDecoder: invalid PTS.");
+        return MediaResult(
+            NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+            RESULT_DETAIL("Invalid count of accumulated audio samples"));
+      }
+
+      RefPtr<AudioData> data =
+          new AudioData(samplePosition, pts, std::move(audio), numChannels,
+                        samplingRate, mCodecContext->channel_layout);
+      MOZ_ASSERT(duration == data->mDuration, "must be equal");
+      aResults.AppendElement(std::move(data));
+
+      pts = newpts;
+
+      if (aGotFrame) {
+        *aGotFrame = true;
+      }
+    }
+    // The packet wasn't sent to ffmpeg, another attempt will happen next
+    // iteration.
+    if (bytesConsumed != -1) {
+      packet.data += bytesConsumed;
+      packet.size -= bytesConsumed;
+      samplePosition += bytesConsumed;
+    }
+  }
+  return NS_OK;
+}
+
+AVCodecID FFmpegAudioDecoder<LIBAV_VER>::GetCodecId(
+    const nsACString& aMimeType) {
+  if (aMimeType.EqualsLiteral("audio/mpeg")) {
+#ifdef FFVPX_VERSION
+    if (!StaticPrefs::media_ffvpx_mp3_enabled()) {
+      return AV_CODEC_ID_NONE;
+    }
+#endif
+    return AV_CODEC_ID_MP3;
+  }
+  if (aMimeType.EqualsLiteral("audio/flac")) {
+    return AV_CODEC_ID_FLAC;
+  }
+  if (aMimeType.EqualsLiteral("audio/mp4a-latm")) {
+    return AV_CODEC_ID_AAC;
+  }
+
+  return AV_CODEC_ID_NONE;
+}
+
+nsCString FFmpegAudioDecoder<LIBAV_VER>::GetCodecName() const {
+#if LIBAVCODEC_VERSION_MAJOR > 53
+  return nsCString(mLib->avcodec_descriptor_get(mCodecID)->name);
+#else
+  return "unknown"_ns;
+#endif
+}
+
+FFmpegAudioDecoder<LIBAV_VER>::~FFmpegAudioDecoder() {
+  MOZ_COUNT_DTOR(FFmpegAudioDecoder);
+}
+
+}  // namespace mozilla