Diffstat (limited to '')
-rw-r--r--  dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp  421
1 file changed, 421 insertions, 0 deletions
diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
new file mode 100644
index 0000000000..b6b3d7687e
--- /dev/null
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
@@ -0,0 +1,421 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FFmpegAudioDecoder.h"
+#include "FFmpegLog.h"
+#include "TimeUnits.h"
+#include "VideoUtils.h"
+#include "BufferReader.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Telemetry.h"
+
+namespace mozilla {
+
+using TimeUnit = media::TimeUnit;
+
+FFmpegAudioDecoder<LIBAV_VER>::FFmpegAudioDecoder(FFmpegLibWrapper* aLib,
+ const AudioInfo& aConfig)
+ : FFmpegDataDecoder(aLib, GetCodecId(aConfig.mMimeType)) {
+ MOZ_COUNT_CTOR(FFmpegAudioDecoder);
+
+ if (mCodecID == AV_CODEC_ID_AAC &&
+ aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
+ const AacCodecSpecificData& aacCodecSpecificData =
+ aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
+ mExtraData = new MediaByteBuffer;
+ // FFmpeg expects the DecoderConfigDescriptor blob.
+ mExtraData->AppendElements(
+ *aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob);
+ mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames;
+ mEncoderPaddingOrTotalFrames = aacCodecSpecificData.mMediaFrameCount;
+ FFMPEG_LOG("FFmpegAudioDecoder (aac), found encoder delay (%" PRIu32
+ ") and total frame count (%" PRIu64
+ ") in codec-specific side data",
+ mEncoderDelay, TotalFrames());
+ return;
+ }
+
+ if (mCodecID == AV_CODEC_ID_MP3) {
+ // Downgraded from diagnostic assert due to BMO 1776524 on Android.
+ MOZ_ASSERT(aConfig.mCodecSpecificConfig.is<Mp3CodecSpecificData>());
+ // Gracefully handle bad data. If we don't hit the preceding assert once
+ // this has shipped for a while, we can remove it and make the following
+ // code unconditional.
+ if (aConfig.mCodecSpecificConfig.is<Mp3CodecSpecificData>()) {
+ const Mp3CodecSpecificData& mp3CodecSpecificData =
+ aConfig.mCodecSpecificConfig.as<Mp3CodecSpecificData>();
+ mEncoderDelay = mp3CodecSpecificData.mEncoderDelayFrames;
+ mEncoderPaddingOrTotalFrames = mp3CodecSpecificData.mEncoderPaddingFrames;
+ FFMPEG_LOG("FFmpegAudioDecoder (mp3), found encoder delay (%" PRIu32
+ ") and padding values (%" PRIu64 ") in codec-specific side data",
+ mEncoderDelay, Padding());
+ return;
+ }
+ }
+
+ if (mCodecID == AV_CODEC_ID_FLAC) {
+ MOZ_DIAGNOSTIC_ASSERT(
+ aConfig.mCodecSpecificConfig.is<FlacCodecSpecificData>());
+ // Gracefully handle bad data. If we don't hit the preceding assert once
+ // this has shipped for a while, we can remove it and make the following
+ // code unconditional.
+ if (aConfig.mCodecSpecificConfig.is<FlacCodecSpecificData>()) {
+ const FlacCodecSpecificData& flacCodecSpecificData =
+ aConfig.mCodecSpecificConfig.as<FlacCodecSpecificData>();
+ if (flacCodecSpecificData.mStreamInfoBinaryBlob->IsEmpty()) {
+ // FLAC files without headers will be missing stream info. In this case
+ // we don't want to feed FFmpeg empty extra data, as it would fail; just
+ // return early.
+ return;
+ }
+ // Use a new MediaByteBuffer as the object will be modified during
+ // initialization.
+ mExtraData = new MediaByteBuffer;
+ mExtraData->AppendElements(*flacCodecSpecificData.mStreamInfoBinaryBlob);
+ return;
+ }
+ }
+
+ // Gracefully handle any codec-specific case not covered above. Once we're
+ // confident the cases above never fall through, we should remove this code.
+ RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob =
+ GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
+ if (audioCodecSpecificBinaryBlob && audioCodecSpecificBinaryBlob->Length()) {
+ // Use a new MediaByteBuffer as the object will be modified during
+ // initialization.
+ mExtraData = new MediaByteBuffer;
+ mExtraData->AppendElements(*audioCodecSpecificBinaryBlob);
+ }
+}
+
+RefPtr<MediaDataDecoder::InitPromise> FFmpegAudioDecoder<LIBAV_VER>::Init() {
+ MediaResult rv = InitDecoder();
+
+ return NS_SUCCEEDED(rv)
+ ? InitPromise::CreateAndResolve(TrackInfo::kAudioTrack, __func__)
+ : InitPromise::CreateAndReject(rv, __func__);
+}
+
+void FFmpegAudioDecoder<LIBAV_VER>::InitCodecContext() {
+ MOZ_ASSERT(mCodecContext);
+ // We do not want to set this value to 0, as FFmpeg would then default to
+ // the number of cores, but our mozlibavutil does not implement
+ // get_cpu_count.
+ mCodecContext->thread_count = 1;
+ // FFmpeg takes this as a suggestion for what format to use for audio
+ // samples. LibAV 0.8 produces rubbish interleaved float samples, so request
+ // 16-bit audio.
+#ifdef MOZ_SAMPLE_TYPE_S16
+ mCodecContext->request_sample_fmt = AV_SAMPLE_FMT_S16;
+#else
+ mCodecContext->request_sample_fmt =
+ (mLib->mVersion == 53) ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLT;
+#endif
+}
+
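+// Copies decoded samples out of an AVFrame into a single interleaved buffer
+// of AudioDataValue, converting the sample format when needed. FFmpeg may
+// return packed data (all channels interleaved in data[0]) or planar data
+// (one plane per channel in data[channel]); either way the output is
+// interleaved frame by frame.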
+static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
+ uint32_t aNumChannels,
+ uint32_t aNumAFrames) {
+ AlignedAudioBuffer audio(aNumChannels * aNumAFrames);
+ if (!audio) {
+ return audio;
+ }
+
+#ifdef MOZ_SAMPLE_TYPE_S16
+ if (aFrame->format == AV_SAMPLE_FMT_FLT) {
+ // Audio data already packed. Need to convert from 32 bits Float to S16
+ AudioDataValue* tmp = audio.get();
+ float* data = reinterpret_cast<float**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = FloatToAudioSample<int16_t>(*data++);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
+ // Planar audio data. Convert it from 32 bits float to S16
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ float** data = reinterpret_cast<float**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = FloatToAudioSample<int16_t>(data[channel][frame]);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
+ // Audio data already packed. No need to do anything other than copy it
+ // into a buffer we own.
+ memcpy(audio.get(), aFrame->data[0],
+ aNumChannels * aNumAFrames * sizeof(AudioDataValue));
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
+ // Planar audio data. Pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = data[channel][frame];
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
+ // Audio data already packed. Need to convert from S32 to S16
+ AudioDataValue* tmp = audio.get();
+ int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = *data++ / (1U << 16);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
+ // Planar audio data. Convert it from S32 to S16
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = data[channel][frame] / (1U << 16);
+ }
+ }
+ }
+#else
+ if (aFrame->format == AV_SAMPLE_FMT_FLT) {
+ // Audio data already packed. No need to do anything other than copy it
+ // into a buffer we own.
+ memcpy(audio.get(), aFrame->data[0],
+ aNumChannels * aNumAFrames * sizeof(AudioDataValue));
+ } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
+ // Planar audio data. Pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = data[channel][frame];
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
+ // Audio data already packed. Need to convert from S16 to 32 bits Float
+ AudioDataValue* tmp = audio.get();
+ int16_t* data = reinterpret_cast<int16_t**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(*data++);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
+ // Planar audio data. Convert it from S16 to 32 bits float
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ int16_t** data = reinterpret_cast<int16_t**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(data[channel][frame]);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
+ // Audio data already packed. Need to convert from S32 to 32 bits Float
+ AudioDataValue* tmp = audio.get();
+ int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(*data++);
+ }
+ }
+ } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
+ // Planar audio data. Convert it from S32 to 32 bits float
+ // and pack it into something we can understand.
+ AudioDataValue* tmp = audio.get();
+ int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
+ for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
+ for (uint32_t channel = 0; channel < aNumChannels; channel++) {
+ *tmp++ = AudioSampleToFloat(data[channel][frame]);
+ }
+ }
+ }
+#endif
+
+ return audio;
+}
+
+using ChannelLayout = AudioConfig::ChannelLayout;
+
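+// mEncoderPaddingOrTotalFrames is overloaded: it holds the encoder padding
+// frame count for MP3 and the total media frame count for AAC, so the
+// accessors below assert on mCodecID before interpreting it.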
+uint64_t FFmpegAudioDecoder<LIBAV_VER>::Padding() const {
+ MOZ_ASSERT(mCodecID == AV_CODEC_ID_MP3);
+ return mEncoderPaddingOrTotalFrames;
+}
+uint64_t FFmpegAudioDecoder<LIBAV_VER>::TotalFrames() const {
+ MOZ_ASSERT(mCodecID == AV_CODEC_ID_AAC);
+ return mEncoderPaddingOrTotalFrames;
+}
+
+MediaResult FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
+ uint8_t* aData, int aSize,
+ bool* aGotFrame,
+ DecodedData& aResults) {
+ MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
+ PROCESS_DECODE_LOG(aSample);
+ AVPacket packet;
+ mLib->av_init_packet(&packet);
+
+ packet.data = const_cast<uint8_t*>(aData);
+ packet.size = aSize;
+
+ if (aGotFrame) {
+ *aGotFrame = false;
+ }
+
+ if (!PrepareFrame()) {
+ FFMPEG_LOG("FFmpegAudioDecoder: OOM in PrepareFrame");
+ return MediaResult(
+ NS_ERROR_OUT_OF_MEMORY,
+ RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
+ }
+
+ int64_t samplePosition = aSample->mOffset;
+
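+ // A single MediaRawData may take several passes to decode: each iteration
+ // advances packet.data and shrinks packet.size by the number of bytes the
+ // decoder consumed, until the whole packet has been processed.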
+ while (packet.size > 0) {
+ int decoded = false;
+ int bytesConsumed = -1;
+#if LIBAVCODEC_VERSION_MAJOR < 59
+ bytesConsumed =
+ mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
+ if (bytesConsumed < 0) {
+ NS_WARNING("FFmpeg audio decoder error.");
+ return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+ RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
+ }
+#else
+# define AVRESULT_OK 0
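+ // With libavcodec >= 59, decoding is decoupled: avcodec_send_packet() feeds
+ // the compressed packet and avcodec_receive_frame() drains a decoded frame;
+ // either call may return AVERROR(EAGAIN) to signal that the other side
+ // needs servicing first.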
+ int ret = mLib->avcodec_send_packet(mCodecContext, &packet);
+ switch (ret) {
+ case AVRESULT_OK:
+ bytesConsumed = packet.size;
+ break;
+ case AVERROR(EAGAIN):
+ break;
+ case AVERROR_EOF:
+ FFMPEG_LOG(" End of stream.");
+ return MediaResult(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+ RESULT_DETAIL("End of stream"));
+ default:
+ NS_WARNING("FFmpeg audio decoder error.");
+ return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+ RESULT_DETAIL("FFmpeg audio error"));
+ }
+
+ ret = mLib->avcodec_receive_frame(mCodecContext, mFrame);
+ switch (ret) {
+ case AVRESULT_OK:
+ decoded = true;
+ break;
+ case AVERROR(EAGAIN):
+ break;
+ case AVERROR_EOF: {
+ FFMPEG_LOG(" End of stream.");
+ return MediaResult(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+ RESULT_DETAIL("End of stream"));
+ }
+ }
+#endif
+
+ if (decoded) {
+ if (mFrame->format != AV_SAMPLE_FMT_FLT &&
+ mFrame->format != AV_SAMPLE_FMT_FLTP &&
+ mFrame->format != AV_SAMPLE_FMT_S16 &&
+ mFrame->format != AV_SAMPLE_FMT_S16P &&
+ mFrame->format != AV_SAMPLE_FMT_S32 &&
+ mFrame->format != AV_SAMPLE_FMT_S32P) {
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_DECODE_ERR,
+ RESULT_DETAIL(
+ "FFmpeg audio decoder outputs unsupported audio format"));
+ }
+ uint32_t numChannels = mCodecContext->channels;
+ uint32_t samplingRate = mCodecContext->sample_rate;
+
+ AlignedAudioBuffer audio =
+ CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
+ if (!audio) {
+ FFMPEG_LOG("FFmpegAudioDecoder: OOM");
+ return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
+ }
+
+ FFMPEG_LOG("Packet decoded: [%s, %s] (%" PRId64 "us, %d frames)",
+ aSample->mTime.ToString().get(),
+ aSample->GetEndTime().ToString().get(),
+ aSample->mDuration.ToMicroseconds(), mFrame->nb_samples);
+
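+ // Represent the duration exactly as a number of frames at the stream's
+ // sample rate rather than converting to microseconds here.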
+ media::TimeUnit duration = TimeUnit(mFrame->nb_samples, samplingRate);
+ if (!duration.IsValid()) {
+ FFMPEG_LOG("FFmpegAudioDecoder: invalid duration");
+ return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+ RESULT_DETAIL("Invalid sample duration"));
+ }
+
+ media::TimeUnit pts = aSample->mTime;
+ media::TimeUnit newpts = pts + duration;
+ if (!newpts.IsValid()) {
+ FFMPEG_LOG("FFmpegAudioDecoder: invalid PTS.");
+ return MediaResult(
+ NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+ RESULT_DETAIL("Invalid count of accumulated audio samples"));
+ }
+
+ RefPtr<AudioData> data =
+ new AudioData(samplePosition, pts, std::move(audio), numChannels,
+ samplingRate, mCodecContext->channel_layout);
+ MOZ_ASSERT(duration == data->mDuration, "must be equal");
+ aResults.AppendElement(std::move(data));
+
+ pts = newpts;
+
+ if (aGotFrame) {
+ *aGotFrame = true;
+ }
+ }
+ // If bytesConsumed is still -1, the packet wasn't accepted by FFmpeg;
+ // another attempt will happen next iteration.
+ if (bytesConsumed != -1) {
+ packet.data += bytesConsumed;
+ packet.size -= bytesConsumed;
+ samplePosition += bytesConsumed;
+ }
+ }
+ return NS_OK;
+}
+
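+// Maps a MIME type to the corresponding libavcodec codec ID, returning
+// AV_CODEC_ID_NONE for unsupported types (or for MP3 when the FFVPX MP3
+// decoder is disabled by pref).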
+AVCodecID FFmpegAudioDecoder<LIBAV_VER>::GetCodecId(
+ const nsACString& aMimeType) {
+ if (aMimeType.EqualsLiteral("audio/mpeg")) {
+#ifdef FFVPX_VERSION
+ if (!StaticPrefs::media_ffvpx_mp3_enabled()) {
+ return AV_CODEC_ID_NONE;
+ }
+#endif
+ return AV_CODEC_ID_MP3;
+ }
+ if (aMimeType.EqualsLiteral("audio/flac")) {
+ return AV_CODEC_ID_FLAC;
+ }
+ if (aMimeType.EqualsLiteral("audio/mp4a-latm")) {
+ return AV_CODEC_ID_AAC;
+ }
+
+ return AV_CODEC_ID_NONE;
+}
+
+nsCString FFmpegAudioDecoder<LIBAV_VER>::GetCodecName() const {
+#if LIBAVCODEC_VERSION_MAJOR > 53
+ return nsCString(mLib->avcodec_descriptor_get(mCodecID)->name);
+#else
+ return "unknown"_ns;
+#endif
+}
+
+FFmpegAudioDecoder<LIBAV_VER>::~FFmpegAudioDecoder() {
+ MOZ_COUNT_DTOR(FFmpegAudioDecoder);
+}
+
+} // namespace mozilla