diff options
Diffstat (limited to 'dom/media/platforms/wmf/WMFAudioMFTManager.cpp')
-rw-r--r-- | dom/media/platforms/wmf/WMFAudioMFTManager.cpp | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/dom/media/platforms/wmf/WMFAudioMFTManager.cpp b/dom/media/platforms/wmf/WMFAudioMFTManager.cpp new file mode 100644 index 0000000000..6ebcf9a80a --- /dev/null +++ b/dom/media/platforms/wmf/WMFAudioMFTManager.cpp @@ -0,0 +1,315 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WMFAudioMFTManager.h" +#include "MediaInfo.h" +#include "TimeUnits.h" +#include "VideoUtils.h" +#include "WMFUtils.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/Logging.h" +#include "mozilla/Telemetry.h" +#include "nsTArray.h" +#include "BufferReader.h" +#include "mozilla/ScopeExit.h" + +#define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) + +namespace mozilla { + +using media::TimeUnit; + +WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig) + : mAudioChannels(aConfig.mChannels), + mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP), + mAudioRate(aConfig.mRate), + mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) { + MOZ_COUNT_CTOR(WMFAudioMFTManager); + + if (mStreamType == WMFStreamType::AAC) { + const uint8_t* audioSpecConfig; + uint32_t configLength; + if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) { + const AacCodecSpecificData& aacCodecSpecificData = + aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>(); + audioSpecConfig = + aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements(); + configLength = + aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length(); + + mRemainingEncoderDelay = mEncoderDelay = + aacCodecSpecificData.mEncoderDelayFrames; + mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount; + LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32 + "frames) and total media frames (%" PRIu64 " frames)\n", + mEncoderDelay, mTotalMediaFrames); + } else { + // Gracefully handle failure to cover all codec specific cases above. Once + // we're confident there is no fall through from these cases above, we + // should remove this code. + RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob = + GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig); + audioSpecConfig = audioCodecSpecificBinaryBlob->Elements(); + configLength = audioCodecSpecificBinaryBlob->Length(); + } + AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig, + configLength, mUserData); + } +} + +WMFAudioMFTManager::~WMFAudioMFTManager() { + MOZ_COUNT_DTOR(WMFAudioMFTManager); +} + +const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() { + MOZ_ASSERT(StreamTypeIsAudio(mStreamType)); + switch (mStreamType) { + case WMFStreamType::AAC: + return MFAudioFormat_AAC; + case WMFStreamType::MP3: + return MFAudioFormat_MP3; + default: + return GUID_NULL; + }; +} + +bool WMFAudioMFTManager::Init() { + NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false); + + RefPtr<MFTDecoder> decoder(new MFTDecoder()); + // Note: MP3 MFT isn't registered as supporting Float output, but it works. + // Find PCM output MFTs as this is the common type. + HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + // Setup input/output media types + RefPtr<IMFMediaType> inputType; + + hr = wmf::MFCreateMediaType(getter_AddRefs(inputType)); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID()); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + if (mStreamType == WMFStreamType::AAC) { + hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 0x0); // Raw AAC packet + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(), + mUserData.Length()); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + } + + RefPtr<IMFMediaType> outputType; + hr = wmf::MFCreateMediaType(getter_AddRefs(outputType)); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + hr = decoder->SetMediaTypes(inputType, outputType); + NS_ENSURE_TRUE(SUCCEEDED(hr), false); + + mDecoder = decoder; + + return true; +} + +HRESULT +WMFAudioMFTManager::Input(MediaRawData* aSample) { + mLastInputTime = aSample->mTime; + return mDecoder->Input(aSample->Data(), uint32_t(aSample->Size()), + aSample->mTime.ToMicroseconds(), + aSample->mDuration.ToMicroseconds()); +} + +nsCString WMFAudioMFTManager::GetCodecName() const { + if (mStreamType == WMFStreamType::AAC) { + return "aac"_ns; + } else if (mStreamType == WMFStreamType::MP3) { + return "mp3"_ns; + } + return "unknown"_ns; +} + +HRESULT +WMFAudioMFTManager::UpdateOutputType() { + HRESULT hr; + + RefPtr<IMFMediaType> type; + hr = mDecoder->GetOutputMediaType(type); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = type->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &mAudioRate); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = type->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &mAudioChannels); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + uint32_t channelsMap; + hr = type->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &channelsMap); + if (SUCCEEDED(hr)) { + mChannelsMap = channelsMap; + } else { + LOG("Unable to retrieve channel layout. Ignoring"); + mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP; + } + + return S_OK; +} + +HRESULT +WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutData) { + aOutData = nullptr; + RefPtr<IMFSample> sample; + HRESULT hr; + int typeChangeCount = 0; + const auto oldAudioRate = mAudioRate; + while (true) { + hr = mDecoder->Output(&sample); + if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { + return hr; + } + if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { + hr = mDecoder->FindDecoderOutputType(); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + hr = UpdateOutputType(); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + // Catch infinite loops, but some decoders perform at least 2 stream + // changes on consecutive calls, so be permissive. + // 100 is arbitrarily > 2. + NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE); + ++typeChangeCount; + continue; + } + break; + } + + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + if (!sample) { + LOG("Audio MFTDecoder returned success but null output."); + return E_FAIL; + } + + UINT32 discontinuity = false; + sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity); + if (mFirstFrame || discontinuity) { + // Update the output type, in case this segment has a different + // rate. This also triggers on the first sample, which can have a + // different rate than is advertised in the container, and sometimes we + // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes. + hr = UpdateOutputType(); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + mFirstFrame = false; + } + + LONGLONG hns; + hr = sample->GetSampleTime(&hns); + if (FAILED(hr)) { + return E_FAIL; + } + TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate); + NS_ENSURE_TRUE(pts.IsValid(), E_FAIL); + + RefPtr<IMFMediaBuffer> buffer; + hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + BYTE* data = nullptr; // Note: *data will be owned by the IMFMediaBuffer, we + // don't need to free it. + DWORD maxLength = 0, currentLength = 0; + hr = buffer->Lock(&data, &maxLength, ¤tLength); + ScopeExit exit([buffer] { buffer->Unlock(); }); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + // Output is made of floats. + int32_t numSamples = currentLength / sizeof(float); + int32_t numFrames = numSamples / mAudioChannels; + MOZ_ASSERT(numFrames >= 0); + MOZ_ASSERT(numSamples >= 0); + if (numFrames == 0) { + // All data from this chunk stripped, loop back and try to output the next + // frame, if possible. + return S_OK; + } + + if (oldAudioRate != mAudioRate) { + LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate, + mAudioRate); + } + + AlignedAudioBuffer audioData(numSamples); + if (!audioData) { + return E_OUTOFMEMORY; + } + + float* floatData = reinterpret_cast<float*>(data); + PodCopy(audioData.Data(), floatData, numSamples); + + TimeUnit duration(numFrames, mAudioRate); + NS_ENSURE_TRUE(duration.IsValid(), E_FAIL); + + const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate; + if (IsPartialOutput(duration, isAudioRateChangedToHigher)) { + LOG("Encounter a partial frame?! duration shrinks from %s to %s", + mLastOutputDuration.ToString().get(), duration.ToString().get()); + return MF_E_TRANSFORM_NEED_MORE_INPUT; + } + + aOutData = new AudioData(aStreamOffset, pts, std::move(audioData), + mAudioChannels, mAudioRate, mChannelsMap); + MOZ_DIAGNOSTIC_ASSERT(duration == aOutData->mDuration, "must be equal"); + mLastOutputDuration = aOutData->mDuration; + +#ifdef LOG_SAMPLE_DECODE + LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u", + pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength); +#endif + + return S_OK; +} + +bool WMFAudioMFTManager::IsPartialOutput( + const media::TimeUnit& aNewOutputDuration, + const bool aIsRateChangedToHigher) const { + // This issue was found in Windows11, where AAC MFT decoder would incorrectly + // output partial output samples to us, even if MS's documentation said it + // won't happen [1]. More details are described in bug 1731430 comment 26. + // If the audio rate isn't changed to higher, which would result in shorter + // duration, but the new output duration is still shorter than the last one, + // then new output is possible an incorrect partial output. + // [1] + // https://docs.microsoft.com/en-us/windows/win32/medfound/mft-message-command-drain + if (mStreamType != WMFStreamType::AAC) { + return false; + } + if (mLastOutputDuration > aNewOutputDuration && !aIsRateChangedToHigher) { + return true; + } + return false; +} + +void WMFAudioMFTManager::Shutdown() { mDecoder = nullptr; } + +} // namespace mozilla + +#undef LOG |