diff options
Diffstat (limited to 'dom/media/platforms/apple')
-rw-r--r-- | dom/media/platforms/apple/AppleATDecoder.cpp | 672 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleATDecoder.h | 80 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleDecoderModule.cpp | 230 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleDecoderModule.h | 62 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleEncoderModule.cpp | 25 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleEncoderModule.h | 27 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleUtils.h | 88 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleVTDecoder.cpp | 761 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleVTDecoder.h | 145 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleVTEncoder.cpp | 628 | ||||
-rw-r--r-- | dom/media/platforms/apple/AppleVTEncoder.h | 85 |
11 files changed, 2803 insertions, 0 deletions
diff --git a/dom/media/platforms/apple/AppleATDecoder.cpp b/dom/media/platforms/apple/AppleATDecoder.cpp new file mode 100644 index 0000000000..13a6be3e31 --- /dev/null +++ b/dom/media/platforms/apple/AppleATDecoder.cpp @@ -0,0 +1,672 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AppleATDecoder.h" +#include "Adts.h" +#include "AppleUtils.h" +#include "MP4Decoder.h" +#include "MediaInfo.h" +#include "VideoUtils.h" +#include "mozilla/Logging.h" +#include "mozilla/SyncRunnable.h" +#include "mozilla/UniquePtr.h" +#include "nsTArray.h" + +#define LOG(...) DDMOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__) +#define LOGEX(_this, ...) \ + DDMOZ_LOGEX(_this, sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__) +#define FourCC2Str(n) \ + ((char[5]){(char)(n >> 24), (char)(n >> 16), (char)(n >> 8), (char)(n), 0}) + +namespace mozilla { + +AppleATDecoder::AppleATDecoder(const AudioInfo& aConfig) + : mConfig(aConfig), + mFileStreamError(false), + mConverter(nullptr), + mOutputFormat(), + mStream(nullptr), + mParsedFramesForAACMagicCookie(0), + mErrored(false) { + MOZ_COUNT_CTOR(AppleATDecoder); + LOG("Creating Apple AudioToolbox decoder"); + LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel", + mConfig.mMimeType.get(), mConfig.mRate, mConfig.mChannels, + mConfig.mBitDepth); + + if (mConfig.mMimeType.EqualsLiteral("audio/mpeg")) { + mFormatID = kAudioFormatMPEGLayer3; + } else if (mConfig.mMimeType.EqualsLiteral("audio/mp4a-latm")) { + mFormatID = kAudioFormatMPEG4AAC; + if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) { + const AacCodecSpecificData& aacCodecSpecificData = + aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>(); + 
mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames; + mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount; + LOG("AppleATDecoder (aac), found encoder delay (%" PRIu32 + ") and total frame count (%" PRIu64 ") in codec-specific side data", + mEncoderDelay, mTotalMediaFrames); + } + } else { + mFormatID = 0; + } +} + +AppleATDecoder::~AppleATDecoder() { + MOZ_COUNT_DTOR(AppleATDecoder); + MOZ_ASSERT(!mConverter); +} + +RefPtr<MediaDataDecoder::InitPromise> AppleATDecoder::Init() { + if (!mFormatID) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Non recognised format")), + __func__); + } + mThread = GetCurrentSerialEventTarget(); + + return InitPromise::CreateAndResolve(TrackType::kAudioTrack, __func__); +} + +RefPtr<MediaDataDecoder::FlushPromise> AppleATDecoder::Flush() { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + LOG("Flushing AudioToolbox AAC decoder"); + mQueuedSamples.Clear(); + mDecodedSamples.Clear(); + + if (mConverter) { + OSStatus rv = AudioConverterReset(mConverter); + if (rv) { + LOG("Error %d resetting AudioConverter", static_cast<int>(rv)); + } + } + if (mErrored) { + mParsedFramesForAACMagicCookie = 0; + mMagicCookie.Clear(); + ProcessShutdown(); + mErrored = false; + } + return FlushPromise::CreateAndResolve(true, __func__); +} + +RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Drain() { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + LOG("Draining AudioToolbox AAC decoder"); + return DecodePromise::CreateAndResolve(DecodedData(), __func__); +} + +RefPtr<ShutdownPromise> AppleATDecoder::Shutdown() { + // mThread may not be set if Init hasn't been called first. + MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread()); + ProcessShutdown(); + return ShutdownPromise::CreateAndResolve(true, __func__); +} + +void AppleATDecoder::ProcessShutdown() { + // mThread may not be set if Init hasn't been called first. 
+ MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread()); + + if (mStream) { + OSStatus rv = AudioFileStreamClose(mStream); + if (rv) { + LOG("error %d disposing of AudioFileStream", static_cast<int>(rv)); + return; + } + mStream = nullptr; + } + + if (mConverter) { + LOG("Shutdown: Apple AudioToolbox AAC decoder"); + OSStatus rv = AudioConverterDispose(mConverter); + if (rv) { + LOG("error %d disposing of AudioConverter", static_cast<int>(rv)); + } + mConverter = nullptr; + } +} + +nsCString AppleATDecoder::GetCodecName() const { + switch (mFormatID) { + case kAudioFormatMPEGLayer3: + return "mp3"_ns; + case kAudioFormatMPEG4AAC: + return "aac"_ns; + default: + return "unknown"_ns; + } +} + +struct PassthroughUserData { + UInt32 mChannels; + UInt32 mDataSize; + const void* mData; + AudioStreamPacketDescription mPacket; +}; + +// Error value we pass through the decoder to signal that nothing +// has gone wrong during decoding and we're done processing the packet. +const uint32_t kNoMoreDataErr = 'MOAR'; + +static OSStatus _PassthroughInputDataCallback( + AudioConverterRef aAudioConverter, UInt32* aNumDataPackets /* in/out */, + AudioBufferList* aData /* in/out */, + AudioStreamPacketDescription** aPacketDesc, void* aUserData) { + PassthroughUserData* userData = (PassthroughUserData*)aUserData; + if (!userData->mDataSize) { + *aNumDataPackets = 0; + return kNoMoreDataErr; + } + + if (aPacketDesc) { + userData->mPacket.mStartOffset = 0; + userData->mPacket.mVariableFramesInPacket = 0; + userData->mPacket.mDataByteSize = userData->mDataSize; + *aPacketDesc = &userData->mPacket; + } + + aData->mBuffers[0].mNumberChannels = userData->mChannels; + aData->mBuffers[0].mDataByteSize = userData->mDataSize; + aData->mBuffers[0].mData = const_cast<void*>(userData->mData); + + // No more data to provide following this run. 
+ userData->mDataSize = 0; + + return noErr; +} + +RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Decode( + MediaRawData* aSample) { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + LOG("mp4 input sample pts=%s duration=%s %s %llu bytes audio", + aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(), + aSample->mKeyframe ? " keyframe" : "", + (unsigned long long)aSample->Size()); + + MediaResult rv = NS_OK; + if (!mConverter) { + rv = SetupDecoder(aSample); + if (rv != NS_OK && rv != NS_ERROR_NOT_INITIALIZED) { + return DecodePromise::CreateAndReject(rv, __func__); + } + } + + mQueuedSamples.AppendElement(aSample); + + if (rv == NS_OK) { + for (size_t i = 0; i < mQueuedSamples.Length(); i++) { + rv = DecodeSample(mQueuedSamples[i]); + if (NS_FAILED(rv)) { + mErrored = true; + return DecodePromise::CreateAndReject(rv, __func__); + } + } + mQueuedSamples.Clear(); + } + + DecodedData results = std::move(mDecodedSamples); + mDecodedSamples = DecodedData(); + return DecodePromise::CreateAndResolve(std::move(results), __func__); +} + +MediaResult AppleATDecoder::DecodeSample(MediaRawData* aSample) { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + + // Array containing the queued decoded audio frames, about to be output. + nsTArray<AudioDataValue> outputData; + UInt32 channels = mOutputFormat.mChannelsPerFrame; + // Pick a multiple of the frame size close to a power of two + // for efficient allocation. We're mainly using this decoder to decode AAC, + // that has packets of 1024 audio frames. + const uint32_t MAX_AUDIO_FRAMES = 1024; + const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels; + + // Descriptions for _decompressed_ audio packets. ignored. + auto packets = MakeUnique<AudioStreamPacketDescription[]>(MAX_AUDIO_FRAMES); + + // This API insists on having packets spoon-fed to it from a callback. + // This structure exists only to pass our state. 
+ PassthroughUserData userData = {channels, (UInt32)aSample->Size(), + aSample->Data()}; + + // Decompressed audio buffer + AlignedAudioBuffer decoded(maxDecodedSamples); + if (!decoded) { + return NS_ERROR_OUT_OF_MEMORY; + } + + do { + AudioBufferList decBuffer; + decBuffer.mNumberBuffers = 1; + decBuffer.mBuffers[0].mNumberChannels = channels; + decBuffer.mBuffers[0].mDataByteSize = + maxDecodedSamples * sizeof(AudioDataValue); + decBuffer.mBuffers[0].mData = decoded.get(); + + // in: the max number of packets we can handle from the decoder. + // out: the number of packets the decoder is actually returning. + UInt32 numFrames = MAX_AUDIO_FRAMES; + + OSStatus rv = AudioConverterFillComplexBuffer( + mConverter, _PassthroughInputDataCallback, &userData, + &numFrames /* in/out */, &decBuffer, packets.get()); + + if (rv && rv != kNoMoreDataErr) { + LOG("Error decoding audio sample: %d\n", static_cast<int>(rv)); + return MediaResult( + NS_ERROR_DOM_MEDIA_DECODE_ERR, + RESULT_DETAIL("Error decoding audio sample: %d @ %s", + static_cast<int>(rv), aSample->mTime.ToString().get())); + } + + if (numFrames) { + AudioDataValue* outputFrames = decoded.get(); + outputData.AppendElements(outputFrames, numFrames * channels); + } + + if (rv == kNoMoreDataErr) { + break; + } + } while (true); + + if (outputData.IsEmpty()) { + return NS_OK; + } + + size_t numFrames = outputData.Length() / channels; + int rate = mOutputFormat.mSampleRate; + media::TimeUnit duration(numFrames, rate); + if (!duration.IsValid()) { + NS_WARNING("Invalid count of accumulated audio samples"); + return MediaResult( + NS_ERROR_DOM_MEDIA_OVERFLOW_ERR, + RESULT_DETAIL( + "Invalid count of accumulated audio samples: num:%llu rate:%d", + uint64_t(numFrames), rate)); + } + + LOG("Decoded audio packet [%s, %s] (duration: %s)\n", + aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(), + duration.ToString().get()); + + AudioSampleBuffer data(outputData.Elements(), outputData.Length()); + if 
(!data.Data()) { + return NS_ERROR_OUT_OF_MEMORY; + } + if (mChannelLayout && !mAudioConverter) { + AudioConfig in(*mChannelLayout, channels, rate); + AudioConfig out(AudioConfig::ChannelLayout::SMPTEDefault(*mChannelLayout), + channels, rate); + mAudioConverter = MakeUnique<AudioConverter>(in, out); + } + if (mAudioConverter && mChannelLayout && mChannelLayout->IsValid()) { + MOZ_ASSERT(mAudioConverter->CanWorkInPlace()); + data = mAudioConverter->Process(std::move(data)); + } + + RefPtr<AudioData> audio = new AudioData( + aSample->mOffset, aSample->mTime, data.Forget(), channels, rate, + mChannelLayout && mChannelLayout->IsValid() + ? mChannelLayout->Map() + : AudioConfig::ChannelLayout::UNKNOWN_MAP); + MOZ_DIAGNOSTIC_ASSERT(duration == audio->mDuration, "must be equal"); + mDecodedSamples.AppendElement(std::move(audio)); + return NS_OK; +} + +MediaResult AppleATDecoder::GetInputAudioDescription( + AudioStreamBasicDescription& aDesc, const nsTArray<uint8_t>& aExtraData) { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + + // Request the properties from CoreAudio using the codec magic cookie + AudioFormatInfo formatInfo; + PodZero(&formatInfo.mASBD); + formatInfo.mASBD.mFormatID = mFormatID; + if (mFormatID == kAudioFormatMPEG4AAC) { + formatInfo.mASBD.mFormatFlags = mConfig.mExtendedProfile; + } + formatInfo.mMagicCookieSize = aExtraData.Length(); + formatInfo.mMagicCookie = aExtraData.Elements(); + + UInt32 formatListSize; + // Attempt to retrieve the default format using + // kAudioFormatProperty_FormatInfo method. + // This method only retrieves the FramesPerPacket information required + // by the decoder, which depends on the codec type and profile. 
+ aDesc.mFormatID = mFormatID; + aDesc.mChannelsPerFrame = mConfig.mChannels; + aDesc.mSampleRate = mConfig.mRate; + UInt32 inputFormatSize = sizeof(aDesc); + OSStatus rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, + &inputFormatSize, &aDesc); + if (NS_WARN_IF(rv)) { + return MediaResult( + NS_ERROR_FAILURE, + RESULT_DETAIL("Unable to get format info:%d", int32_t(rv))); + } + + // If any of the methods below fail, we will return the default format as + // created using kAudioFormatProperty_FormatInfo above. + rv = AudioFormatGetPropertyInfo(kAudioFormatProperty_FormatList, + sizeof(formatInfo), &formatInfo, + &formatListSize); + if (rv || (formatListSize % sizeof(AudioFormatListItem))) { + return NS_OK; + } + size_t listCount = formatListSize / sizeof(AudioFormatListItem); + auto formatList = MakeUnique<AudioFormatListItem[]>(listCount); + + rv = AudioFormatGetProperty(kAudioFormatProperty_FormatList, + sizeof(formatInfo), &formatInfo, &formatListSize, + formatList.get()); + if (rv) { + return NS_OK; + } + LOG("found %zu available audio stream(s)", + formatListSize / sizeof(AudioFormatListItem)); + // Get the index number of the first playable format. + // This index number will be for the highest quality layer the platform + // is capable of playing. 
+ UInt32 itemIndex; + UInt32 indexSize = sizeof(itemIndex); + rv = AudioFormatGetProperty(kAudioFormatProperty_FirstPlayableFormatFromList, + formatListSize, formatList.get(), &indexSize, + &itemIndex); + if (rv) { + return NS_OK; + } + + aDesc = formatList[itemIndex].mASBD; + + return NS_OK; +} + +AudioConfig::Channel ConvertChannelLabel(AudioChannelLabel id) { + switch (id) { + case kAudioChannelLabel_Left: + return AudioConfig::CHANNEL_FRONT_LEFT; + case kAudioChannelLabel_Right: + return AudioConfig::CHANNEL_FRONT_RIGHT; + case kAudioChannelLabel_Mono: + case kAudioChannelLabel_Center: + return AudioConfig::CHANNEL_FRONT_CENTER; + case kAudioChannelLabel_LFEScreen: + return AudioConfig::CHANNEL_LFE; + case kAudioChannelLabel_LeftSurround: + return AudioConfig::CHANNEL_SIDE_LEFT; + case kAudioChannelLabel_RightSurround: + return AudioConfig::CHANNEL_SIDE_RIGHT; + case kAudioChannelLabel_CenterSurround: + return AudioConfig::CHANNEL_BACK_CENTER; + case kAudioChannelLabel_RearSurroundLeft: + return AudioConfig::CHANNEL_BACK_LEFT; + case kAudioChannelLabel_RearSurroundRight: + return AudioConfig::CHANNEL_BACK_RIGHT; + default: + return AudioConfig::CHANNEL_INVALID; + } +} + +// Will set mChannelLayout if a channel layout could properly be identified +// and is supported. +nsresult AppleATDecoder::SetupChannelLayout() { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + + // Determine the channel layout. 
+ UInt32 propertySize; + UInt32 size; + OSStatus status = AudioConverterGetPropertyInfo( + mConverter, kAudioConverterOutputChannelLayout, &propertySize, NULL); + if (status || !propertySize) { + LOG("Couldn't get channel layout property (%s)", FourCC2Str(status)); + return NS_ERROR_FAILURE; + } + + auto data = MakeUnique<uint8_t[]>(propertySize); + size = propertySize; + status = AudioConverterGetProperty( + mConverter, kAudioConverterInputChannelLayout, &size, data.get()); + if (status || size != propertySize) { + LOG("Couldn't get channel layout property (%s)", FourCC2Str(status)); + return NS_ERROR_FAILURE; + } + + AudioChannelLayout* layout = + reinterpret_cast<AudioChannelLayout*>(data.get()); + AudioChannelLayoutTag tag = layout->mChannelLayoutTag; + + // if tag is kAudioChannelLayoutTag_UseChannelDescriptions then the structure + // directly contains the the channel layout mapping. + // If tag is kAudioChannelLayoutTag_UseChannelBitmap then the layout will + // be defined via the bitmap and can be retrieved using + // kAudioFormatProperty_ChannelLayoutForBitmap property. + // Otherwise the tag itself describes the layout. + if (tag != kAudioChannelLayoutTag_UseChannelDescriptions) { + AudioFormatPropertyID property = + tag == kAudioChannelLayoutTag_UseChannelBitmap + ? 
kAudioFormatProperty_ChannelLayoutForBitmap + : kAudioFormatProperty_ChannelLayoutForTag; + + if (property == kAudioFormatProperty_ChannelLayoutForBitmap) { + status = AudioFormatGetPropertyInfo( + property, sizeof(UInt32), &layout->mChannelBitmap, &propertySize); + } else { + status = AudioFormatGetPropertyInfo( + property, sizeof(AudioChannelLayoutTag), &tag, &propertySize); + } + if (status || !propertySize) { + LOG("Couldn't get channel layout property info (%s:%s)", + FourCC2Str(property), FourCC2Str(status)); + return NS_ERROR_FAILURE; + } + data = MakeUnique<uint8_t[]>(propertySize); + layout = reinterpret_cast<AudioChannelLayout*>(data.get()); + size = propertySize; + + if (property == kAudioFormatProperty_ChannelLayoutForBitmap) { + status = AudioFormatGetProperty(property, sizeof(UInt32), + &layout->mChannelBitmap, &size, layout); + } else { + status = AudioFormatGetProperty(property, sizeof(AudioChannelLayoutTag), + &tag, &size, layout); + } + if (status || size != propertySize) { + LOG("Couldn't get channel layout property (%s:%s)", FourCC2Str(property), + FourCC2Str(status)); + return NS_ERROR_FAILURE; + } + // We have retrieved the channel layout from the tag or bitmap. + // We can now directly use the channel descriptions. 
+ layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; + } + + if (layout->mNumberChannelDescriptions != mOutputFormat.mChannelsPerFrame) { + LOG("Not matching the original channel number"); + return NS_ERROR_FAILURE; + } + + AutoTArray<AudioConfig::Channel, 8> channels; + channels.SetLength(layout->mNumberChannelDescriptions); + for (uint32_t i = 0; i < layout->mNumberChannelDescriptions; i++) { + AudioChannelLabel id = layout->mChannelDescriptions[i].mChannelLabel; + AudioConfig::Channel channel = ConvertChannelLabel(id); + channels[i] = channel; + } + mChannelLayout = MakeUnique<AudioConfig::ChannelLayout>( + mOutputFormat.mChannelsPerFrame, channels.Elements()); + return NS_OK; +} + +MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + static const uint32_t MAX_FRAMES = 2; + + if (mFormatID == kAudioFormatMPEG4AAC && mConfig.mExtendedProfile == 2 && + mParsedFramesForAACMagicCookie < MAX_FRAMES) { + // Check for implicit SBR signalling if stream is AAC-LC + // This will provide us with an updated magic cookie for use with + // GetInputAudioDescription. + if (NS_SUCCEEDED(GetImplicitAACMagicCookie(aSample)) && + !mMagicCookie.Length()) { + // nothing found yet, will try again later + mParsedFramesForAACMagicCookie++; + return NS_ERROR_NOT_INITIALIZED; + } + // An error occurred, fallback to using default stream description + } + + LOG("Initializing Apple AudioToolbox decoder"); + + // Should we try and use magic cookie data from the AAC data? We do this if + // - We have an AAC config & + // - We do not aleady have magic cookie data. + // Otherwise we just use the existing cookie (which may be empty). + bool shouldUseAacMagicCookie = + mConfig.mCodecSpecificConfig.is<AacCodecSpecificData>() && + mMagicCookie.IsEmpty(); + + nsTArray<uint8_t>& magicCookie = + shouldUseAacMagicCookie + ? 
*mConfig.mCodecSpecificConfig.as<AacCodecSpecificData>() + .mEsDescriptorBinaryBlob + : mMagicCookie; + AudioStreamBasicDescription inputFormat; + PodZero(&inputFormat); + + MediaResult rv = GetInputAudioDescription(inputFormat, magicCookie); + if (NS_FAILED(rv)) { + return rv; + } + // Fill in the output format manually. + PodZero(&mOutputFormat); + mOutputFormat.mFormatID = kAudioFormatLinearPCM; + mOutputFormat.mSampleRate = inputFormat.mSampleRate; + mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame; +#if defined(MOZ_SAMPLE_TYPE_FLOAT32) + mOutputFormat.mBitsPerChannel = 32; + mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0; +#elif defined(MOZ_SAMPLE_TYPE_S16) + mOutputFormat.mBitsPerChannel = 16; + mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | 0; +#else +# error Unknown audio sample type +#endif + // Set up the decoder so it gives us one sample per frame + mOutputFormat.mFramesPerPacket = 1; + mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame = + mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8; + + OSStatus status = + AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter); + if (status) { + LOG("Error %d constructing AudioConverter", int(status)); + mConverter = nullptr; + return MediaResult( + NS_ERROR_FAILURE, + RESULT_DETAIL("Error constructing AudioConverter:%d", int32_t(status))); + } + + if (magicCookie.Length() && mFormatID == kAudioFormatMPEG4AAC) { + status = AudioConverterSetProperty( + mConverter, kAudioConverterDecompressionMagicCookie, + magicCookie.Length(), magicCookie.Elements()); + if (status) { + LOG("Error setting AudioConverter AAC cookie:%d", int32_t(status)); + ProcessShutdown(); + return MediaResult( + NS_ERROR_FAILURE, + RESULT_DETAIL("Error setting AudioConverter AAC cookie:%d", + int32_t(status))); + } + } + + if (NS_FAILED(SetupChannelLayout())) { + NS_WARNING("Couldn't retrieve channel layout, will use default layout"); + } + + return NS_OK; +} + 
+static void _MetadataCallback(void* aAppleATDecoder, AudioFileStreamID aStream, + AudioFileStreamPropertyID aProperty, + UInt32* aFlags) { + AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aAppleATDecoder); + MOZ_RELEASE_ASSERT(decoder->mThread->IsOnCurrentThread()); + + LOGEX(decoder, "MetadataCallback receiving: '%s'", FourCC2Str(aProperty)); + if (aProperty == kAudioFileStreamProperty_MagicCookieData) { + UInt32 size; + Boolean writeable; + OSStatus rv = + AudioFileStreamGetPropertyInfo(aStream, aProperty, &size, &writeable); + if (rv) { + LOGEX(decoder, "Couldn't get property info for '%s' (%s)", + FourCC2Str(aProperty), FourCC2Str(rv)); + decoder->mFileStreamError = true; + return; + } + auto data = MakeUnique<uint8_t[]>(size); + rv = AudioFileStreamGetProperty(aStream, aProperty, &size, data.get()); + if (rv) { + LOGEX(decoder, "Couldn't get property '%s' (%s)", FourCC2Str(aProperty), + FourCC2Str(rv)); + decoder->mFileStreamError = true; + return; + } + decoder->mMagicCookie.AppendElements(data.get(), size); + } +} + +static void _SampleCallback(void* aSBR, UInt32 aNumBytes, UInt32 aNumPackets, + const void* aData, + AudioStreamPacketDescription* aPackets) {} + +nsresult AppleATDecoder::GetImplicitAACMagicCookie( + const MediaRawData* aSample) { + MOZ_ASSERT(mThread->IsOnCurrentThread()); + + // Prepend ADTS header to AAC audio. 
+ RefPtr<MediaRawData> adtssample(aSample->Clone()); + if (!adtssample) { + return NS_ERROR_OUT_OF_MEMORY; + } + int8_t frequency_index = Adts::GetFrequencyIndex(mConfig.mRate); + + bool rv = Adts::ConvertSample(mConfig.mChannels, frequency_index, + mConfig.mProfile, adtssample); + if (!rv) { + NS_WARNING("Failed to apply ADTS header"); + return NS_ERROR_FAILURE; + } + if (!mStream) { + OSStatus rv = AudioFileStreamOpen(this, _MetadataCallback, _SampleCallback, + kAudioFileAAC_ADTSType, &mStream); + if (rv) { + NS_WARNING("Couldn't open AudioFileStream"); + return NS_ERROR_FAILURE; + } + } + + OSStatus status = AudioFileStreamParseBytes( + mStream, adtssample->Size(), adtssample->Data(), 0 /* discontinuity */); + if (status) { + NS_WARNING("Couldn't parse sample"); + } + + if (status || mFileStreamError || mMagicCookie.Length()) { + // We have decoded a magic cookie or an error occurred as such + // we won't need the stream any longer. + AudioFileStreamClose(mStream); + mStream = nullptr; + } + + return (mFileStreamError || status) ? NS_ERROR_FAILURE : NS_OK; +} + +} // namespace mozilla + +#undef LOG +#undef LOGEX diff --git a/dom/media/platforms/apple/AppleATDecoder.h b/dom/media/platforms/apple/AppleATDecoder.h new file mode 100644 index 0000000000..d7aba2aacb --- /dev/null +++ b/dom/media/platforms/apple/AppleATDecoder.h @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_AppleATDecoder_h +#define mozilla_AppleATDecoder_h + +#include <AudioToolbox/AudioToolbox.h> +#include "PlatformDecoderModule.h" +#include "mozilla/Vector.h" +#include "AudioConverter.h" + +namespace mozilla { + +class TaskQueue; + +DDLoggedTypeDeclNameAndBase(AppleATDecoder, MediaDataDecoder); + +class AppleATDecoder final : public MediaDataDecoder, + public DecoderDoctorLifeLogger<AppleATDecoder> { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AppleATDecoder, final); + + explicit AppleATDecoder(const AudioInfo& aConfig); + + RefPtr<InitPromise> Init() override; + RefPtr<DecodePromise> Decode(MediaRawData* aSample) override; + RefPtr<DecodePromise> Drain() override; + RefPtr<FlushPromise> Flush() override; + RefPtr<ShutdownPromise> Shutdown() override; + + nsCString GetDescriptionName() const override { + return "apple coremedia decoder"_ns; + } + + nsCString GetCodecName() const override; + + // Callbacks also need access to the config. + const AudioInfo mConfig; + + // Use to extract magic cookie for HE-AAC detection. + nsTArray<uint8_t> mMagicCookie; + // Will be set to true should an error occurred while attempting to retrieve + // the magic cookie property. + bool mFileStreamError; + + nsCOMPtr<nsISerialEventTarget> mThread; + + private: + ~AppleATDecoder(); + + AudioConverterRef mConverter; + AudioStreamBasicDescription mOutputFormat; + UInt32 mFormatID; + AudioFileStreamID mStream; + nsTArray<RefPtr<MediaRawData>> mQueuedSamples; + UniquePtr<AudioConfig::ChannelLayout> mChannelLayout; + UniquePtr<AudioConverter> mAudioConverter; + DecodedData mDecodedSamples; + + void ProcessShutdown(); + MediaResult DecodeSample(MediaRawData* aSample); + MediaResult GetInputAudioDescription(AudioStreamBasicDescription& aDesc, + const nsTArray<uint8_t>& aExtraData); + // Setup AudioConverter once all information required has been gathered. + // Will return NS_ERROR_NOT_INITIALIZED if more data is required. 
+ MediaResult SetupDecoder(MediaRawData* aSample); + nsresult GetImplicitAACMagicCookie(const MediaRawData* aSample); + nsresult SetupChannelLayout(); + uint32_t mParsedFramesForAACMagicCookie; + uint32_t mEncoderDelay = 0; + uint64_t mTotalMediaFrames = 0; + bool mErrored; +}; + +} // namespace mozilla + +#endif // mozilla_AppleATDecoder_h diff --git a/dom/media/platforms/apple/AppleDecoderModule.cpp b/dom/media/platforms/apple/AppleDecoderModule.cpp new file mode 100644 index 0000000000..2e17d93313 --- /dev/null +++ b/dom/media/platforms/apple/AppleDecoderModule.cpp @@ -0,0 +1,230 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AppleDecoderModule.h" + +#include <dlfcn.h> + +#include "AppleATDecoder.h" +#include "AppleVTDecoder.h" +#include "MP4Decoder.h" +#include "VideoUtils.h" +#include "VPXDecoder.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/Logging.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/gfx/gfxVars.h" + +extern "C" { +// Only exists from MacOS 11 +extern void VTRegisterSupplementalVideoDecoderIfAvailable( + CMVideoCodecType codecType) __attribute__((weak_import)); +extern Boolean VTIsHardwareDecodeSupported(CMVideoCodecType codecType) + __attribute__((weak_import)); +} + +namespace mozilla { + +bool AppleDecoderModule::sInitialized = false; +bool AppleDecoderModule::sCanUseVP9Decoder = false; + +/* static */ +void AppleDecoderModule::Init() { + if (sInitialized) { + return; + } + + sInitialized = true; + if (RegisterSupplementalVP9Decoder()) { + sCanUseVP9Decoder = CanCreateHWDecoder(media::MediaCodec::VP9); + } +} + +nsresult AppleDecoderModule::Startup() { + if (!sInitialized) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + 
+already_AddRefed<MediaDataDecoder> AppleDecoderModule::CreateVideoDecoder( + const CreateDecoderParams& aParams) { + if (Supports(SupportDecoderParams(aParams), nullptr /* diagnostics */) == + media::DecodeSupport::Unsupported) { + return nullptr; + } + RefPtr<MediaDataDecoder> decoder; + if (IsVideoSupported(aParams.VideoConfig(), aParams.mOptions)) { + decoder = new AppleVTDecoder(aParams.VideoConfig(), aParams.mImageContainer, + aParams.mOptions, aParams.mKnowsCompositor, + aParams.mTrackingId); + } + return decoder.forget(); +} + +already_AddRefed<MediaDataDecoder> AppleDecoderModule::CreateAudioDecoder( + const CreateDecoderParams& aParams) { + if (Supports(SupportDecoderParams(aParams), nullptr /* diagnostics */) == + media::DecodeSupport::Unsupported) { + return nullptr; + } + RefPtr<MediaDataDecoder> decoder = new AppleATDecoder(aParams.AudioConfig()); + return decoder.forget(); +} + +media::DecodeSupportSet AppleDecoderModule::SupportsMimeType( + const nsACString& aMimeType, DecoderDoctorDiagnostics* aDiagnostics) const { + bool checkSupport = (aMimeType.EqualsLiteral("audio/mpeg") && + !StaticPrefs::media_ffvpx_mp3_enabled()) || + aMimeType.EqualsLiteral("audio/mp4a-latm") || + MP4Decoder::IsH264(aMimeType) || + VPXDecoder::IsVP9(aMimeType); + media::DecodeSupportSet supportType{media::DecodeSupport::Unsupported}; + + if (checkSupport) { + UniquePtr<TrackInfo> trackInfo = CreateTrackInfoWithMIMEType(aMimeType); + if (!trackInfo) { + supportType = media::DecodeSupport::Unsupported; + } else if (trackInfo->IsAudio()) { + supportType = media::DecodeSupport::SoftwareDecode; + } else { + supportType = Supports(SupportDecoderParams(*trackInfo), aDiagnostics); + } + } + + MOZ_LOG(sPDMLog, LogLevel::Debug, + ("Apple decoder %s requested type '%s'", + supportType == media::DecodeSupport::Unsupported ? 
"rejects" + : "supports", + aMimeType.BeginReading())); + return supportType; +} + +media::DecodeSupportSet AppleDecoderModule::Supports( + const SupportDecoderParams& aParams, + DecoderDoctorDiagnostics* aDiagnostics) const { + const auto& trackInfo = aParams.mConfig; + if (trackInfo.IsAudio()) { + return SupportsMimeType(trackInfo.mMimeType, aDiagnostics); + } + bool checkSupport = trackInfo.GetAsVideoInfo() && + IsVideoSupported(*trackInfo.GetAsVideoInfo()); + if (checkSupport) { + if (trackInfo.mMimeType == "video/vp9" && + CanCreateHWDecoder(media::MediaCodec::VP9)) { + return media::DecodeSupport::HardwareDecode; + } + return media::DecodeSupport::SoftwareDecode; + } + return media::DecodeSupport::Unsupported; +} + +bool AppleDecoderModule::IsVideoSupported( + const VideoInfo& aConfig, + const CreateDecoderParams::OptionSet& aOptions) const { + if (MP4Decoder::IsH264(aConfig.mMimeType)) { + return true; + } + if (!VPXDecoder::IsVP9(aConfig.mMimeType) || !sCanUseVP9Decoder || + aOptions.contains( + CreateDecoderParams::Option::HardwareDecoderNotAllowed)) { + return false; + } + if (aConfig.HasAlpha()) { + return false; + } + + // HW VP9 decoder only supports 8 or 10 bit color. + if (aConfig.mColorDepth != gfx::ColorDepth::COLOR_8 && + aConfig.mColorDepth != gfx::ColorDepth::COLOR_10) { + return false; + } + + // See if we have a vpcC box, and check further constraints. + // HW VP9 Decoder supports Profile 0 & 2 (YUV420) + if (aConfig.mExtraData && aConfig.mExtraData->Length() < 5) { + return true; // Assume it's okay. 
+ } + int profile = aConfig.mExtraData->ElementAt(4); + + if (profile != 0 && profile != 2) { + return false; + } + + return true; +} + +/* static */ +bool AppleDecoderModule::CanCreateHWDecoder(media::MediaCodec aCodec) { + // Check whether HW decode should even be enabled + if (!gfx::gfxVars::CanUseHardwareVideoDecoding()) { + return false; + } + + VideoInfo info(1920, 1080); + bool checkSupport = false; + + // We must wrap the code within __builtin_available to avoid compilation + // warning as VTIsHardwareDecodeSupported is only available from macOS 10.13. + if (__builtin_available(macOS 10.13, *)) { + if (!VTIsHardwareDecodeSupported) { + return false; + } + switch (aCodec) { + case media::MediaCodec::VP9: + info.mMimeType = "video/vp9"; + VPXDecoder::GetVPCCBox(info.mExtraData, VPXDecoder::VPXStreamInfo()); + checkSupport = VTIsHardwareDecodeSupported(kCMVideoCodecType_VP9); + break; + default: + // Only support VP9 HW decode for time being + checkSupport = false; + break; + } + } + // Attempt to create decoder + if (checkSupport) { + RefPtr<AppleVTDecoder> decoder = + new AppleVTDecoder(info, nullptr, {}, nullptr, Nothing()); + MediaResult rv = decoder->InitializeSession(); + if (!NS_SUCCEEDED(rv)) { + return false; + } + nsAutoCString failureReason; + bool hwSupport = decoder->IsHardwareAccelerated(failureReason); + decoder->Shutdown(); + if (!hwSupport) { + MOZ_LOG(sPDMLog, LogLevel::Debug, + ("Apple HW decode failure: '%s'", failureReason.BeginReading())); + } + return hwSupport; + } + return false; +} + +/* static */ +bool AppleDecoderModule::RegisterSupplementalVP9Decoder() { + static bool sRegisterIfAvailable = []() { +#if !defined(MAC_OS_VERSION_11_0) || \ + MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_11_0 + if (nsCocoaFeatures::OnBigSurOrLater()) { +#else + if (__builtin_available(macos 11.0, *)) { +#endif + VTRegisterSupplementalVideoDecoderIfAvailable(kCMVideoCodecType_VP9); + return true; + } + return false; + }(); + return sRegisterIfAvailable; 
+} + +/* static */ +already_AddRefed<PlatformDecoderModule> AppleDecoderModule::Create() { + return MakeAndAddRef<AppleDecoderModule>(); +} + +} // namespace mozilla diff --git a/dom/media/platforms/apple/AppleDecoderModule.h b/dom/media/platforms/apple/AppleDecoderModule.h new file mode 100644 index 0000000000..f869243a5c --- /dev/null +++ b/dom/media/platforms/apple/AppleDecoderModule.h @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_AppleDecoderModule_h +#define mozilla_AppleDecoderModule_h + +#include "PlatformDecoderModule.h" + +namespace mozilla { + +class AppleDecoderModule : public PlatformDecoderModule { + template <typename T, typename... Args> + friend already_AddRefed<T> MakeAndAddRef(Args&&...); + + public: + static already_AddRefed<PlatformDecoderModule> Create(); + + nsresult Startup() override; + + // Decode thread. + already_AddRefed<MediaDataDecoder> CreateVideoDecoder( + const CreateDecoderParams& aParams) override; + + // Decode thread. 
+ already_AddRefed<MediaDataDecoder> CreateAudioDecoder( + const CreateDecoderParams& aParams) override; + + media::DecodeSupportSet SupportsMimeType( + const nsACString& aMimeType, + DecoderDoctorDiagnostics* aDiagnostics) const override; + + media::DecodeSupportSet Supports( + const SupportDecoderParams& aParams, + DecoderDoctorDiagnostics* aDiagnostics) const override; + + static void Init(); + + static bool sCanUseVP9Decoder; + + static constexpr int kCMVideoCodecType_H264{'avc1'}; + static constexpr int kCMVideoCodecType_VP9{'vp09'}; + + private: + AppleDecoderModule() = default; + virtual ~AppleDecoderModule() = default; + + static bool sInitialized; + bool IsVideoSupported(const VideoInfo& aConfig, + const CreateDecoderParams::OptionSet& aOptions = + CreateDecoderParams::OptionSet()) const; + // Enable VP9 HW decoder. + static bool RegisterSupplementalVP9Decoder(); + // Return true if a dummy hardware decoder could be created. + static bool CanCreateHWDecoder(media::MediaCodec aCodec); +}; + +} // namespace mozilla + +#endif // mozilla_AppleDecoderModule_h diff --git a/dom/media/platforms/apple/AppleEncoderModule.cpp b/dom/media/platforms/apple/AppleEncoderModule.cpp new file mode 100644 index 0000000000..f0321297a4 --- /dev/null +++ b/dom/media/platforms/apple/AppleEncoderModule.cpp @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "AppleEncoderModule.h" + +#include "AppleVTEncoder.h" +#include "MP4Decoder.h" + +namespace mozilla { + +bool AppleEncoderModule::SupportsMimeType(const nsACString& aMimeType) const { + return MP4Decoder::IsH264(aMimeType); +} + +already_AddRefed<MediaDataEncoder> AppleEncoderModule::CreateVideoEncoder( + const CreateEncoderParams& aParams, const bool aHardwareNotAllowed) const { + RefPtr<MediaDataEncoder> encoder(new AppleVTEncoder( + aParams.ToH264Config(), aParams.mTaskQueue, aHardwareNotAllowed)); + return encoder.forget(); +} + +} // namespace mozilla diff --git a/dom/media/platforms/apple/AppleEncoderModule.h b/dom/media/platforms/apple/AppleEncoderModule.h new file mode 100644 index 0000000000..ec2868f104 --- /dev/null +++ b/dom/media/platforms/apple/AppleEncoderModule.h @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef AppleEncoderModule_h_ +#define AppleEncoderModule_h_ + +#include "PlatformEncoderModule.h" + +namespace mozilla { +class AppleEncoderModule final : public PlatformEncoderModule { + public: + AppleEncoderModule() {} + virtual ~AppleEncoderModule() {} + + bool SupportsMimeType(const nsACString& aMimeType) const override; + + already_AddRefed<MediaDataEncoder> CreateVideoEncoder( + const CreateEncoderParams& aParams, + const bool aHardwareNotAllowed) const override; +}; + +} // namespace mozilla + +#endif /* AppleEncoderModule_h_ */ diff --git a/dom/media/platforms/apple/AppleUtils.h b/dom/media/platforms/apple/AppleUtils.h new file mode 100644 index 0000000000..96bf079b0c --- /dev/null +++ b/dom/media/platforms/apple/AppleUtils.h @@ -0,0 +1,88 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Utility functions to help with Apple API calls. + +#ifndef mozilla_AppleUtils_h +#define mozilla_AppleUtils_h + +#include "mozilla/Attributes.h" +#include <CoreFoundation/CFBase.h> // For CFRelease() +#include <CoreVideo/CVBuffer.h> // For CVBufferRelease() + +namespace mozilla { + +// Wrapper class to call CFRelease/CVBufferRelease on reference types +// when they go out of scope. +template <class T, class F, F relFunc> +class AutoObjRefRelease { + public: + MOZ_IMPLICIT AutoObjRefRelease(T aRef) : mRef(aRef) {} + ~AutoObjRefRelease() { + if (mRef) { + relFunc(mRef); + } + } + // Return the wrapped ref so it can be used as an in parameter. + operator T() { return mRef; } + // Return a pointer to the wrapped ref for use as an out parameter. + T* receive() { return &mRef; } + + private: + // Copy operator isn't supported and is not implemented. 
+ AutoObjRefRelease<T, F, relFunc>& operator=( + const AutoObjRefRelease<T, F, relFunc>&); + T mRef; +}; + +template <typename T> +using AutoCFRelease = AutoObjRefRelease<T, decltype(&CFRelease), &CFRelease>; +template <typename T> +using AutoCVBufferRelease = + AutoObjRefRelease<T, decltype(&CVBufferRelease), &CVBufferRelease>; + +// CFRefPtr: A CoreFoundation smart pointer. +template <class T> +class CFRefPtr { + public: + explicit CFRefPtr(T aRef) : mRef(aRef) { + if (mRef) { + CFRetain(mRef); + } + } + // Copy constructor. + CFRefPtr(const CFRefPtr<T>& aCFRefPtr) : mRef(aCFRefPtr.mRef) { + if (mRef) { + CFRetain(mRef); + } + } + // Copy operator + CFRefPtr<T>& operator=(const CFRefPtr<T>& aCFRefPtr) { + if (mRef == aCFRefPtr.mRef) { + return; + } + if (mRef) { + CFRelease(mRef); + } + mRef = aCFRefPtr.mRef; + if (mRef) { + CFRetain(mRef); + } + return *this; + } + ~CFRefPtr() { + if (mRef) { + CFRelease(mRef); + } + } + // Return the wrapped ref so it can be used as an in parameter. + operator T() { return mRef; } + + private: + T mRef; +}; + +} // namespace mozilla + +#endif // mozilla_AppleUtils_h diff --git a/dom/media/platforms/apple/AppleVTDecoder.cpp b/dom/media/platforms/apple/AppleVTDecoder.cpp new file mode 100644 index 0000000000..7abc46274b --- /dev/null +++ b/dom/media/platforms/apple/AppleVTDecoder.cpp @@ -0,0 +1,761 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "AppleVTDecoder.h" + +#include <CoreVideo/CVPixelBufferIOSurface.h> +#include <IOSurface/IOSurface.h> +#include <limits> + +#include "AppleDecoderModule.h" +#include "AppleUtils.h" +#include "CallbackThreadRegistry.h" +#include "H264.h" +#include "MP4Decoder.h" +#include "MacIOSurfaceImage.h" +#include "MediaData.h" +#include "VPXDecoder.h" +#include "VideoUtils.h" +#include "gfxMacUtils.h" +#include "gfxPlatform.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Logging.h" +#include "mozilla/TaskQueue.h" +#include "mozilla/gfx/gfxVars.h" +#include "nsThreadUtils.h" + +#define LOG(...) DDMOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__) +#define LOGEX(_this, ...) \ + DDMOZ_LOGEX(_this, sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__) + +namespace mozilla { + +using namespace layers; + +AppleVTDecoder::AppleVTDecoder(const VideoInfo& aConfig, + layers::ImageContainer* aImageContainer, + CreateDecoderParams::OptionSet aOptions, + layers::KnowsCompositor* aKnowsCompositor, + Maybe<TrackingId> aTrackingId) + : mExtraData(aConfig.mExtraData), + mPictureWidth(aConfig.mImage.width), + mPictureHeight(aConfig.mImage.height), + mDisplayWidth(aConfig.mDisplay.width), + mDisplayHeight(aConfig.mDisplay.height), + mColorSpace(aConfig.mColorSpace + ? *aConfig.mColorSpace + : DefaultColorSpace({mPictureWidth, mPictureHeight})), + mColorPrimaries(aConfig.mColorPrimaries ? *aConfig.mColorPrimaries + : gfx::ColorSpace2::BT709), + mTransferFunction(aConfig.mTransferFunction + ? *aConfig.mTransferFunction + : gfx::TransferFunction::BT709), + mColorRange(aConfig.mColorRange), + mColorDepth(aConfig.mColorDepth), + mStreamType(MP4Decoder::IsH264(aConfig.mMimeType) ? StreamType::H264 + : VPXDecoder::IsVP9(aConfig.mMimeType) ? 
StreamType::VP9 + : StreamType::Unknown), + mTaskQueue(TaskQueue::Create( + GetMediaThreadPool(MediaThreadType::PLATFORM_DECODER), + "AppleVTDecoder")), + mMaxRefFrames( + mStreamType != StreamType::H264 || + aOptions.contains(CreateDecoderParams::Option::LowLatency) + ? 0 + : H264::ComputeMaxRefFrames(aConfig.mExtraData)), + mImageContainer(aImageContainer), + mKnowsCompositor(aKnowsCompositor) +#ifdef MOZ_WIDGET_UIKIT + , + mUseSoftwareImages(true) +#else + , + mUseSoftwareImages(aKnowsCompositor && + aKnowsCompositor->GetWebRenderCompositorType() == + layers::WebRenderCompositor::SOFTWARE) +#endif + , + mTrackingId(aTrackingId), + mIsFlushing(false), + mCallbackThreadId(), + mMonitor("AppleVTDecoder"), + mPromise(&mMonitor), // To ensure our PromiseHolder is only ever accessed + // with the monitor held. + mFormat(nullptr), + mSession(nullptr), + mIsHardwareAccelerated(false) { + MOZ_COUNT_CTOR(AppleVTDecoder); + MOZ_ASSERT(mStreamType != StreamType::Unknown); + // TODO: Verify aConfig.mime_type. + LOG("Creating AppleVTDecoder for %dx%d %s video", mDisplayWidth, + mDisplayHeight, mStreamType == StreamType::H264 ? "H.264" : "VP9"); +} + +AppleVTDecoder::~AppleVTDecoder() { MOZ_COUNT_DTOR(AppleVTDecoder); } + +RefPtr<MediaDataDecoder::InitPromise> AppleVTDecoder::Init() { + MediaResult rv = InitializeSession(); + + if (NS_SUCCEEDED(rv)) { + return InitPromise::CreateAndResolve(TrackType::kVideoTrack, __func__); + } + + return InitPromise::CreateAndReject(rv, __func__); +} + +RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::Decode( + MediaRawData* aSample) { + LOG("mp4 input sample %p pts %lld duration %lld us%s %zu bytes", aSample, + aSample->mTime.ToMicroseconds(), aSample->mDuration.ToMicroseconds(), + aSample->mKeyframe ? 
" keyframe" : "", aSample->Size()); + + RefPtr<AppleVTDecoder> self = this; + RefPtr<MediaRawData> sample = aSample; + return InvokeAsync(mTaskQueue, __func__, [self, this, sample] { + RefPtr<DecodePromise> p; + { + MonitorAutoLock mon(mMonitor); + p = mPromise.Ensure(__func__); + } + ProcessDecode(sample); + return p; + }); +} + +RefPtr<MediaDataDecoder::FlushPromise> AppleVTDecoder::Flush() { + mIsFlushing = true; + return InvokeAsync(mTaskQueue, this, __func__, &AppleVTDecoder::ProcessFlush); +} + +RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::Drain() { + return InvokeAsync(mTaskQueue, this, __func__, &AppleVTDecoder::ProcessDrain); +} + +RefPtr<ShutdownPromise> AppleVTDecoder::Shutdown() { + RefPtr<AppleVTDecoder> self = this; + return InvokeAsync(mTaskQueue, __func__, [self]() { + self->ProcessShutdown(); + return self->mTaskQueue->BeginShutdown(); + }); +} + +// Helper to fill in a timestamp structure. +static CMSampleTimingInfo TimingInfoFromSample(MediaRawData* aSample) { + CMSampleTimingInfo timestamp; + + timestamp.duration = + CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S); + timestamp.presentationTimeStamp = + CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S); + timestamp.decodeTimeStamp = + CMTimeMake(aSample->mTimecode.ToMicroseconds(), USECS_PER_S); + + return timestamp; +} + +void AppleVTDecoder::ProcessDecode(MediaRawData* aSample) { + AssertOnTaskQueue(); + PROCESS_DECODE_LOG(aSample); + + if (mIsFlushing) { + MonitorAutoLock mon(mMonitor); + mPromise.Reject(NS_ERROR_DOM_MEDIA_CANCELED, __func__); + return; + } + + mTrackingId.apply([&](const auto& aId) { + MediaInfoFlag flag = MediaInfoFlag::None; + flag |= (aSample->mKeyframe ? MediaInfoFlag::KeyFrame + : MediaInfoFlag::NonKeyFrame); + flag |= (mIsHardwareAccelerated ? 
MediaInfoFlag::HardwareDecoding + : MediaInfoFlag::SoftwareDecoding); + switch (mStreamType) { + case StreamType::H264: + flag |= MediaInfoFlag::VIDEO_H264; + break; + case StreamType::VP9: + flag |= MediaInfoFlag::VIDEO_VP9; + break; + default: + break; + } + mPerformanceRecorder.Start(aSample->mTimecode.ToMicroseconds(), + "AppleVTDecoder"_ns, aId, flag); + }); + + AutoCFRelease<CMBlockBufferRef> block = nullptr; + AutoCFRelease<CMSampleBufferRef> sample = nullptr; + VTDecodeInfoFlags infoFlags; + OSStatus rv; + + // FIXME: This copies the sample data. I think we can provide + // a custom block source which reuses the aSample buffer. + // But note that there may be a problem keeping the samples + // alive over multiple frames. + rv = CMBlockBufferCreateWithMemoryBlock( + kCFAllocatorDefault, // Struct allocator. + const_cast<uint8_t*>(aSample->Data()), aSample->Size(), + kCFAllocatorNull, // Block allocator. + NULL, // Block source. + 0, // Data offset. + aSample->Size(), false, block.receive()); + if (rv != noErr) { + NS_ERROR("Couldn't create CMBlockBuffer"); + MonitorAutoLock mon(mMonitor); + mPromise.Reject( + MediaResult(NS_ERROR_OUT_OF_MEMORY, + RESULT_DETAIL("CMBlockBufferCreateWithMemoryBlock:%x", rv)), + __func__); + return; + } + + CMSampleTimingInfo timestamp = TimingInfoFromSample(aSample); + rv = CMSampleBufferCreate(kCFAllocatorDefault, block, true, 0, 0, mFormat, 1, + 1, ×tamp, 0, NULL, sample.receive()); + if (rv != noErr) { + NS_ERROR("Couldn't create CMSampleBuffer"); + MonitorAutoLock mon(mMonitor); + mPromise.Reject(MediaResult(NS_ERROR_OUT_OF_MEMORY, + RESULT_DETAIL("CMSampleBufferCreate:%x", rv)), + __func__); + return; + } + + VTDecodeFrameFlags decodeFlags = + kVTDecodeFrame_EnableAsynchronousDecompression; + rv = VTDecompressionSessionDecodeFrame( + mSession, sample, decodeFlags, CreateAppleFrameRef(aSample), &infoFlags); + if (infoFlags & kVTDecodeInfo_FrameDropped) { + MonitorAutoLock mon(mMonitor); + // Smile and nod + 
NS_WARNING("Decoder synchronously dropped frame"); + MaybeResolveBufferedFrames(); + return; + } + + if (rv != noErr) { + LOG("AppleVTDecoder: Error %d VTDecompressionSessionDecodeFrame", rv); + NS_WARNING("Couldn't pass frame to decoder"); + // It appears that even when VTDecompressionSessionDecodeFrame returned a + // failure. Decoding sometimes actually get processed. + MonitorAutoLock mon(mMonitor); + mPromise.RejectIfExists( + MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, + RESULT_DETAIL("VTDecompressionSessionDecodeFrame:%x", rv)), + __func__); + return; + } +} + +void AppleVTDecoder::ProcessShutdown() { + if (mSession) { + LOG("%s: cleaning up session %p", __func__, mSession); + VTDecompressionSessionInvalidate(mSession); + CFRelease(mSession); + mSession = nullptr; + } + if (mFormat) { + LOG("%s: releasing format %p", __func__, mFormat); + CFRelease(mFormat); + mFormat = nullptr; + } +} + +RefPtr<MediaDataDecoder::FlushPromise> AppleVTDecoder::ProcessFlush() { + AssertOnTaskQueue(); + nsresult rv = WaitForAsynchronousFrames(); + if (NS_FAILED(rv)) { + LOG("AppleVTDecoder::Flush failed waiting for platform decoder"); + } + MonitorAutoLock mon(mMonitor); + mPromise.RejectIfExists(NS_ERROR_DOM_MEDIA_CANCELED, __func__); + + while (!mReorderQueue.IsEmpty()) { + mReorderQueue.Pop(); + } + mPerformanceRecorder.Record(std::numeric_limits<int64_t>::max()); + mSeekTargetThreshold.reset(); + mIsFlushing = false; + return FlushPromise::CreateAndResolve(true, __func__); +} + +RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::ProcessDrain() { + AssertOnTaskQueue(); + nsresult rv = WaitForAsynchronousFrames(); + if (NS_FAILED(rv)) { + LOG("AppleVTDecoder::Drain failed waiting for platform decoder"); + } + MonitorAutoLock mon(mMonitor); + DecodedData samples; + while (!mReorderQueue.IsEmpty()) { + samples.AppendElement(mReorderQueue.Pop()); + } + return DecodePromise::CreateAndResolve(std::move(samples), __func__); +} + +AppleVTDecoder::AppleFrameRef* 
AppleVTDecoder::CreateAppleFrameRef( + const MediaRawData* aSample) { + MOZ_ASSERT(aSample); + return new AppleFrameRef(*aSample); +} + +void AppleVTDecoder::SetSeekThreshold(const media::TimeUnit& aTime) { + if (aTime.IsValid()) { + mSeekTargetThreshold = Some(aTime); + } else { + mSeekTargetThreshold.reset(); + } +} + +// +// Implementation details. +// + +// Callback passed to the VideoToolbox decoder for returning data. +// This needs to be static because the API takes a C-style pair of +// function and userdata pointers. This validates parameters and +// forwards the decoded image back to an object method. +static void PlatformCallback(void* decompressionOutputRefCon, + void* sourceFrameRefCon, OSStatus status, + VTDecodeInfoFlags flags, CVImageBufferRef image, + CMTime presentationTimeStamp, + CMTime presentationDuration) { + AppleVTDecoder* decoder = + static_cast<AppleVTDecoder*>(decompressionOutputRefCon); + LOGEX(decoder, "AppleVideoDecoder %s status %d flags %d", __func__, + static_cast<int>(status), flags); + + UniquePtr<AppleVTDecoder::AppleFrameRef> frameRef( + static_cast<AppleVTDecoder::AppleFrameRef*>(sourceFrameRefCon)); + + // Validate our arguments. 
+ if (status != noErr) { + NS_WARNING("VideoToolbox decoder returned an error"); + decoder->OnDecodeError(status); + return; + } else if (!image) { + NS_WARNING("VideoToolbox decoder returned no data"); + } else if (flags & kVTDecodeInfo_FrameDropped) { + NS_WARNING(" ...frame tagged as dropped..."); + } else { + MOZ_ASSERT(CFGetTypeID(image) == CVPixelBufferGetTypeID(), + "VideoToolbox returned an unexpected image type"); + } + + decoder->OutputFrame(image, *frameRef); +} + +void AppleVTDecoder::MaybeResolveBufferedFrames() { + mMonitor.AssertCurrentThreadOwns(); + + if (mPromise.IsEmpty()) { + return; + } + + DecodedData results; + while (mReorderQueue.Length() > mMaxRefFrames) { + results.AppendElement(mReorderQueue.Pop()); + } + mPromise.Resolve(std::move(results), __func__); +} + +void AppleVTDecoder::MaybeRegisterCallbackThread() { + ProfilerThreadId id = profiler_current_thread_id(); + if (MOZ_LIKELY(id == mCallbackThreadId)) { + return; + } + mCallbackThreadId = id; + CallbackThreadRegistry::Get()->Register(mCallbackThreadId, + "AppleVTDecoderCallback"); +} + +nsCString AppleVTDecoder::GetCodecName() const { + switch (mStreamType) { + case StreamType::H264: + return "h264"_ns; + case StreamType::VP9: + return "vp9"_ns; + default: + return "unknown"_ns; + } +} + +// Copy and return a decoded frame. +void AppleVTDecoder::OutputFrame(CVPixelBufferRef aImage, + AppleVTDecoder::AppleFrameRef aFrameRef) { + MaybeRegisterCallbackThread(); + + if (mIsFlushing) { + // We are in the process of flushing or shutting down; ignore frame. + return; + } + + LOG("mp4 output frame %lld dts %lld pts %lld duration %lld us%s", + aFrameRef.byte_offset, aFrameRef.decode_timestamp.ToMicroseconds(), + aFrameRef.composition_timestamp.ToMicroseconds(), + aFrameRef.duration.ToMicroseconds(), + aFrameRef.is_sync_point ? " keyframe" : ""); + + if (!aImage) { + // Image was dropped by decoder or none return yet. + // We need more input to continue. 
+ MonitorAutoLock mon(mMonitor); + MaybeResolveBufferedFrames(); + return; + } + + bool useNullSample = false; + if (mSeekTargetThreshold.isSome()) { + if ((aFrameRef.composition_timestamp + aFrameRef.duration) < + mSeekTargetThreshold.ref()) { + useNullSample = true; + } else { + mSeekTargetThreshold.reset(); + } + } + + // Where our resulting image will end up. + RefPtr<MediaData> data; + // Bounds. + VideoInfo info; + info.mDisplay = gfx::IntSize(mDisplayWidth, mDisplayHeight); + + if (useNullSample) { + data = new NullData(aFrameRef.byte_offset, aFrameRef.composition_timestamp, + aFrameRef.duration); + } else if (mUseSoftwareImages) { + size_t width = CVPixelBufferGetWidth(aImage); + size_t height = CVPixelBufferGetHeight(aImage); + DebugOnly<size_t> planes = CVPixelBufferGetPlaneCount(aImage); + MOZ_ASSERT(planes == 3, "Likely not YUV420 format and it must be."); + + VideoData::YCbCrBuffer buffer; + + // Lock the returned image data. + CVReturn rv = + CVPixelBufferLockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly); + if (rv != kCVReturnSuccess) { + NS_ERROR("error locking pixel data"); + MonitorAutoLock mon(mMonitor); + mPromise.Reject( + MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, + RESULT_DETAIL("CVPixelBufferLockBaseAddress:%x", rv)), + __func__); + return; + } + // Y plane. + buffer.mPlanes[0].mData = + static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 0)); + buffer.mPlanes[0].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 0); + buffer.mPlanes[0].mWidth = width; + buffer.mPlanes[0].mHeight = height; + buffer.mPlanes[0].mSkip = 0; + // Cb plane. + buffer.mPlanes[1].mData = + static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 1)); + buffer.mPlanes[1].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 1); + buffer.mPlanes[1].mWidth = (width + 1) / 2; + buffer.mPlanes[1].mHeight = (height + 1) / 2; + buffer.mPlanes[1].mSkip = 0; + // Cr plane. 
+ buffer.mPlanes[2].mData = + static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 2)); + buffer.mPlanes[2].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 2); + buffer.mPlanes[2].mWidth = (width + 1) / 2; + buffer.mPlanes[2].mHeight = (height + 1) / 2; + buffer.mPlanes[2].mSkip = 0; + + buffer.mChromaSubsampling = gfx::ChromaSubsampling::HALF_WIDTH_AND_HEIGHT; + buffer.mYUVColorSpace = mColorSpace; + buffer.mColorPrimaries = mColorPrimaries; + buffer.mColorRange = mColorRange; + + gfx::IntRect visible = gfx::IntRect(0, 0, mPictureWidth, mPictureHeight); + + // Copy the image data into our own format. + data = VideoData::CreateAndCopyData( + info, mImageContainer, aFrameRef.byte_offset, + aFrameRef.composition_timestamp, aFrameRef.duration, buffer, + aFrameRef.is_sync_point, aFrameRef.decode_timestamp, visible, + mKnowsCompositor); + // Unlock the returned image data. + CVPixelBufferUnlockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly); + } else { +#ifndef MOZ_WIDGET_UIKIT + // Set pixel buffer properties on aImage before we extract its surface. + // This ensures that we can use defined enums to set values instead + // of later setting magic CFSTR values on the surface itself. 
+ if (mColorSpace == gfx::YUVColorSpace::BT601) { + CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey, + kCVImageBufferYCbCrMatrix_ITU_R_601_4, + kCVAttachmentMode_ShouldPropagate); + } else if (mColorSpace == gfx::YUVColorSpace::BT709) { + CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey, + kCVImageBufferYCbCrMatrix_ITU_R_709_2, + kCVAttachmentMode_ShouldPropagate); + } else if (mColorSpace == gfx::YUVColorSpace::BT2020) { + CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey, + kCVImageBufferYCbCrMatrix_ITU_R_2020, + kCVAttachmentMode_ShouldPropagate); + } + + if (mColorPrimaries == gfx::ColorSpace2::BT709) { + CVBufferSetAttachment(aImage, kCVImageBufferColorPrimariesKey, + kCVImageBufferColorPrimaries_ITU_R_709_2, + kCVAttachmentMode_ShouldPropagate); + } else if (mColorPrimaries == gfx::ColorSpace2::BT2020) { + CVBufferSetAttachment(aImage, kCVImageBufferColorPrimariesKey, + kCVImageBufferColorPrimaries_ITU_R_2020, + kCVAttachmentMode_ShouldPropagate); + } + + // Transfer function is applied independently from the colorSpace. 
+ CVBufferSetAttachment( + aImage, kCVImageBufferTransferFunctionKey, + gfxMacUtils::CFStringForTransferFunction(mTransferFunction), + kCVAttachmentMode_ShouldPropagate); + + CFTypeRefPtr<IOSurfaceRef> surface = + CFTypeRefPtr<IOSurfaceRef>::WrapUnderGetRule( + CVPixelBufferGetIOSurface(aImage)); + MOZ_ASSERT(surface, "Decoder didn't return an IOSurface backed buffer"); + + RefPtr<MacIOSurface> macSurface = new MacIOSurface(std::move(surface)); + macSurface->SetYUVColorSpace(mColorSpace); + macSurface->mColorPrimaries = mColorPrimaries; + + RefPtr<layers::Image> image = new layers::MacIOSurfaceImage(macSurface); + + data = VideoData::CreateFromImage( + info.mDisplay, aFrameRef.byte_offset, aFrameRef.composition_timestamp, + aFrameRef.duration, image.forget(), aFrameRef.is_sync_point, + aFrameRef.decode_timestamp); +#else + MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS"); +#endif + } + + if (!data) { + NS_ERROR("Couldn't create VideoData for frame"); + MonitorAutoLock mon(mMonitor); + mPromise.Reject(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__); + return; + } + + mPerformanceRecorder.Record( + aFrameRef.decode_timestamp.ToMicroseconds(), [&](DecodeStage& aStage) { + aStage.SetResolution(static_cast<int>(CVPixelBufferGetWidth(aImage)), + static_cast<int>(CVPixelBufferGetHeight(aImage))); + auto format = [&]() -> Maybe<DecodeStage::ImageFormat> { + switch (CVPixelBufferGetPixelFormatType(aImage)) { + case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: + case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange: + return Some(DecodeStage::NV12); + case kCVPixelFormatType_422YpCbCr8_yuvs: + case kCVPixelFormatType_422YpCbCr8FullRange: + return Some(DecodeStage::YUV422P); + case kCVPixelFormatType_32BGRA: + return Some(DecodeStage::RGBA32); + default: + return Nothing(); + } + }(); + format.apply([&](auto aFormat) { aStage.SetImageFormat(aFormat); }); + aStage.SetColorDepth(mColorDepth); + aStage.SetYUVColorSpace(mColorSpace); + 
aStage.SetColorRange(mColorRange); + }); + + // Frames come out in DTS order but we need to output them + // in composition order. + MonitorAutoLock mon(mMonitor); + mReorderQueue.Push(std::move(data)); + MaybeResolveBufferedFrames(); + + LOG("%llu decoded frames queued", + static_cast<unsigned long long>(mReorderQueue.Length())); +} + +void AppleVTDecoder::OnDecodeError(OSStatus aError) { + MonitorAutoLock mon(mMonitor); + mPromise.RejectIfExists( + MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, + RESULT_DETAIL("OnDecodeError:%x", aError)), + __func__); +} + +nsresult AppleVTDecoder::WaitForAsynchronousFrames() { + OSStatus rv = VTDecompressionSessionWaitForAsynchronousFrames(mSession); + if (rv != noErr) { + NS_ERROR("AppleVTDecoder: Error waiting for asynchronous frames"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +MediaResult AppleVTDecoder::InitializeSession() { + OSStatus rv; + + AutoCFRelease<CFDictionaryRef> extensions = CreateDecoderExtensions(); + + rv = CMVideoFormatDescriptionCreate( + kCFAllocatorDefault, + mStreamType == StreamType::H264 + ? kCMVideoCodecType_H264 + : CMVideoCodecType(AppleDecoderModule::kCMVideoCodecType_VP9), + mPictureWidth, mPictureHeight, extensions, &mFormat); + if (rv != noErr) { + return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Couldn't create format description!")); + } + + // Contruct video decoder selection spec. + AutoCFRelease<CFDictionaryRef> spec = CreateDecoderSpecification(); + + // Contruct output configuration. + AutoCFRelease<CFDictionaryRef> outputConfiguration = + CreateOutputConfiguration(); + + VTDecompressionOutputCallbackRecord cb = {PlatformCallback, this}; + rv = + VTDecompressionSessionCreate(kCFAllocatorDefault, mFormat, + spec, // Video decoder selection. + outputConfiguration, // Output video format. 
+ &cb, &mSession); + + if (rv != noErr) { + return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Couldn't create decompression session!")); + } + + CFBooleanRef isUsingHW = nullptr; + rv = VTSessionCopyProperty( + mSession, + kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder, + kCFAllocatorDefault, &isUsingHW); + if (rv == noErr) { + mIsHardwareAccelerated = isUsingHW == kCFBooleanTrue; + LOG("AppleVTDecoder: %s hardware accelerated decoding", + mIsHardwareAccelerated ? "using" : "not using"); + } else { + LOG("AppleVTDecoder: maybe hardware accelerated decoding " + "(VTSessionCopyProperty query failed)"); + } + if (isUsingHW) { + CFRelease(isUsingHW); + } + + return NS_OK; +} + +CFDictionaryRef AppleVTDecoder::CreateDecoderExtensions() { + AutoCFRelease<CFDataRef> data = CFDataCreate( + kCFAllocatorDefault, mExtraData->Elements(), mExtraData->Length()); + + const void* atomsKey[1]; + atomsKey[0] = mStreamType == StreamType::H264 ? CFSTR("avcC") : CFSTR("vpcC"); + const void* atomsValue[] = {data}; + static_assert(ArrayLength(atomsKey) == ArrayLength(atomsValue), + "Non matching keys/values array size"); + + AutoCFRelease<CFDictionaryRef> atoms = CFDictionaryCreate( + kCFAllocatorDefault, atomsKey, atomsValue, ArrayLength(atomsKey), + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); + + const void* extensionKeys[] = { + kCVImageBufferChromaLocationBottomFieldKey, + kCVImageBufferChromaLocationTopFieldKey, + kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms}; + + const void* extensionValues[] = {kCVImageBufferChromaLocation_Left, + kCVImageBufferChromaLocation_Left, atoms}; + static_assert(ArrayLength(extensionKeys) == ArrayLength(extensionValues), + "Non matching keys/values array size"); + + return CFDictionaryCreate(kCFAllocatorDefault, extensionKeys, extensionValues, + ArrayLength(extensionKeys), + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); +} + +CFDictionaryRef 
AppleVTDecoder::CreateDecoderSpecification() { + const void* specKeys[] = { + kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder}; + const void* specValues[1]; + if (gfx::gfxVars::CanUseHardwareVideoDecoding()) { + specValues[0] = kCFBooleanTrue; + } else { + // This GPU is blacklisted for hardware decoding. + specValues[0] = kCFBooleanFalse; + } + static_assert(ArrayLength(specKeys) == ArrayLength(specValues), + "Non matching keys/values array size"); + + return CFDictionaryCreate( + kCFAllocatorDefault, specKeys, specValues, ArrayLength(specKeys), + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); +} + +CFDictionaryRef AppleVTDecoder::CreateOutputConfiguration() { + if (mUseSoftwareImages) { + // Output format type: + SInt32 PixelFormatTypeValue = kCVPixelFormatType_420YpCbCr8Planar; + AutoCFRelease<CFNumberRef> PixelFormatTypeNumber = CFNumberCreate( + kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue); + const void* outputKeys[] = {kCVPixelBufferPixelFormatTypeKey}; + const void* outputValues[] = {PixelFormatTypeNumber}; + static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues), + "Non matching keys/values array size"); + + return CFDictionaryCreate( + kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys), + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); + } + +#ifndef MOZ_WIDGET_UIKIT + // Output format type: + + bool is10Bit = (gfx::BitDepthForColorDepth(mColorDepth) == 10); + SInt32 PixelFormatTypeValue = + mColorRange == gfx::ColorRange::FULL + ? (is10Bit ? kCVPixelFormatType_420YpCbCr10BiPlanarFullRange + : kCVPixelFormatType_420YpCbCr8BiPlanarFullRange) + : (is10Bit ? 
kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange + : kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange); + AutoCFRelease<CFNumberRef> PixelFormatTypeNumber = CFNumberCreate( + kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue); + // Construct IOSurface Properties + const void* IOSurfaceKeys[] = {kIOSurfaceIsGlobal}; + const void* IOSurfaceValues[] = {kCFBooleanTrue}; + static_assert(ArrayLength(IOSurfaceKeys) == ArrayLength(IOSurfaceValues), + "Non matching keys/values array size"); + + // Contruct output configuration. + AutoCFRelease<CFDictionaryRef> IOSurfaceProperties = CFDictionaryCreate( + kCFAllocatorDefault, IOSurfaceKeys, IOSurfaceValues, + ArrayLength(IOSurfaceKeys), &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + + const void* outputKeys[] = {kCVPixelBufferIOSurfacePropertiesKey, + kCVPixelBufferPixelFormatTypeKey, + kCVPixelBufferOpenGLCompatibilityKey}; + const void* outputValues[] = {IOSurfaceProperties, PixelFormatTypeNumber, + kCFBooleanTrue}; + static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues), + "Non matching keys/values array size"); + + return CFDictionaryCreate( + kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys), + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); +#else + MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS"); +#endif +} + +} // namespace mozilla + +#undef LOG +#undef LOGEX diff --git a/dom/media/platforms/apple/AppleVTDecoder.h b/dom/media/platforms/apple/AppleVTDecoder.h new file mode 100644 index 0000000000..140a335628 --- /dev/null +++ b/dom/media/platforms/apple/AppleVTDecoder.h @@ -0,0 +1,145 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
 */

#ifndef mozilla_AppleVTDecoder_h
#define mozilla_AppleVTDecoder_h

#include <CoreFoundation/CFDictionary.h>  // For CFDictionaryRef
#include <CoreMedia/CoreMedia.h>          // For CMVideoFormatDescriptionRef
#include <VideoToolbox/VideoToolbox.h>    // For VTDecompressionSessionRef

#include "AppleDecoderModule.h"
#include "PerformanceRecorder.h"
#include "PlatformDecoderModule.h"
#include "ReorderQueue.h"
#include "TimeUnits.h"
#include "mozilla/Atomics.h"
#include "mozilla/gfx/Types.h"
#include "mozilla/ProfilerUtils.h"

namespace mozilla {

DDLoggedTypeDeclNameAndBase(AppleVTDecoder, MediaDataDecoder);

// MediaDataDecoder implementation built around a VideoToolbox decompression
// session (see mSession / mFormat below).
class AppleVTDecoder final : public MediaDataDecoder,
                             public DecoderDoctorLifeLogger<AppleVTDecoder> {
 public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AppleVTDecoder, final);

  AppleVTDecoder(const VideoInfo& aConfig,
                 layers::ImageContainer* aImageContainer,
                 CreateDecoderParams::OptionSet aOptions,
                 layers::KnowsCompositor* aKnowsCompositor,
                 Maybe<TrackingId> aTrackingId);

  // Snapshot of the timing/position metadata of an input sample. All fields
  // are copied from the MediaRawData passed to the constructor.
  class AppleFrameRef {
   public:
    media::TimeUnit decode_timestamp;       // From MediaRawData::mTimecode.
    media::TimeUnit composition_timestamp;  // From MediaRawData::mTime.
    media::TimeUnit duration;               // From MediaRawData::mDuration.
    int64_t byte_offset;                    // From MediaRawData::mOffset.
    bool is_sync_point;                     // From MediaRawData::mKeyframe.

    explicit AppleFrameRef(const MediaRawData& aSample)
        : decode_timestamp(aSample.mTimecode),
          composition_timestamp(aSample.mTime),
          duration(aSample.mDuration),
          byte_offset(aSample.mOffset),
          is_sync_point(aSample.mKeyframe) {}
  };

  // MediaDataDecoder interface.
  RefPtr<InitPromise> Init() override;
  RefPtr<DecodePromise> Decode(MediaRawData* aSample) override;
  RefPtr<DecodePromise> Drain() override;
  RefPtr<FlushPromise> Flush() override;
  RefPtr<ShutdownPromise> Shutdown() override;
  void SetSeekThreshold(const media::TimeUnit& aTime) override;

  // Reports the cached mIsHardwareAccelerated flag; aFailureReason is left
  // untouched.
  bool IsHardwareAccelerated(nsACString& aFailureReason) const override {
    return mIsHardwareAccelerated;
  }

  nsCString GetDescriptionName() const override {
    return mIsHardwareAccelerated ? "apple hardware VT decoder"_ns
                                  : "apple software VT decoder"_ns;
  }

  nsCString GetCodecName() const override;

  ConversionRequired NeedsConversion() const override {
    return ConversionRequired::kNeedAVCC;
  }

  // Access from the taskqueue and the decoder's thread.
  // OutputFrame is thread-safe.
  void OutputFrame(CVPixelBufferRef aImage, AppleFrameRef aFrameRef);
  void OnDecodeError(OSStatus aError);

 private:
  friend class AppleDecoderModule;  // To access InitializeSession.
  virtual ~AppleVTDecoder();
  RefPtr<FlushPromise> ProcessFlush();
  RefPtr<DecodePromise> ProcessDrain();
  void ProcessShutdown();
  void ProcessDecode(MediaRawData* aSample);
  void MaybeResolveBufferedFrames();

  void MaybeRegisterCallbackThread();

  // Debug-only check that the caller is running on mTaskQueue.
  void AssertOnTaskQueue() { MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn()); }

  AppleFrameRef* CreateAppleFrameRef(const MediaRawData* aSample);
  CFDictionaryRef CreateOutputConfiguration();

  // Immutable stream description (all members are const).
  const RefPtr<MediaByteBuffer> mExtraData;
  const uint32_t mPictureWidth;
  const uint32_t mPictureHeight;
  const uint32_t mDisplayWidth;
  const uint32_t mDisplayHeight;
  const gfx::YUVColorSpace mColorSpace;
  const gfx::ColorSpace2 mColorPrimaries;
  const gfx::TransferFunction mTransferFunction;
  const gfx::ColorRange mColorRange;
  const gfx::ColorDepth mColorDepth;

  // Method to set up the decompression session.
  MediaResult InitializeSession();
  nsresult WaitForAsynchronousFrames();
  CFDictionaryRef CreateDecoderSpecification();
  CFDictionaryRef CreateDecoderExtensions();

  enum class StreamType { Unknown, H264, VP9 };
  const StreamType mStreamType;
  const RefPtr<TaskQueue> mTaskQueue;
  const uint32_t mMaxRefFrames;
  const RefPtr<layers::ImageContainer> mImageContainer;
  const RefPtr<layers::KnowsCompositor> mKnowsCompositor;
  const bool mUseSoftwareImages;
  const Maybe<TrackingId> mTrackingId;

  // Set on reader/decode thread calling Flush() to indicate that output is
  // not required and so input samples on mTaskQueue need not be processed.
  Atomic<bool> mIsFlushing;
  std::atomic<ProfilerThreadId> mCallbackThreadId;
  // Protects mReorderQueue and mPromise.
  Monitor mMonitor MOZ_UNANNOTATED;
  ReorderQueue mReorderQueue;
  MozMonitoredPromiseHolder<DecodePromise> mPromise;

  // Decoded frame will be dropped if its pts is smaller than this
  // value. It should be initialized before Input() or after Flush(). So it is
  // safe to access it in OutputFrame without protecting.
  Maybe<media::TimeUnit> mSeekTargetThreshold;

  CMVideoFormatDescriptionRef mFormat;
  VTDecompressionSessionRef mSession;
  Atomic<bool> mIsHardwareAccelerated;
  PerformanceRecorderMulti<DecodeStage> mPerformanceRecorder;
};

}  // namespace mozilla

#endif  // mozilla_AppleVTDecoder_h
diff --git a/dom/media/platforms/apple/AppleVTEncoder.cpp b/dom/media/platforms/apple/AppleVTEncoder.cpp
new file mode 100644
index 0000000000..af91d99bcb
--- /dev/null
+++ b/dom/media/platforms/apple/AppleVTEncoder.cpp
@@ -0,0 +1,628 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "AppleVTEncoder.h" + +#include <CoreFoundation/CFArray.h> +#include <CoreFoundation/CFByteOrder.h> +#include <CoreFoundation/CFDictionary.h> + +#include "ImageContainer.h" +#include "AnnexB.h" +#include "H264.h" + +#include "libyuv.h" + +#include "AppleUtils.h" + +namespace mozilla { +extern LazyLogModule sPEMLog; +#define VTENC_LOGE(fmt, ...) \ + MOZ_LOG(sPEMLog, mozilla::LogLevel::Error, \ + ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) +#define VTENC_LOGD(fmt, ...) \ + MOZ_LOG(sPEMLog, mozilla::LogLevel::Debug, \ + ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) + +static CFDictionaryRef BuildEncoderSpec(const bool aHardwareNotAllowed) { + const void* keys[] = { + kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder}; + const void* values[] = {aHardwareNotAllowed ? kCFBooleanFalse + : kCFBooleanTrue}; + + static_assert(ArrayLength(keys) == ArrayLength(values), + "Non matching keys/values array size"); + return CFDictionaryCreate(kCFAllocatorDefault, keys, values, + ArrayLength(keys), &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); +} + +static void FrameCallback(void* aEncoder, void* aFrameRefCon, OSStatus aStatus, + VTEncodeInfoFlags aInfoFlags, + CMSampleBufferRef aSampleBuffer) { + if (aStatus != noErr || !aSampleBuffer) { + VTENC_LOGE("VideoToolbox encoder returned no data status=%d sample=%p", + aStatus, aSampleBuffer); + aSampleBuffer = nullptr; + } else if (aInfoFlags & kVTEncodeInfo_FrameDropped) { + VTENC_LOGE("frame tagged as dropped"); + return; + } + (static_cast<AppleVTEncoder*>(aEncoder))->OutputFrame(aSampleBuffer); +} + +static bool SetAverageBitrate(VTCompressionSessionRef& aSession, + MediaDataEncoder::Rate aBitsPerSec) { + int64_t bps(aBitsPerSec); + AutoCFRelease<CFNumberRef> bitrate( + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &bps)); + return VTSessionSetProperty(aSession, + kVTCompressionPropertyKey_AverageBitRate, + bitrate) == noErr; +} + +static bool 
SetRealtimeProperties(VTCompressionSessionRef& aSession) { + return VTSessionSetProperty(aSession, kVTCompressionPropertyKey_RealTime, + kCFBooleanTrue) == noErr && + VTSessionSetProperty(aSession, + kVTCompressionPropertyKey_AllowFrameReordering, + kCFBooleanFalse) == noErr; +} + +static bool SetProfileLevel(VTCompressionSessionRef& aSession, + AppleVTEncoder::H264Specific::ProfileLevel aValue) { + CFStringRef profileLevel = nullptr; + switch (aValue) { + case AppleVTEncoder::H264Specific::ProfileLevel::BaselineAutoLevel: + profileLevel = kVTProfileLevel_H264_Baseline_AutoLevel; + break; + case AppleVTEncoder::H264Specific::ProfileLevel::MainAutoLevel: + profileLevel = kVTProfileLevel_H264_Main_AutoLevel; + break; + } + + return profileLevel ? VTSessionSetProperty( + aSession, kVTCompressionPropertyKey_ProfileLevel, + profileLevel) == noErr + : false; +} + +RefPtr<MediaDataEncoder::InitPromise> AppleVTEncoder::Init() { + MOZ_ASSERT(!mInited, "Cannot initialize encoder again without shutting down"); + + if (mConfig.mSize.width == 0 || mConfig.mSize.height == 0) { + return InitPromise::CreateAndReject(NS_ERROR_ILLEGAL_VALUE, __func__); + } + + AutoCFRelease<CFDictionaryRef> spec(BuildEncoderSpec(mHardwareNotAllowed)); + AutoCFRelease<CFDictionaryRef> srcBufferAttr( + BuildSourceImageBufferAttributes()); + if (!srcBufferAttr) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, + "fail to create source buffer attributes"), + __func__); + } + + OSStatus status = VTCompressionSessionCreate( + kCFAllocatorDefault, mConfig.mSize.width, mConfig.mSize.height, + kCMVideoCodecType_H264, spec, srcBufferAttr, kCFAllocatorDefault, + &FrameCallback, this /* outputCallbackRefCon */, &mSession); + + if (status != noErr) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + "fail to create encoder session"), + __func__); + } + + if (!SetAverageBitrate(mSession, mConfig.mBitsPerSec)) { + return 
InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + "fail to configurate average bitrate"), + __func__); + } + + if (mConfig.mUsage == Usage::Realtime && !SetRealtimeProperties(mSession)) { + VTENC_LOGE("fail to configurate realtime properties"); + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + "fail to configurate average bitrate"), + __func__); + } + + int64_t interval = + mConfig.mKeyframeInterval > std::numeric_limits<int64_t>::max() + ? std::numeric_limits<int64_t>::max() + : mConfig.mKeyframeInterval; + AutoCFRelease<CFNumberRef> cf( + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &interval)); + if (VTSessionSetProperty(mSession, + kVTCompressionPropertyKey_MaxKeyFrameInterval, + cf) != noErr) { + return InitPromise::CreateAndReject( + MediaResult( + NS_ERROR_DOM_MEDIA_FATAL_ERR, + nsPrintfCString("fail to configurate keyframe interval:%" PRId64, + interval)), + __func__); + } + + if (mConfig.mCodecSpecific) { + const H264Specific& specific = mConfig.mCodecSpecific.ref(); + if (!SetProfileLevel(mSession, specific.mProfileLevel)) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + nsPrintfCString("fail to configurate profile level:%d", + specific.mProfileLevel)), + __func__); + } + } + + CFBooleanRef isUsingHW = nullptr; + status = VTSessionCopyProperty( + mSession, kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, + kCFAllocatorDefault, &isUsingHW); + mIsHardwareAccelerated = status == noErr && isUsingHW == kCFBooleanTrue; + if (isUsingHW) { + CFRelease(isUsingHW); + } + + mError = NS_OK; + return InitPromise::CreateAndResolve(TrackInfo::TrackType::kVideoTrack, + __func__); +} + +static Maybe<OSType> MapPixelFormat(MediaDataEncoder::PixelFormat aFormat) { + switch (aFormat) { + case MediaDataEncoder::PixelFormat::RGBA32: + case MediaDataEncoder::PixelFormat::BGRA32: + return Some(kCVPixelFormatType_32BGRA); + case 
MediaDataEncoder::PixelFormat::RGB24: + return Some(kCVPixelFormatType_24RGB); + case MediaDataEncoder::PixelFormat::BGR24: + return Some(kCVPixelFormatType_24BGR); + case MediaDataEncoder::PixelFormat::GRAY8: + return Some(kCVPixelFormatType_OneComponent8); + case MediaDataEncoder::PixelFormat::YUV444P: + return Some(kCVPixelFormatType_444YpCbCr8); + case MediaDataEncoder::PixelFormat::YUV420P: + return Some(kCVPixelFormatType_420YpCbCr8PlanarFullRange); + case MediaDataEncoder::PixelFormat::YUV420SP_NV12: + return Some(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); + default: + return Nothing(); + } +} + +CFDictionaryRef AppleVTEncoder::BuildSourceImageBufferAttributes() { + Maybe<OSType> fmt = MapPixelFormat(mConfig.mSourcePixelFormat); + if (fmt.isNothing()) { + VTENC_LOGE("unsupported source pixel format"); + return nullptr; + } + + // Source image buffer attributes + const void* keys[] = {kCVPixelBufferOpenGLCompatibilityKey, // TODO + kCVPixelBufferIOSurfacePropertiesKey, // TODO + kCVPixelBufferPixelFormatTypeKey}; + + AutoCFRelease<CFDictionaryRef> ioSurfaceProps(CFDictionaryCreate( + kCFAllocatorDefault, nullptr, nullptr, 0, &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks)); + AutoCFRelease<CFNumberRef> pixelFormat( + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &fmt)); + const void* values[] = {kCFBooleanTrue, ioSurfaceProps, pixelFormat}; + + MOZ_ASSERT(ArrayLength(keys) == ArrayLength(values), + "Non matching keys/values array size"); + + return CFDictionaryCreate(kCFAllocatorDefault, keys, values, + ArrayLength(keys), &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); +} + +static bool IsKeyframe(CMSampleBufferRef aSample) { + CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(aSample, 0); + if (attachments == nullptr || CFArrayGetCount(attachments) == 0) { + return false; + } + + return !CFDictionaryContainsKey( + static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0)), + 
kCMSampleAttachmentKey_NotSync); +} + +static size_t GetNumParamSets(CMFormatDescriptionRef aDescription) { + size_t numParamSets = 0; + OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex( + aDescription, 0, nullptr, nullptr, &numParamSets, nullptr); + if (status != noErr) { + VTENC_LOGE("Cannot get number of parameter sets from format description"); + } + + return numParamSets; +} + +static const uint8_t kNALUStart[4] = {0, 0, 0, 1}; + +static size_t GetParamSet(CMFormatDescriptionRef aDescription, size_t aIndex, + const uint8_t** aDataPtr) { + size_t length = 0; + int headerSize = 0; + if (CMVideoFormatDescriptionGetH264ParameterSetAtIndex( + aDescription, aIndex, aDataPtr, &length, nullptr, &headerSize) != + noErr) { + VTENC_LOGE("fail to get parameter set from format description"); + return 0; + } + MOZ_ASSERT(headerSize == sizeof(kNALUStart), "Only support 4 byte header"); + + return length; +} + +static bool WriteSPSPPS(MediaRawData* aDst, + CMFormatDescriptionRef aDescription) { + // Get SPS/PPS + const size_t numParamSets = GetNumParamSets(aDescription); + UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter()); + for (size_t i = 0; i < numParamSets; i++) { + const uint8_t* data = nullptr; + size_t length = GetParamSet(aDescription, i, &data); + if (length == 0) { + return false; + } + if (!writer->Append(kNALUStart, sizeof(kNALUStart))) { + VTENC_LOGE("Cannot write NAL unit start code"); + return false; + } + if (!writer->Append(data, length)) { + VTENC_LOGE("Cannot write parameter set"); + return false; + } + } + return true; +} + +static RefPtr<MediaByteBuffer> extractAvcc( + CMFormatDescriptionRef aDescription) { + CFPropertyListRef list = CMFormatDescriptionGetExtension( + aDescription, + kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms); + if (!list) { + VTENC_LOGE("fail to get atoms"); + return nullptr; + } + CFDataRef avcC = static_cast<CFDataRef>( + CFDictionaryGetValue(static_cast<CFDictionaryRef>(list), 
CFSTR("avcC"))); + if (!avcC) { + VTENC_LOGE("fail to extract avcC"); + return nullptr; + } + CFIndex length = CFDataGetLength(avcC); + const UInt8* bytes = CFDataGetBytePtr(avcC); + if (length <= 0 || !bytes) { + VTENC_LOGE("empty avcC"); + return nullptr; + } + + RefPtr<MediaByteBuffer> config = new MediaByteBuffer(length); + config->AppendElements(bytes, length); + return config; +} + +bool AppleVTEncoder::WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc, + const bool aAsAnnexB) { + if (!IsKeyframe(aSrc)) { + return true; + } + + aDst->mKeyframe = true; + CMFormatDescriptionRef desc = CMSampleBufferGetFormatDescription(aSrc); + if (!desc) { + VTENC_LOGE("fail to get format description from sample"); + return false; + } + + if (aAsAnnexB) { + return WriteSPSPPS(aDst, desc); + } + + RefPtr<MediaByteBuffer> avcc = extractAvcc(desc); + if (!avcc) { + return false; + } + + if (!mAvcc || !H264::CompareExtraData(avcc, mAvcc)) { + mAvcc = avcc; + aDst->mExtraData = mAvcc; + } + + return avcc != nullptr; +} + +static bool WriteNALUs(MediaRawData* aDst, CMSampleBufferRef aSrc, + bool aAsAnnexB = false) { + size_t srcRemaining = CMSampleBufferGetTotalSampleSize(aSrc); + CMBlockBufferRef block = CMSampleBufferGetDataBuffer(aSrc); + if (!block) { + VTENC_LOGE("Cannot get block buffer frome sample"); + return false; + } + UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter()); + size_t writtenLength = aDst->Size(); + // Ensure capacity. 
+ if (!writer->SetSize(writtenLength + srcRemaining)) { + VTENC_LOGE("Cannot allocate buffer"); + return false; + } + size_t readLength = 0; + while (srcRemaining > 0) { + // Extract the size of next NAL unit + uint8_t unitSizeBytes[4]; + MOZ_ASSERT(srcRemaining > sizeof(unitSizeBytes)); + if (CMBlockBufferCopyDataBytes(block, readLength, sizeof(unitSizeBytes), + reinterpret_cast<uint32_t*>( + unitSizeBytes)) != kCMBlockBufferNoErr) { + VTENC_LOGE("Cannot copy unit size bytes"); + return false; + } + size_t unitSize = + CFSwapInt32BigToHost(*reinterpret_cast<uint32_t*>(unitSizeBytes)); + + if (aAsAnnexB) { + // Replace unit size bytes with NALU start code. + PodCopy(writer->Data() + writtenLength, kNALUStart, sizeof(kNALUStart)); + readLength += sizeof(unitSizeBytes); + srcRemaining -= sizeof(unitSizeBytes); + writtenLength += sizeof(kNALUStart); + } else { + // Copy unit size bytes + data. + unitSize += sizeof(unitSizeBytes); + } + MOZ_ASSERT(writtenLength + unitSize <= aDst->Size()); + // Copy NAL unit data + if (CMBlockBufferCopyDataBytes(block, readLength, unitSize, + writer->Data() + writtenLength) != + kCMBlockBufferNoErr) { + VTENC_LOGE("Cannot copy unit data"); + return false; + } + readLength += unitSize; + srcRemaining -= unitSize; + writtenLength += unitSize; + } + MOZ_ASSERT(writtenLength == aDst->Size()); + return true; +} + +void AppleVTEncoder::OutputFrame(CMSampleBufferRef aBuffer) { + RefPtr<MediaRawData> output(new MediaRawData()); + + bool asAnnexB = mConfig.mUsage == Usage::Realtime; + bool succeeded = WriteExtraData(output, aBuffer, asAnnexB) && + WriteNALUs(output, aBuffer, asAnnexB); + + output->mTime = media::TimeUnit::FromSeconds( + CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(aBuffer))); + output->mDuration = media::TimeUnit::FromSeconds( + CMTimeGetSeconds(CMSampleBufferGetOutputDuration(aBuffer))); + ProcessOutput(succeeded ? 
std::move(output) : nullptr); +} + +void AppleVTEncoder::ProcessOutput(RefPtr<MediaRawData>&& aOutput) { + if (!mTaskQueue->IsCurrentThreadIn()) { + nsresult rv = mTaskQueue->Dispatch(NewRunnableMethod<RefPtr<MediaRawData>>( + "AppleVTEncoder::ProcessOutput", this, &AppleVTEncoder::ProcessOutput, + std::move(aOutput))); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + return; + } + AssertOnTaskQueue(); + + if (aOutput) { + mEncodedData.AppendElement(std::move(aOutput)); + } else { + mError = NS_ERROR_DOM_MEDIA_FATAL_ERR; + } +} + +RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Encode( + const MediaData* aSample) { + MOZ_ASSERT(aSample != nullptr); + RefPtr<const VideoData> sample(aSample->As<const VideoData>()); + + return InvokeAsync<RefPtr<const VideoData>>(mTaskQueue, this, __func__, + &AppleVTEncoder::ProcessEncode, + std::move(sample)); +} + +RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessEncode( + RefPtr<const VideoData> aSample) { + AssertOnTaskQueue(); + MOZ_ASSERT(mSession); + + if (NS_FAILED(mError)) { + return EncodePromise::CreateAndReject(mError, __func__); + } + + AutoCVBufferRelease<CVImageBufferRef> buffer( + CreateCVPixelBuffer(aSample->mImage)); + if (!buffer) { + return EncodePromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + + CFDictionaryRef frameProps = nullptr; + if (aSample->mKeyframe) { + CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame}; + CFTypeRef values[] = {kCFBooleanTrue}; + MOZ_ASSERT(ArrayLength(keys) == ArrayLength(values)); + frameProps = CFDictionaryCreate( + kCFAllocatorDefault, keys, values, ArrayLength(keys), + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); + }; + + VTEncodeInfoFlags info; + OSStatus status = VTCompressionSessionEncodeFrame( + mSession, buffer, + CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S), + CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S), frameProps, + nullptr /* sourceFrameRefcon */, &info); + if 
(status != noErr) { + return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, + __func__); + } + + return EncodePromise::CreateAndResolve(std::move(mEncodedData), __func__); +} + +static size_t NumberOfPlanes(MediaDataEncoder::PixelFormat aPixelFormat) { + switch (aPixelFormat) { + case MediaDataEncoder::PixelFormat::RGBA32: + case MediaDataEncoder::PixelFormat::BGRA32: + case MediaDataEncoder::PixelFormat::RGB24: + case MediaDataEncoder::PixelFormat::BGR24: + case MediaDataEncoder::PixelFormat::GRAY8: + return 1; + case MediaDataEncoder::PixelFormat::YUV444P: + case MediaDataEncoder::PixelFormat::YUV420P: + return 3; + case MediaDataEncoder::PixelFormat::YUV420SP_NV12: + return 2; + default: + VTENC_LOGE("Unsupported input pixel format"); + return 0; + } +} + +using namespace layers; + +static void ReleaseImage(void* aImageGrip, const void* aDataPtr, + size_t aDataSize, size_t aNumOfPlanes, + const void** aPlanes) { + (static_cast<PlanarYCbCrImage*>(aImageGrip))->Release(); +} + +CVPixelBufferRef AppleVTEncoder::CreateCVPixelBuffer(const Image* aSource) { + AssertOnTaskQueue(); + + // TODO: support types other than YUV + PlanarYCbCrImage* image = const_cast<Image*>(aSource)->AsPlanarYCbCrImage(); + if (!image || !image->GetData()) { + return nullptr; + } + + OSType format = MapPixelFormat(mConfig.mSourcePixelFormat).ref(); + size_t numPlanes = NumberOfPlanes(mConfig.mSourcePixelFormat); + const PlanarYCbCrImage::Data* yuv = image->GetData(); + if (!yuv) { + return nullptr; + } + auto ySize = yuv->YDataSize(); + auto cbcrSize = yuv->CbCrDataSize(); + void* addresses[3] = {}; + size_t widths[3] = {}; + size_t heights[3] = {}; + size_t strides[3] = {}; + switch (numPlanes) { + case 3: + addresses[2] = yuv->mCrChannel; + widths[2] = cbcrSize.width; + heights[2] = cbcrSize.height; + strides[2] = yuv->mCbCrStride; + [[fallthrough]]; + case 2: + addresses[1] = yuv->mCbChannel; + widths[1] = cbcrSize.width; + heights[1] = cbcrSize.height; + strides[1] = 
yuv->mCbCrStride; + [[fallthrough]]; + case 1: + addresses[0] = yuv->mYChannel; + widths[0] = ySize.width; + heights[0] = ySize.height; + strides[0] = yuv->mYStride; + break; + default: + return nullptr; + } + + CVPixelBufferRef buffer = nullptr; + image->AddRef(); // Grip input buffers. + CVReturn rv = CVPixelBufferCreateWithPlanarBytes( + kCFAllocatorDefault, yuv->mPictureRect.width, yuv->mPictureRect.height, + format, nullptr /* dataPtr */, 0 /* dataSize */, numPlanes, addresses, + widths, heights, strides, ReleaseImage /* releaseCallback */, + image /* releaseRefCon */, nullptr /* pixelBufferAttributes */, &buffer); + if (rv == kCVReturnSuccess) { + return buffer; + // |image| will be released in |ReleaseImage()|. + } else { + image->Release(); + return nullptr; + } +} + +RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Drain() { + return InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessDrain); +} + +RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessDrain() { + AssertOnTaskQueue(); + MOZ_ASSERT(mSession); + + if (mFramesCompleted) { + MOZ_DIAGNOSTIC_ASSERT(mEncodedData.IsEmpty()); + return EncodePromise::CreateAndResolve(EncodedData(), __func__); + } + + OSStatus status = + VTCompressionSessionCompleteFrames(mSession, kCMTimeIndefinite); + if (status != noErr) { + return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, + __func__); + } + mFramesCompleted = true; + // VTCompressionSessionCompleteFrames() could have queued multiple tasks with + // the new drained frames. Dispatch a task after them to resolve the promise + // with those frames. 
+ RefPtr<AppleVTEncoder> self = this; + return InvokeAsync(mTaskQueue, __func__, [self]() { + EncodedData pendingFrames(std::move(self->mEncodedData)); + self->mEncodedData = EncodedData(); + return EncodePromise::CreateAndResolve(std::move(pendingFrames), __func__); + }); +} + +RefPtr<ShutdownPromise> AppleVTEncoder::Shutdown() { + return InvokeAsync(mTaskQueue, this, __func__, + &AppleVTEncoder::ProcessShutdown); +} + +RefPtr<ShutdownPromise> AppleVTEncoder::ProcessShutdown() { + if (mSession) { + VTCompressionSessionInvalidate(mSession); + CFRelease(mSession); + mSession = nullptr; + mInited = false; + } + return ShutdownPromise::CreateAndResolve(true, __func__); +} + +RefPtr<GenericPromise> AppleVTEncoder::SetBitrate( + MediaDataEncoder::Rate aBitsPerSec) { + RefPtr<AppleVTEncoder> self = this; + return InvokeAsync(mTaskQueue, __func__, [self, aBitsPerSec]() { + MOZ_ASSERT(self->mSession); + return SetAverageBitrate(self->mSession, aBitsPerSec) + ? GenericPromise::CreateAndResolve(true, __func__) + : GenericPromise::CreateAndReject( + NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, __func__); + }); +} + +} // namespace mozilla diff --git a/dom/media/platforms/apple/AppleVTEncoder.h b/dom/media/platforms/apple/AppleVTEncoder.h new file mode 100644 index 0000000000..7f12f7ebb5 --- /dev/null +++ b/dom/media/platforms/apple/AppleVTEncoder.h @@ -0,0 +1,85 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_AppleVTEncoder_h_ +#define mozilla_AppleVTEncoder_h_ + +#include <CoreMedia/CoreMedia.h> +#include <VideoToolbox/VideoToolbox.h> + +#include "PlatformEncoderModule.h" +#include "TimeUnits.h" + +namespace mozilla { + +namespace layers { +class Image; +} + +class AppleVTEncoder final : public MediaDataEncoder { + public: + using Config = H264Config; + + AppleVTEncoder(const Config& aConfig, RefPtr<TaskQueue> aTaskQueue, + const bool aHwardwareNotAllowed) + : mConfig(aConfig), + mTaskQueue(aTaskQueue), + mHardwareNotAllowed(aHwardwareNotAllowed), + mFramesCompleted(false), + mError(NS_OK), + mSession(nullptr) { + MOZ_ASSERT(mConfig.mSize.width > 0 && mConfig.mSize.height > 0); + MOZ_ASSERT(mTaskQueue); + } + + RefPtr<InitPromise> Init() override; + RefPtr<EncodePromise> Encode(const MediaData* aSample) override; + RefPtr<EncodePromise> Drain() override; + RefPtr<ShutdownPromise> Shutdown() override; + RefPtr<GenericPromise> SetBitrate(Rate aBitsPerSec) override; + + nsCString GetDescriptionName() const override { + MOZ_ASSERT(mSession); + return mIsHardwareAccelerated ? "apple hardware VT encoder"_ns + : "apple software VT encoder"_ns; + } + + void OutputFrame(CMSampleBufferRef aBuffer); + + private: + virtual ~AppleVTEncoder() { MOZ_ASSERT(!mSession); } + RefPtr<EncodePromise> ProcessEncode(RefPtr<const VideoData> aSample); + void ProcessOutput(RefPtr<MediaRawData>&& aOutput); + void ResolvePromise(); + RefPtr<EncodePromise> ProcessDrain(); + RefPtr<ShutdownPromise> ProcessShutdown(); + + CFDictionaryRef BuildSourceImageBufferAttributes(); + CVPixelBufferRef CreateCVPixelBuffer(const layers::Image* aSource); + bool WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc, + const bool aAsAnnexB); + void AssertOnTaskQueue() { MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn()); } + + const Config mConfig; + const RefPtr<TaskQueue> mTaskQueue; + const bool mHardwareNotAllowed; + // Access only in mTaskQueue. 
+ EncodedData mEncodedData; + bool mFramesCompleted; + RefPtr<MediaByteBuffer> mAvcc; // Stores latest avcC data. + MediaResult mError; + + // Written by Init() but used only in task queue. + VTCompressionSessionRef mSession; + // Can be accessed on any thread, but only written on during init. + Atomic<bool> mIsHardwareAccelerated; + // Written during init and shutdown. + Atomic<bool> mInited; +}; + +} // namespace mozilla + +#endif // mozilla_AppleVTEncoder_h_ |