/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "AppleVTEncoder.h" #include #include #include #include #include "AnnexB.h" #include "H264.h" #include "ImageContainer.h" #include "mozilla/dom/BindingUtils.h" #include "mozilla/dom/ImageUtils.h" namespace mozilla { extern LazyLogModule sPEMLog; #define LOGE(fmt, ...) \ MOZ_LOG(sPEMLog, mozilla::LogLevel::Error, \ ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) #define LOGW(fmt, ...) \ MOZ_LOG(sPEMLog, mozilla::LogLevel::Warning, \ ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) #define LOGD(fmt, ...) \ MOZ_LOG(sPEMLog, mozilla::LogLevel::Debug, \ ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) #define LOGV(fmt, ...) \ MOZ_LOG(sPEMLog, mozilla::LogLevel::Verbose, \ ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__)) static CFDictionaryRef BuildEncoderSpec(const bool aHardwareNotAllowed, const bool aLowLatencyRateControl) { if (__builtin_available(macos 11.3, *)) { if (aLowLatencyRateControl) { // If doing low-latency rate control, the hardware encoder is required. const void* keys[] = { kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder, kVTVideoEncoderSpecification_EnableLowLatencyRateControl}; const void* values[] = {kCFBooleanTrue, kCFBooleanTrue}; static_assert(std::size(keys) == std::size(values), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } } const void* keys[] = { kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder}; const void* values[] = {aHardwareNotAllowed ? kCFBooleanFalse : kCFBooleanTrue}; static_assert(std::size(keys) == std::size(values), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } static void FrameCallback(void* aEncoder, void* aFrameRefCon, OSStatus aStatus, VTEncodeInfoFlags aInfoFlags, CMSampleBufferRef aSampleBuffer) { (static_cast(aEncoder)) ->OutputFrame(aStatus, aInfoFlags, aSampleBuffer); } bool AppleVTEncoder::SetAverageBitrate(uint32_t aBitsPerSec) { MOZ_ASSERT(mSession); SessionPropertyManager mgr(mSession); return mgr.Set(kVTCompressionPropertyKey_AverageBitRate, int64_t(aBitsPerSec)) == noErr; } bool AppleVTEncoder::SetConstantBitrate(uint32_t aBitsPerSec) { MOZ_ASSERT(mSession); if (__builtin_available(macos 13.0, *)) { SessionPropertyManager mgr(mSession); OSStatus rv = mgr.Set(kVTCompressionPropertyKey_ConstantBitRate, AssertedCast(aBitsPerSec)); if (rv == kVTPropertyNotSupportedErr) { LOGE("Constant bitrate not supported."); } return rv == noErr; } return false; } bool AppleVTEncoder::SetBitrateAndMode(BitrateMode aBitrateMode, uint32_t aBitsPerSec) { if (aBitrateMode == BitrateMode::Variable) { return SetAverageBitrate(aBitsPerSec); } return SetConstantBitrate(aBitsPerSec); } bool AppleVTEncoder::SetFrameRate(int64_t aFPS) { MOZ_ASSERT(mSession); SessionPropertyManager mgr(mSession); return mgr.Set(kVTCompressionPropertyKey_ExpectedFrameRate, aFPS) == noErr; } bool AppleVTEncoder::SetRealtime(bool aEnabled) { MOZ_ASSERT(mSession); // B-frames has been disabled in Init(), so no need to set it here. SessionPropertyManager mgr(mSession); OSStatus status = mgr.Set(kVTCompressionPropertyKey_RealTime, aEnabled); LOGD("%s real time, status: %d", aEnabled ? "Enable" : "Disable", status); if (status != noErr) { return false; } if (__builtin_available(macos 11.0, *)) { status = mgr.Set( kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality, aEnabled); LOGD("%s PrioritizeEncodingSpeedOverQuality, status: %d", aEnabled ? "Enable" : "Disable", status); if (status != noErr && status != kVTPropertyNotSupportedErr) { return false; } } int32_t maxFrameDelayCount = aEnabled ? 0 : kVTUnlimitedFrameDelayCount; status = mgr.Set(kVTCompressionPropertyKey_MaxFrameDelayCount, maxFrameDelayCount); LOGD("Set max frame delay count to %d, status: %d", maxFrameDelayCount, status); if (status != noErr && status != kVTPropertyNotSupportedErr) { return false; } return true; } bool AppleVTEncoder::SetProfileLevel(H264_PROFILE aValue) { MOZ_ASSERT(mSession); CFStringRef profileLevel = nullptr; switch (aValue) { case H264_PROFILE::H264_PROFILE_BASE: profileLevel = kVTProfileLevel_H264_Baseline_AutoLevel; break; case H264_PROFILE::H264_PROFILE_MAIN: profileLevel = kVTProfileLevel_H264_Main_AutoLevel; break; case H264_PROFILE::H264_PROFILE_HIGH: profileLevel = kVTProfileLevel_H264_High_AutoLevel; break; default: LOGE("Profile %d not handled", static_cast(aValue)); } if (profileLevel == nullptr) { return false; } SessionPropertyManager mgr(mSession); return mgr.Set(kVTCompressionPropertyKey_ProfileLevel, profileLevel) == noErr; } static Maybe MapColorPrimaries( const gfx::ColorSpace2& aPrimaries) { switch (aPrimaries) { case gfx::ColorSpace2::Display: return Nothing(); case gfx::ColorSpace2::SRGB: return Some(kCVImageBufferColorPrimaries_P22); case gfx::ColorSpace2::DISPLAY_P3: return Some(kCVImageBufferColorPrimaries_P3_D65); case gfx::ColorSpace2::BT601_525: return Some(kCVImageBufferColorPrimaries_SMPTE_C); case gfx::ColorSpace2::BT709: return Some(kCVImageBufferColorPrimaries_ITU_R_709_2); case gfx::ColorSpace2::BT2020: return Some(kCVImageBufferColorPrimaries_ITU_R_2020); } MOZ_ASSERT_UNREACHABLE("Unsupported color primaries"); return Nothing(); } static Maybe MapYCbCrMatrix(const gfx::YUVColorSpace& aMatrix) { switch (aMatrix) { case gfx::YUVColorSpace::BT601: return Some(kCVImageBufferYCbCrMatrix_ITU_R_601_4); case gfx::YUVColorSpace::BT709: return Some(kCVImageBufferYCbCrMatrix_ITU_R_709_2); case gfx::YUVColorSpace::BT2020: return Some(kCVImageBufferYCbCrMatrix_ITU_R_2020); case gfx::YUVColorSpace::Identity: return Nothing(); } MOZ_ASSERT_UNREACHABLE("Unsupported YCbCr matrix"); return Nothing(); } static Maybe MapTransferFunction( const gfx::TransferFunction& aTransferFunction) { switch (aTransferFunction) { case gfx::TransferFunction::BT709: return Some(kCVImageBufferTransferFunction_ITU_R_709_2); case gfx::TransferFunction::SRGB: return Some(kCVImageBufferTransferFunction_sRGB); case gfx::TransferFunction::PQ: return Some(kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ); case gfx::TransferFunction::HLG: return Some(kCVImageBufferTransferFunction_ITU_R_2100_HLG); } MOZ_ASSERT_UNREACHABLE("Unsupported transfer function"); return Nothing(); } struct EncoderColorSpace { CFStringRef mColorPrimaries = nullptr; CFStringRef mYCbCrMatrix = nullptr; CFStringRef mTransferFunction = nullptr; }; static Result MapColorSpace( const EncoderConfig::VideoColorSpace& aColorSpace) { EncoderColorSpace colorSpace; if (aColorSpace.mPrimaries) { Maybe p = MapColorPrimaries(aColorSpace.mPrimaries.ref()); if (p.isNothing()) { return Err(MediaResult( NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, RESULT_DETAIL("Unsupported color primaries: %u", static_cast(aColorSpace.mPrimaries.ref())))); } colorSpace.mColorPrimaries = p.value(); } if (aColorSpace.mMatrix) { Maybe m = MapYCbCrMatrix(aColorSpace.mMatrix.ref()); if (m.isNothing()) { return Err(MediaResult( NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, RESULT_DETAIL("Unsupported YCbCr matrix: %u", static_cast(aColorSpace.mMatrix.ref())))); } colorSpace.mYCbCrMatrix = m.value(); } if (aColorSpace.mTransferFunction) { Maybe f = MapTransferFunction(aColorSpace.mTransferFunction.ref()); if (f.isNothing()) { return Err(MediaResult( NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, RESULT_DETAIL( "Unsupported transfer function: %u", static_cast(aColorSpace.mTransferFunction.ref())))); } colorSpace.mTransferFunction = f.value(); } return colorSpace; } bool AppleVTEncoder::IsSettingColorSpaceSupported() const { SessionPropertyManager mgr(mSession); return mgr.IsSupported(kVTCompressionPropertyKey_ColorPrimaries) && mgr.IsSupported(kVTCompressionPropertyKey_YCbCrMatrix) && mgr.IsSupported(kVTCompressionPropertyKey_TransferFunction); } MediaResult AppleVTEncoder::SetColorSpace( const EncoderConfig::SampleFormat& aFormat) { MOZ_ASSERT(mSession); if (!aFormat.IsYUV()) { return MediaResult(NS_OK, "Skip setting color space for non-YUV formats"); } if (!IsSettingColorSpaceSupported()) { return MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, "Setting color space not supported"); } auto r = MapColorSpace(aFormat.mColorSpace); if (r.isErr()) { return r.unwrapErr(); } EncoderColorSpace colorSpace = r.unwrap(); SessionPropertyManager mgr(mSession); AutoTArray properties; if (colorSpace.mColorPrimaries) { OSStatus status = mgr.Set(kVTCompressionPropertyKey_ColorPrimaries, colorSpace.mColorPrimaries); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("Failed to set color primaries. Error: %d", status)); } properties.AppendElement("ColorPrimaries"); } if (colorSpace.mYCbCrMatrix) { OSStatus status = mgr.Set(kVTCompressionPropertyKey_YCbCrMatrix, colorSpace.mYCbCrMatrix); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("Failed to set YCbCr matrix. Error: %d", status)); } properties.AppendElement("YCbCrMatrix"); } if (colorSpace.mTransferFunction) { OSStatus status = mgr.Set(kVTCompressionPropertyKey_TransferFunction, colorSpace.mTransferFunction); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("Failed to set transfer function. Error: %d", status)); } properties.AppendElement("TransferFunction"); } nsCString msg; if (properties.IsEmpty()) { msg = "No color space properties set"_ns; } else { msg = StringJoin(","_ns, properties); msg.Append(" set"); } return MediaResult(NS_OK, msg); } static Result MapPixelFormat( dom::ImageBitmapFormat aFormat, gfx::ColorRange aColorRange) { const bool isFullRange = aColorRange == gfx::ColorRange::FULL; Maybe fmt; switch (aFormat) { case dom::ImageBitmapFormat::YUV444P: return kCVPixelFormatType_444YpCbCr8; case dom::ImageBitmapFormat::YUV420P: return isFullRange ? kCVPixelFormatType_420YpCbCr8PlanarFullRange : kCVPixelFormatType_420YpCbCr8Planar; case dom::ImageBitmapFormat::YUV420SP_NV12: return isFullRange ? kCVPixelFormatType_420YpCbCr8BiPlanarFullRange : kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange; case dom::ImageBitmapFormat::RGBA32: fmt.emplace(kCVPixelFormatType_32RGBA); break; case dom::ImageBitmapFormat::BGRA32: fmt.emplace(kCVPixelFormatType_32BGRA); break; case dom::ImageBitmapFormat::RGB24: fmt.emplace(kCVPixelFormatType_24RGB); break; case dom::ImageBitmapFormat::BGR24: fmt.emplace(kCVPixelFormatType_24BGR); break; case dom::ImageBitmapFormat::GRAY8: fmt.emplace(kCVPixelFormatType_OneComponent8); break; default: MOZ_ASSERT_UNREACHABLE("Unsupported image format"); } // Limited RGB formats are not supported on MacOS (Bug 1957758). if (fmt) { if (!isFullRange) { return Err(MediaResult( NS_ERROR_NOT_IMPLEMENTED, RESULT_DETAIL("format %s with limited colorspace is not supported", dom::GetEnumString(aFormat).get()))); } return fmt.value(); } return Err(MediaResult(NS_ERROR_NOT_IMPLEMENTED, RESULT_DETAIL("format %s is not supported", dom::GetEnumString(aFormat).get()))); } RefPtr AppleVTEncoder::Init() { MOZ_ASSERT(!mSession, "Cannot initialize encoder again without shutting down"); MediaResult r = InitSession(); if (NS_FAILED(r.Code())) { LOGE("%s", r.Description().get()); return InitPromise::CreateAndReject(r, __func__); } mError = NS_OK; return InitPromise::CreateAndResolve(true, __func__); } MediaResult AppleVTEncoder::InitSession() { MOZ_ASSERT(!mSession); auto errorExit = MakeScopeExit([&] { InvalidateSessionIfNeeded(); }); if (mConfig.mSize.width == 0 || mConfig.mSize.height == 0) { return MediaResult( NS_ERROR_ILLEGAL_VALUE, RESULT_DETAIL("Neither width (%d) nor height (%d) can be zero", mConfig.mSize.width, mConfig.mSize.height)); } if (mConfig.mScalabilityMode != ScalabilityMode::None && !OSSupportsSVC()) { return MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, "SVC only supported on macOS 11.3 and more recent"_ns); } bool lowLatencyRateControl = mConfig.mUsage == Usage::Realtime || mConfig.mScalabilityMode != ScalabilityMode::None; LOGD("low latency rate control: %s, Hardware allowed: %s", lowLatencyRateControl ? "yes" : "no", mHardwareNotAllowed ? "no" : "yes"); AutoCFTypeRef spec( BuildEncoderSpec(mHardwareNotAllowed, lowLatencyRateControl)); // Bug 1955153: Set sourceImageBufferAttributes using the pixel format derived // from mConfig.mFormat. OSStatus status = VTCompressionSessionCreate( kCFAllocatorDefault, mConfig.mSize.width, mConfig.mSize.height, kCMVideoCodecType_H264, spec, nullptr /* sourceImageBufferAttributes */, kCFAllocatorDefault, &FrameCallback, this /* outputCallbackRefCon */, mSession.Receive()); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("fail to create encoder session. Error: %d", status)); } SessionPropertyManager mgr(mSession); status = mgr.Set(kVTCompressionPropertyKey_AllowFrameReordering, false); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("Couldn't disable bframes. Error: %d", status)); } if (mConfig.mUsage == Usage::Realtime && !SetRealtime(true)) { return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "fail to configure real-time"_ns); } if (mConfig.mBitrate) { if (mConfig.mCodec == CodecType::H264 && mConfig.mBitrateMode == BitrateMode::Constant) { // Not supported, fall-back to VBR. LOGD("H264 CBR not supported in VideoToolbox, falling back to VBR"); mConfig.mBitrateMode = BitrateMode::Variable; } bool rv = SetBitrateAndMode(mConfig.mBitrateMode, mConfig.mBitrate); if (!rv) { return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "fail to configurate bitrate"_ns); } } if (mConfig.mScalabilityMode != ScalabilityMode::None) { if (__builtin_available(macos 11.3, *)) { float baseLayerFPSRatio = 1.0f; switch (mConfig.mScalabilityMode) { case ScalabilityMode::L1T2: baseLayerFPSRatio = 0.5; break; case ScalabilityMode::L1T3: // Not supported in hw on macOS, but is accepted and errors out when // encoding. Reject the configuration now. return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("macOS only support L1T2 h264 SVC")); default: MOZ_ASSERT_UNREACHABLE("Unhandled value"); } status = mgr.Set(kVTCompressionPropertyKey_BaseLayerFrameRateFraction, baseLayerFPSRatio); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("fail to configure SVC (base ratio: %f). Error: %d", baseLayerFPSRatio, status)); } } else { return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "macOS version too old to enable SVC"_ns); } } int64_t interval = mConfig.mKeyframeInterval > std::numeric_limits::max() ? std::numeric_limits::max() : AssertedCast(mConfig.mKeyframeInterval); status = mgr.Set(kVTCompressionPropertyKey_MaxKeyFrameInterval, interval); if (status != noErr) { return MediaResult( NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("fail to configurate keyframe interval: %" PRId64 ". Error: %d", interval, status)); } if (mConfig.mCodecSpecific) { const H264Specific& specific = mConfig.mCodecSpecific->as(); if (!SetProfileLevel(specific.mProfile)) { return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("fail to configurate profile level:%d", int(specific.mProfile))); } } MediaResult colorSpaceResult = SetColorSpace(mConfig.mFormat); if (NS_SUCCEEDED(colorSpaceResult.Code())) { LOGD("%s", colorSpaceResult.Description().get()); } else if (colorSpaceResult.Code() == NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR) { // Color space not supported, ignore. LOGW("%s", colorSpaceResult.Description().get()); } else { MOZ_ASSERT(NS_FAILED(colorSpaceResult.Code())); LOGE("%s", colorSpaceResult.Description().get()); return colorSpaceResult; } bool isUsingHW = false; status = mgr.Copy(kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, isUsingHW); mIsHardwareAccelerated = status == noErr && isUsingHW; LOGD("Using hw acceleration: %s", mIsHardwareAccelerated ? "yes" : "no"); errorExit.release(); return NS_OK; } void AppleVTEncoder::InvalidateSessionIfNeeded() { if (mSession) { VTCompressionSessionInvalidate(mSession); mSession.Reset(); } } CFDictionaryRef AppleVTEncoder::BuildSourceImageBufferAttributes( OSType aPixelFormat) { // Source image buffer attributes const void* keys[] = {kCVPixelBufferOpenGLCompatibilityKey, // TODO kCVPixelBufferIOSurfacePropertiesKey, // TODO kCVPixelBufferPixelFormatTypeKey}; AutoCFTypeRef ioSurfaceProps(CFDictionaryCreate( kCFAllocatorDefault, nullptr, nullptr, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks)); AutoCFTypeRef pixelFormat( CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &aPixelFormat)); const void* values[] = {kCFBooleanTrue, ioSurfaceProps, pixelFormat}; MOZ_ASSERT(std::size(keys) == std::size(values), "Non matching keys/values array size"); return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); } static bool IsKeyframe(CMSampleBufferRef aSample) { CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(aSample, 0); if (attachments == nullptr || CFArrayGetCount(attachments) == 0) { return false; } return !CFDictionaryContainsKey( static_cast(CFArrayGetValueAtIndex(attachments, 0)), kCMSampleAttachmentKey_NotSync); } static size_t GetNumParamSets(CMFormatDescriptionRef aDescription) { size_t numParamSets = 0; OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex( aDescription, 0, nullptr, nullptr, &numParamSets, nullptr); if (status != noErr) { LOGE("Cannot get number of parameter sets from format description"); } return numParamSets; } static const uint8_t kNALUStart[4] = {0, 0, 0, 1}; static size_t GetParamSet(CMFormatDescriptionRef aDescription, size_t aIndex, const uint8_t** aDataPtr) { size_t length = 0; int headerSize = 0; if (CMVideoFormatDescriptionGetH264ParameterSetAtIndex( aDescription, aIndex, aDataPtr, &length, nullptr, &headerSize) != noErr) { LOGE("failed to get parameter set from format description"); return 0; } MOZ_ASSERT(headerSize == sizeof(kNALUStart), "Only support 4 byte header"); return length; } static bool WriteSPSPPS(MediaRawData* aDst, CMFormatDescriptionRef aDescription) { // Get SPS/PPS const size_t numParamSets = GetNumParamSets(aDescription); UniquePtr writer(aDst->CreateWriter()); for (size_t i = 0; i < numParamSets; i++) { const uint8_t* data = nullptr; size_t length = GetParamSet(aDescription, i, &data); if (length == 0) { return false; } if (!writer->Append(kNALUStart, sizeof(kNALUStart))) { LOGE("Cannot write NAL unit start code"); return false; } if (!writer->Append(data, length)) { LOGE("Cannot write parameter set"); return false; } } return true; } static RefPtr extractAvcc( CMFormatDescriptionRef aDescription) { CFPropertyListRef list = CMFormatDescriptionGetExtension( aDescription, kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms); if (!list) { LOGE("fail to get atoms"); return nullptr; } CFDataRef avcC = static_cast( CFDictionaryGetValue(static_cast(list), CFSTR("avcC"))); if (!avcC) { LOGE("fail to extract avcC"); return nullptr; } CFIndex length = CFDataGetLength(avcC); const UInt8* bytes = CFDataGetBytePtr(avcC); if (length <= 0 || !bytes) { LOGE("empty avcC"); return nullptr; } RefPtr config = new MediaByteBuffer(length); config->AppendElements(bytes, length); return config; } bool AppleVTEncoder::WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc, const bool aAsAnnexB) { if (!IsKeyframe(aSrc)) { return true; } LOGV("Writing extra data (%s) for keyframe", aAsAnnexB ? "AnnexB" : "AVCC"); aDst->mKeyframe = true; CMFormatDescriptionRef desc = CMSampleBufferGetFormatDescription(aSrc); if (!desc) { LOGE("fail to get format description from sample"); return false; } if (aAsAnnexB) { return WriteSPSPPS(aDst, desc); } RefPtr avcc = extractAvcc(desc); if (!avcc) { LOGE("failed to extract avcc"); return false; } if (!mAvcc || !H264::CompareExtraData(avcc, mAvcc)) { LOGV("avcC changed, updating"); mAvcc = avcc; aDst->mExtraData = mAvcc; } return true; } static bool WriteNALUs(MediaRawData* aDst, CMSampleBufferRef aSrc, bool aAsAnnexB = false) { size_t srcRemaining = CMSampleBufferGetTotalSampleSize(aSrc); CMBlockBufferRef block = CMSampleBufferGetDataBuffer(aSrc); if (!block) { LOGE("Cannot get block buffer frome sample"); return false; } UniquePtr writer(aDst->CreateWriter()); size_t writtenLength = aDst->Size(); // Ensure capacity. if (!writer->SetSize(writtenLength + srcRemaining)) { LOGE("Cannot allocate buffer"); return false; } size_t readLength = 0; while (srcRemaining > 0) { // Extract the size of next NAL unit uint8_t unitSizeBytes[4]; MOZ_ASSERT(srcRemaining > sizeof(unitSizeBytes)); if (CMBlockBufferCopyDataBytes(block, readLength, sizeof(unitSizeBytes), reinterpret_cast( unitSizeBytes)) != kCMBlockBufferNoErr) { LOGE("Cannot copy unit size bytes"); return false; } size_t unitSize = CFSwapInt32BigToHost(*reinterpret_cast(unitSizeBytes)); if (aAsAnnexB) { // Replace unit size bytes with NALU start code. PodCopy(writer->Data() + writtenLength, kNALUStart, sizeof(kNALUStart)); readLength += sizeof(unitSizeBytes); srcRemaining -= sizeof(unitSizeBytes); writtenLength += sizeof(kNALUStart); } else { // Copy unit size bytes + data. unitSize += sizeof(unitSizeBytes); } MOZ_ASSERT(writtenLength + unitSize <= aDst->Size()); // Copy NAL unit data if (CMBlockBufferCopyDataBytes(block, readLength, unitSize, writer->Data() + writtenLength) != kCMBlockBufferNoErr) { LOGE("Cannot copy unit data"); return false; } readLength += unitSize; srcRemaining -= unitSize; writtenLength += unitSize; } MOZ_ASSERT(writtenLength == aDst->Size()); return true; } void AppleVTEncoder::OutputFrame(OSStatus aStatus, VTEncodeInfoFlags aFlags, CMSampleBufferRef aBuffer) { LOGV("status: %d, flags: %d, buffer %p", aStatus, aFlags, aBuffer); if (aStatus != noErr) { ProcessOutput(nullptr, EncodeResult::EncodeError); return; } if (aFlags & kVTEncodeInfo_FrameDropped) { ProcessOutput(nullptr, EncodeResult::FrameDropped); return; } if (!aBuffer) { ProcessOutput(nullptr, EncodeResult::EmptyBuffer); return; } RefPtr output(new MediaRawData()); if (__builtin_available(macos 11.3, *)) { if (mConfig.mScalabilityMode != ScalabilityMode::None) { CFDictionaryRef dict = (CFDictionaryRef)(CFArrayGetValueAtIndex( CMSampleBufferGetSampleAttachmentsArray(aBuffer, true), 0)); CFBooleanRef isBaseLayerRef = (CFBooleanRef)CFDictionaryGetValue( dict, (const void*)kCMSampleAttachmentKey_IsDependedOnByOthers); Boolean isBaseLayer = CFBooleanGetValue(isBaseLayerRef); output->mTemporalLayerId.emplace(isBaseLayer ? 0 : 1); } } bool forceAvcc = false; if (mConfig.mCodecSpecific->is()) { forceAvcc = mConfig.mCodecSpecific->as().mFormat == H264BitStreamFormat::AVC; } bool asAnnexB = !forceAvcc; bool succeeded = WriteExtraData(output, aBuffer, asAnnexB) && WriteNALUs(output, aBuffer, asAnnexB); output->mTime = media::TimeUnit::FromSeconds( CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(aBuffer))); output->mDuration = media::TimeUnit::FromSeconds( CMTimeGetSeconds(CMSampleBufferGetOutputDuration(aBuffer))); LOGV("Make a %s output[time: %s, duration: %s]: %s", asAnnexB ? "AnnexB" : "AVCC", output->mTime.ToString().get(), output->mDuration.ToString().get(), succeeded ? "succeed" : "failed"); ProcessOutput(succeeded ? std::move(output) : nullptr, EncodeResult::Success); } void AppleVTEncoder::ProcessOutput(RefPtr&& aOutput, EncodeResult aResult) { if (!mTaskQueue->IsCurrentThreadIn()) { LOGV("Dispatch ProcessOutput to task queue"); nsresult rv = mTaskQueue->Dispatch( NewRunnableMethod, EncodeResult>( "AppleVTEncoder::ProcessOutput", this, &AppleVTEncoder::ProcessOutput, std::move(aOutput), aResult)); MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); Unused << rv; return; } if (aResult != EncodeResult::Success) { switch (aResult) { case EncodeResult::EncodeError: mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Failed to encode"_ns); break; case EncodeResult::EmptyBuffer: mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Buffer is empty"_ns); break; case EncodeResult::FrameDropped: if (mConfig.mUsage == Usage::Realtime) { // Dropping a frame in real-time usage is okay. LOGW("Frame is dropped"); } else { // Some usages like transcoding should not drop a frame. LOGE("Frame is dropped"); mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Frame is dropped"_ns); } break; default: MOZ_ASSERT_UNREACHABLE("Unknown EncodeResult"); break; } MaybeResolveOrRejectEncodePromise(); return; } LOGV("Got %zu bytes of output", !aOutput.get() ? 0 : aOutput->Size()); if (!aOutput) { mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No converted output"_ns); MaybeResolveOrRejectEncodePromise(); return; } mEncodedData.AppendElement(std::move(aOutput)); MaybeResolveOrRejectEncodePromise(); } RefPtr AppleVTEncoder::Encode( const MediaData* aSample) { MOZ_ASSERT(aSample != nullptr); RefPtr sample(aSample->As()); RefPtr self = this; return InvokeAsync(mTaskQueue, __func__, [self, this, sample] { MOZ_ASSERT(mEncodePromise.IsEmpty(), "Encode should not be called again before getting results"); RefPtr p = mEncodePromise.Ensure(__func__); ProcessEncode(sample); return p; }); } RefPtr AppleVTEncoder::Reconfigure( const RefPtr& aConfigurationChanges) { return InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessReconfigure, aConfigurationChanges); } void AppleVTEncoder::ProcessEncode(const RefPtr& aSample) { LOGV("::ProcessEncode"); AssertOnTaskQueue(); MOZ_ASSERT(mSession); if (NS_FAILED(mError)) { LOGE("Pending error: %s", mError.Description().get()); MaybeResolveOrRejectEncodePromise(); } AutoCVBufferRef buffer( CreateCVPixelBuffer(aSample->mImage)); if (!buffer) { LOGE("Failed to allocate buffer"); mError = MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate buffer"_ns); MaybeResolveOrRejectEncodePromise(); return; } CFDictionaryRef frameProps = nullptr; if (aSample->mKeyframe) { CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame}; CFTypeRef values[] = {kCFBooleanTrue}; MOZ_ASSERT(std::size(keys) == std::size(values)); frameProps = CFDictionaryCreate( kCFAllocatorDefault, keys, values, std::size(keys), &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); }; VTEncodeInfoFlags info; OSStatus status = VTCompressionSessionEncodeFrame( mSession, buffer, CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S), CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S), frameProps, nullptr /* sourceFrameRefcon */, &info); if (status != noErr) { LOGE("VTCompressionSessionEncodeFrame error: %d", status); mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "VTCompressionSessionEncodeFrame error"_ns); MaybeResolveOrRejectEncodePromise(); return; } if (mConfig.mUsage != Usage::Realtime) { MaybeResolveOrRejectEncodePromise(); return; } // The latency between encoding a sample and receiving the encoded output is // critical in real-time usage. To minimize the latency, the output result // should be returned immediately once they are ready, instead of being // returned in the next or later Encode() iterations. LOGV("Encoding in progress"); // Workaround for real-time encoding in OS versions < 11. ForceOutputIfNeeded(); } RefPtr AppleVTEncoder::ProcessReconfigure( const RefPtr& aConfigurationChanges) { AssertOnTaskQueue(); MOZ_ASSERT(mSession); bool ok = false; for (const auto& confChange : aConfigurationChanges->mChanges) { // A reconfiguration on the fly succeeds if all changes can be applied // successfuly. In case of failure, the encoder will be drained and // recreated. ok &= confChange.match( // Not supported yet [&](const DimensionsChange& aChange) -> bool { return false; }, [&](const DisplayDimensionsChange& aChange) -> bool { return false; }, [&](const BitrateModeChange& aChange) -> bool { mConfig.mBitrateMode = aChange.get(); return SetBitrateAndMode(mConfig.mBitrateMode, mConfig.mBitrate); }, [&](const BitrateChange& aChange) -> bool { mConfig.mBitrate = aChange.get().refOr(0); // 0 is the default in AppleVTEncoder: the encoder chooses the bitrate // based on the content. return SetBitrateAndMode(mConfig.mBitrateMode, mConfig.mBitrate); }, [&](const FramerateChange& aChange) -> bool { // 0 means default, in VideoToolbox, and is valid, perform some light // sanitation on other values. double fps = aChange.get().refOr(0); if (std::isnan(fps) || fps < 0 || int64_t(fps) > std::numeric_limits::max()) { LOGE("Invalid fps of %lf", fps); return false; } return SetFrameRate(AssertedCast(fps)); }, [&](const UsageChange& aChange) -> bool { mConfig.mUsage = aChange.get(); return SetRealtime(aChange.get() == Usage::Realtime); }, [&](const ContentHintChange& aChange) -> bool { return false; }, [&](const SampleRateChange& aChange) -> bool { return false; }, [&](const NumberOfChannelsChange& aChange) -> bool { return false; }); }; using P = MediaDataEncoder::ReconfigurationPromise; if (ok) { return P::CreateAndResolve(true, __func__); } return P::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__); } static size_t NumberOfPlanes(OSType aPixelFormat) { switch (aPixelFormat) { case kCVPixelFormatType_32RGBA: case kCVPixelFormatType_32BGRA: case kCVPixelFormatType_24RGB: case kCVPixelFormatType_24BGR: case kCVPixelFormatType_OneComponent8: return 1; case kCVPixelFormatType_444YpCbCr8: case kCVPixelFormatType_420YpCbCr8PlanarFullRange: case kCVPixelFormatType_420YpCbCr8Planar: return 3; case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange: case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: return 2; default: LOGE("Unsupported input pixel format"); return 0; } } using namespace layers; static void ReleaseSurface(void* aReleaseRef, const void* aBaseAddress) { RefPtr released = dont_AddRef(static_cast(aReleaseRef)); } static void ReleaseImage(void* aImageGrip, const void* aDataPtr, size_t aDataSize, size_t aNumOfPlanes, const void** aPlanes) { (static_cast(aImageGrip))->Release(); } CVPixelBufferRef AppleVTEncoder::CreateCVPixelBuffer(Image* aSource) { AssertOnTaskQueue(); auto sfr = EncoderConfig::SampleFormat::FromImage(aSource); if (sfr.isErr()) { MediaResult err = sfr.unwrapErr(); LOGE("%s", err.Description().get()); return nullptr; } const EncoderConfig::SampleFormat sf = sfr.unwrap(); gfx::ColorRange defaultColorRange = sf.IsYUV() ? gfx::ColorRange::LIMITED : gfx::ColorRange::FULL; auto pfr = MapPixelFormat(sf.mPixelFormat, sf.mColorSpace.mRange ? sf.mColorSpace.mRange.value() : defaultColorRange); if (pfr.isErr()) { MediaResult err = pfr.unwrapErr(); LOGE("%s", err.Description().get()); return nullptr; } OSType pixelFormat = pfr.unwrap(); if (sf != mConfig.mFormat) { LOGV( "Input image in format %s but encoder configured with format %s. " "Fingers crossed", sf.ToString().get(), mConfig.mFormat.ToString().get()); // Bug 1955153: If the encoder encounters a kVTPixelTransferNotSupportedErr // error due to an unsupported image format, it must be re-initialized. // Additionally, any changes to the color space also require re-initializing // the encoder. } if (aSource->GetFormat() == ImageFormat::PLANAR_YCBCR) { PlanarYCbCrImage* image = aSource->AsPlanarYCbCrImage(); if (!image || !image->GetData()) { LOGE("Failed to get PlanarYCbCrImage or its data"); return nullptr; } size_t numPlanes = NumberOfPlanes(pixelFormat); const PlanarYCbCrImage::Data* yuv = image->GetData(); auto ySize = yuv->YDataSize(); auto cbcrSize = yuv->CbCrDataSize(); void* addresses[3] = {}; size_t widths[3] = {}; size_t heights[3] = {}; size_t strides[3] = {}; switch (numPlanes) { case 3: addresses[2] = yuv->mCrChannel; widths[2] = cbcrSize.width; heights[2] = cbcrSize.height; strides[2] = yuv->mCbCrStride; [[fallthrough]]; case 2: addresses[1] = yuv->mCbChannel; widths[1] = cbcrSize.width; heights[1] = cbcrSize.height; strides[1] = yuv->mCbCrStride; [[fallthrough]]; case 1: addresses[0] = yuv->mYChannel; widths[0] = ySize.width; heights[0] = ySize.height; strides[0] = yuv->mYStride; break; default: LOGE("Unexpected number of planes: %zu", numPlanes); MOZ_ASSERT_UNREACHABLE("Unexpected number of planes"); return nullptr; } CVPixelBufferRef buffer = nullptr; image->AddRef(); // Grip input buffers. CVReturn rv = CVPixelBufferCreateWithPlanarBytes( kCFAllocatorDefault, yuv->mPictureRect.width, yuv->mPictureRect.height, pixelFormat, nullptr /* dataPtr */, 0 /* dataSize */, numPlanes, addresses, widths, heights, strides, ReleaseImage /* releaseCallback */, image /* releaseRefCon */, nullptr /* pixelBufferAttributes */, &buffer); if (rv == kCVReturnSuccess) { return buffer; // |image| will be released in |ReleaseImage()|. } LOGE("CVPIxelBufferCreateWithPlanarBytes error"); image->Release(); return nullptr; } RefPtr surface = aSource->GetAsSourceSurface(); if (!surface) { LOGE("Failed to get SourceSurface"); return nullptr; } RefPtr dataSurface = surface->GetDataSurface(); if (!dataSurface) { LOGE("Failed to get DataSurface"); return nullptr; } gfx::DataSourceSurface::ScopedMap map(dataSurface, gfx::DataSourceSurface::READ); if (NS_WARN_IF(!map.IsMapped())) { LOGE("Failed to map DataSurface"); return nullptr; } CVPixelBufferRef buffer = nullptr; gfx::DataSourceSurface* dss = dataSurface.forget().take(); CVReturn rv = CVPixelBufferCreateWithBytes( kCFAllocatorDefault, dss->GetSize().Width(), dss->GetSize().Height(), pixelFormat, map.GetData(), map.GetStride(), ReleaseSurface, dss, nullptr, &buffer); if (rv == kCVReturnSuccess) { return buffer; // |dss| will be released in |ReleaseSurface()|. } LOGE("CVPIxelBufferCreateWithBytes error: %d", rv); RefPtr released = dont_AddRef(dss); return nullptr; } RefPtr AppleVTEncoder::Drain() { return InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessDrain); } RefPtr AppleVTEncoder::ProcessDrain() { LOGV("::ProcessDrain"); AssertOnTaskQueue(); MOZ_ASSERT(mSession); OSStatus status = VTCompressionSessionCompleteFrames(mSession, kCMTimeIndefinite); if (status != noErr) { LOGE("VTCompressionSessionCompleteFrames error"); return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__); } // Resolve the pending encode promise if any. MaybeResolveOrRejectEncodePromise(); // VTCompressionSessionCompleteFrames() could have queued multiple tasks with // the new drained frames. Dispatch a task after them to resolve the promise // with those frames. RefPtr self = this; return InvokeAsync(mTaskQueue, __func__, [self]() { EncodedData pendingFrames(std::move(self->mEncodedData)); LOGV("Resolve drain promise with %zu encoded outputs", pendingFrames.Length()); self->mEncodedData = EncodedData(); return EncodePromise::CreateAndResolve(std::move(pendingFrames), __func__); }); } RefPtr AppleVTEncoder::Shutdown() { return InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessShutdown); } RefPtr AppleVTEncoder::ProcessShutdown() { LOGD("::ProcessShutdown"); AssertOnTaskQueue(); InvalidateSessionIfNeeded(); mIsHardwareAccelerated = false; mError = MediaResult(NS_ERROR_DOM_MEDIA_CANCELED, "Canceled in shutdown"_ns); MaybeResolveOrRejectEncodePromise(); mError = NS_OK; return ShutdownPromise::CreateAndResolve(true, __func__); } RefPtr AppleVTEncoder::SetBitrate(uint32_t aBitsPerSec) { RefPtr self = this; return InvokeAsync(mTaskQueue, __func__, [self, aBitsPerSec]() { MOZ_ASSERT(self->mSession); bool rv = self->SetBitrateAndMode(self->mConfig.mBitrateMode, aBitsPerSec); return rv ? GenericPromise::CreateAndResolve(true, __func__) : GenericPromise::CreateAndReject( NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, __func__); }); } void AppleVTEncoder::MaybeResolveOrRejectEncodePromise() { AssertOnTaskQueue(); if (mEncodePromise.IsEmpty()) { LOGV( "No pending promise to resolve(pending outputs: %zu) or reject(err: " "%s)", mEncodedData.Length(), mError.Description().get()); return; } if (mTimer) { mTimer->Cancel(); mTimer = nullptr; } if (NS_FAILED(mError.Code())) { LOGE("Rejecting encode promise with error: %s", mError.Description().get()); mEncodePromise.Reject(mError, __func__); return; } LOGV("Resolving with %zu encoded outputs", mEncodedData.Length()); mEncodePromise.Resolve(std::move(mEncodedData), __func__); } void AppleVTEncoder::ForceOutputIfNeeded() { if (__builtin_available(macos 11.0, *)) { return; } AssertOnTaskQueue(); // Ideally, OutputFrame (called via FrameCallback) should resolve the encode // promise. However, sometimes output is produced only after multiple // inputs. To ensure continuous encoding, we force the encoder to produce a // potentially empty output if no result is received in 50 ms. RefPtr self = this; auto r = NS_NewTimerWithCallback( [self](nsITimer* aTimer) { if (!self->mSession) { LOGV("Do nothing since the encoder has been shut down"); return; } LOGV("Resolving the pending promise"); self->MaybeResolveOrRejectEncodePromise(); }, TimeDuration::FromMilliseconds(50), nsITimer::TYPE_ONE_SHOT, "EncodingProgressChecker", mTaskQueue); if (r.isErr()) { LOGE( "Failed to set an encoding progress checker. Resolve the pending " "promise now"); MaybeResolveOrRejectEncodePromise(); return; } mTimer = r.unwrap(); } #undef LOGE #undef LOGW #undef LOGD #undef LOGV } // namespace mozilla