author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
commit    | 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree      | f435a8308119effd964b339f76abb83a57c29483 /dom/media/platforms/wmf/WMFVideoMFTManager.cpp
parent    | Initial commit. (diff)
download  | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz, firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/platforms/wmf/WMFVideoMFTManager.cpp')
-rw-r--r-- | dom/media/platforms/wmf/WMFVideoMFTManager.cpp | 1014 |
1 file changed, 1014 insertions, 0 deletions
diff --git a/dom/media/platforms/wmf/WMFVideoMFTManager.cpp b/dom/media/platforms/wmf/WMFVideoMFTManager.cpp
new file mode 100644
index 0000000000..65480c4a01
--- /dev/null
+++ b/dom/media/platforms/wmf/WMFVideoMFTManager.cpp
@@ -0,0 +1,1014 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "WMFVideoMFTManager.h"
+
+#include <psapi.h>
+#include <algorithm>
+#include "DXVA2Manager.h"
+#include "GMPUtils.h"  // For SplitAt. TODO: Move SplitAt to a central place.
+#include "IMFYCbCrImage.h"
+#include "ImageContainer.h"
+#include "MediaInfo.h"
+#include "MediaTelemetryConstants.h"
+#include "VideoUtils.h"
+#include "WMFDecoderModule.h"
+#include "WMFUtils.h"
+#include "gfx2DGlue.h"
+#include "gfxWindowsPlatform.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/Logging.h"
+#include "mozilla/SchedulerGroup.h"
+#include "mozilla/StaticPrefs_gfx.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/SyncRunnable.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/gfx/DeviceManagerDx.h"
+#include "mozilla/gfx/gfxVars.h"
+#include "mozilla/layers/LayersTypes.h"
+#include "nsPrintfCString.h"
+#include "nsThreadUtils.h"
+#include "nsWindowsHelpers.h"
+
+#define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
+
+using mozilla::layers::Image;
+using mozilla::layers::IMFYCbCrImage;
+using mozilla::layers::LayerManager;
+using mozilla::layers::LayersBackend;
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+
+LayersBackend GetCompositorBackendType(
+    layers::KnowsCompositor* aKnowsCompositor) {
+  if (aKnowsCompositor) {
+    return aKnowsCompositor->GetCompositorBackendType();
+  }
+  return LayersBackend::LAYERS_NONE;
+}
+
+WMFVideoMFTManager::WMFVideoMFTManager(
+    const VideoInfo& aConfig, layers::KnowsCompositor* aKnowsCompositor,
+    layers::ImageContainer* aImageContainer, float aFramerate,
+    const CreateDecoderParams::OptionSet& aOptions, bool aDXVAEnabled,
+    Maybe<TrackingId> aTrackingId)
+    : mVideoInfo(aConfig),
+      mImageSize(aConfig.mImage),
+      mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)),
+      mSoftwareImageSize(aConfig.mImage),
+      mSoftwarePictureSize(aConfig.mImage),
+      mVideoStride(0),
+      mColorSpace(aConfig.mColorSpace),
+      mColorRange(aConfig.mColorRange),
+      mImageContainer(aImageContainer),
+      mKnowsCompositor(aKnowsCompositor),
+      mDXVAEnabled(aDXVAEnabled &&
+                   !aOptions.contains(
+                       CreateDecoderParams::Option::HardwareDecoderNotAllowed)),
+      mZeroCopyNV12Texture(false),
+      mFramerate(aFramerate),
+      mLowLatency(aOptions.contains(CreateDecoderParams::Option::LowLatency)),
+      mTrackingId(std::move(aTrackingId))
+// mVideoStride, mVideoWidth, mVideoHeight, mUseHwAccel are initialized in
+// Init().
+{
+  MOZ_COUNT_CTOR(WMFVideoMFTManager);
+
+  // The V and U planes are stored 16-row-aligned, so we need to add padding
+  // to the row heights to ensure the Y'CbCr planes are referenced properly.
+  // This value is only used with software decoder.
+ if (mSoftwareImageSize.height % 16 != 0) { + mSoftwareImageSize.height += 16 - (mSoftwareImageSize.height % 16); + } +} + +WMFVideoMFTManager::~WMFVideoMFTManager() { + MOZ_COUNT_DTOR(WMFVideoMFTManager); +} + +/* static */ +const GUID& WMFVideoMFTManager::GetMediaSubtypeGUID() { + MOZ_ASSERT(StreamTypeIsVideo(mStreamType)); + switch (mStreamType) { + case WMFStreamType::H264: + return MFVideoFormat_H264; + case WMFStreamType::VP8: + return MFVideoFormat_VP80; + case WMFStreamType::VP9: + return MFVideoFormat_VP90; + case WMFStreamType::AV1: + return MFVideoFormat_AV1; + case WMFStreamType::HEVC: + return MFVideoFormat_HEVC; + default: + return GUID_NULL; + }; +} + +bool WMFVideoMFTManager::InitializeDXVA() { + // If we use DXVA but aren't running with a D3D layer manager then the + // readback of decoded video frames from GPU to CPU memory grinds painting + // to a halt, and makes playback performance *worse*. + if (!mDXVAEnabled) { + mDXVAFailureReason.AssignLiteral( + "Hardware video decoding disabled or blacklisted"); + return false; + } + MOZ_ASSERT(!mDXVA2Manager); + if (!mKnowsCompositor || !mKnowsCompositor->SupportsD3D11()) { + mDXVAFailureReason.AssignLiteral("Unsupported layers backend"); + return false; + } + + if (!XRE_IsRDDProcess() && !XRE_IsGPUProcess()) { + mDXVAFailureReason.AssignLiteral( + "DXVA only supported in RDD or GPU process"); + return false; + } + + bool d3d11 = true; + if (!StaticPrefs::media_wmf_dxva_d3d11_enabled()) { + mDXVAFailureReason = nsPrintfCString( + "D3D11: %s is false", + StaticPrefs::GetPrefName_media_wmf_dxva_d3d11_enabled()); + d3d11 = false; + } + + if (d3d11) { + mDXVAFailureReason.AppendLiteral("D3D11: "); + mDXVA2Manager.reset( + DXVA2Manager::CreateD3D11DXVA(mKnowsCompositor, mDXVAFailureReason)); + if (mDXVA2Manager) { + return true; + } + } + + return mDXVA2Manager != nullptr; +} + +MediaResult WMFVideoMFTManager::ValidateVideoInfo() { + NS_ENSURE_TRUE(StreamTypeIsVideo(mStreamType), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Invalid stream type"))); + switch (mStreamType) { + case WMFStreamType::H264: + if (!StaticPrefs::media_wmf_allow_unsupported_resolutions()) { + // The WMF H.264 decoder is documented to have a minimum resolution + // 48x48 pixels for resolution, but we won't enable hw decoding for the + // resolution < 132 pixels. It's assumed the software decoder doesn't + // have this limitation, but it still might have maximum resolution + // limitation. + // https://msdn.microsoft.com/en-us/library/windows/desktop/dd797815(v=vs.85).aspx + static const int32_t MAX_H264_PIXEL_COUNT = 4096 * 2304; + const CheckedInt32 pixelCount = + CheckedInt32(mVideoInfo.mImage.width) * mVideoInfo.mImage.height; + + if (!pixelCount.isValid() || + pixelCount.value() > MAX_H264_PIXEL_COUNT) { + mIsValid = false; + return MediaResult( + NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Can't decode H.264 stream because its " + "resolution is out of the maximum limitation")); + } + } + break; + default: + break; + } + + return NS_OK; +} + +MediaResult WMFVideoMFTManager::Init() { + MediaResult result = ValidateVideoInfo(); + if (NS_FAILED(result)) { + return result; + } + + result = InitInternal(); + if (NS_SUCCEEDED(result) && mDXVA2Manager) { + // If we had some failures but eventually made it work, + // make sure we preserve the messages. 
+ mDXVAFailureReason.AppendLiteral("Using D3D11 API"); + } + + return result; +} + +MediaResult WMFVideoMFTManager::InitInternal() { + // The H264 SanityTest uses a 132x132 videos to determine if DXVA can be used. + // so we want to use the software decoder for videos with lower resolutions. + static const int MIN_H264_HW_WIDTH = 132; + static const int MIN_H264_HW_HEIGHT = 132; + + mUseHwAccel = false; // default value; changed if D3D setup succeeds. + bool useDxva = true; + + if (mStreamType == WMFStreamType::H264 && + (mVideoInfo.ImageRect().width < MIN_H264_HW_WIDTH || + mVideoInfo.ImageRect().height < MIN_H264_HW_HEIGHT)) { + useDxva = false; + mDXVAFailureReason = nsPrintfCString( + "H264 video resolution too low: %" PRIu32 "x%" PRIu32, + mVideoInfo.ImageRect().width, mVideoInfo.ImageRect().height); + } + + if (useDxva) { + useDxva = InitializeDXVA(); + } + + RefPtr<MFTDecoder> decoder = new MFTDecoder(); + HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder); + NS_ENSURE_TRUE(SUCCEEDED(hr), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Can't create the MFT decoder."))); + + RefPtr<IMFAttributes> attr(decoder->GetAttributes()); + UINT32 aware = 0; + if (attr) { + attr->GetUINT32(MF_SA_D3D_AWARE, &aware); + attr->SetUINT32(CODECAPI_AVDecNumWorkerThreads, + WMFDecoderModule::GetNumDecoderThreads()); + bool lowLatency = StaticPrefs::media_wmf_low_latency_enabled(); + if (mLowLatency || lowLatency) { + hr = attr->SetUINT32(CODECAPI_AVLowLatencyMode, TRUE); + if (SUCCEEDED(hr)) { + LOG("Enabling Low Latency Mode"); + } else { + LOG("Couldn't enable Low Latency Mode"); + } + } + + if (gfx::gfxVars::HwDecodedVideoZeroCopy() && mKnowsCompositor && + mKnowsCompositor->UsingHardwareWebRender() && mDXVA2Manager && + mDXVA2Manager->SupportsZeroCopyNV12Texture()) { + mZeroCopyNV12Texture = true; + const int kOutputBufferSize = 10; + + // Each picture buffer can store a sample, plus one in + // pending_output_samples_. The decoder adds this number to the number of + // reference pictures it expects to need and uses that to determine the + // array size of the output texture. + const int kMaxOutputSamples = kOutputBufferSize + 1; + attr->SetUINT32(MF_SA_MINIMUM_OUTPUT_SAMPLE_COUNT_PROGRESSIVE, + kMaxOutputSamples); + attr->SetUINT32(MF_SA_MINIMUM_OUTPUT_SAMPLE_COUNT, kMaxOutputSamples); + } + } + + if (useDxva) { + if (aware) { + // TODO: Test if I need this anywhere... Maybe on Vista? + // hr = attr->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, TRUE); + // NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + MOZ_ASSERT(mDXVA2Manager); + ULONG_PTR manager = ULONG_PTR(mDXVA2Manager->GetDXVADeviceManager()); + hr = decoder->SendMFTMessage(MFT_MESSAGE_SET_D3D_MANAGER, manager); + if (SUCCEEDED(hr)) { + mUseHwAccel = true; + } else { + mDXVAFailureReason = nsPrintfCString( + "MFT_MESSAGE_SET_D3D_MANAGER failed with code %lX", hr); + } + } else { + mDXVAFailureReason.AssignLiteral( + "Decoder returned false for MF_SA_D3D_AWARE"); + } + } + + if (!mDXVAFailureReason.IsEmpty()) { + // DXVA failure reason being set can mean that D3D11 failed, or that DXVA is + // entirely disabled. 
+ LOG("DXVA failure: %s", mDXVAFailureReason.get()); + } + + if (!mUseHwAccel) { + if (mDXVA2Manager) { + // Either mDXVAEnabled was set to false prior the second call to + // InitInternal() due to CanUseDXVA() returning false, or + // MFT_MESSAGE_SET_D3D_MANAGER failed + mDXVA2Manager.reset(); + } + if (mStreamType == WMFStreamType::VP9 || + mStreamType == WMFStreamType::VP8 || + mStreamType == WMFStreamType::AV1 || + mStreamType == WMFStreamType::HEVC) { + return MediaResult( + NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Use VP8/VP9/AV1 MFT only if HW acceleration " + "is available.")); + } + Telemetry::Accumulate(Telemetry::MEDIA_DECODER_BACKEND_USED, + uint32_t(media::MediaDecoderBackend::WMFSoftware)); + } + + mDecoder = decoder; + hr = SetDecoderMediaTypes(); + NS_ENSURE_TRUE( + SUCCEEDED(hr), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Fail to set the decoder media types."))); + + RefPtr<IMFMediaType> inputType; + hr = mDecoder->GetInputMediaType(inputType); + NS_ENSURE_TRUE( + SUCCEEDED(hr), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Fail to get the input media type."))); + + RefPtr<IMFMediaType> outputType; + hr = mDecoder->GetOutputMediaType(outputType); + NS_ENSURE_TRUE( + SUCCEEDED(hr), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Fail to get the output media type."))); + + if (mUseHwAccel && !CanUseDXVA(inputType, outputType)) { + LOG("DXVA manager determined that the input type was unsupported in " + "hardware, retrying init without DXVA."); + mDXVAEnabled = false; + // DXVA initialization with current decoder actually failed, + // re-do initialization. + return InitInternal(); + } + + LOG("Video Decoder initialized, Using DXVA: %s", + (mUseHwAccel ? "Yes" : "No")); + + if (mUseHwAccel) { + hr = mDXVA2Manager->ConfigureForSize( + outputType, + mColorSpace.refOr( + DefaultColorSpace({mImageSize.width, mImageSize.height})), + mColorRange, mVideoInfo.ImageRect().width, + mVideoInfo.ImageRect().height); + NS_ENSURE_TRUE(SUCCEEDED(hr), + MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Fail to configure image size for " + "DXVA2Manager."))); + } else { + GetDefaultStride(outputType, mVideoInfo.ImageRect().width, &mVideoStride); + } + LOG("WMFVideoMFTManager frame geometry stride=%u picture=(%d, %d, %d, %d) " + "display=(%d,%d)", + mVideoStride, mVideoInfo.ImageRect().x, mVideoInfo.ImageRect().y, + mVideoInfo.ImageRect().width, mVideoInfo.ImageRect().height, + mVideoInfo.mDisplay.width, mVideoInfo.mDisplay.height); + + if (!mUseHwAccel) { + RefPtr<ID3D11Device> device = gfx::DeviceManagerDx::Get()->GetImageDevice(); + if (device) { + mIMFUsable = true; + } + } + return MediaResult(NS_OK); +} + +HRESULT +WMFVideoMFTManager::SetDecoderMediaTypes() { + // Setup the input/output media types. 
+ RefPtr<IMFMediaType> inputType; + HRESULT hr = wmf::MFCreateMediaType(getter_AddRefs(inputType)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID()); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = inputType->SetUINT32(MF_MT_INTERLACE_MODE, + MFVideoInterlace_MixedInterlaceOrProgressive); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = inputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = MFSetAttributeSize(inputType, MF_MT_FRAME_SIZE, + mVideoInfo.ImageRect().width, + mVideoInfo.ImageRect().height); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + UINT32 fpsDenominator = 1000; + UINT32 fpsNumerator = static_cast<uint32_t>(mFramerate * fpsDenominator); + if (fpsNumerator > 0) { + hr = MFSetAttributeRatio(inputType, MF_MT_FRAME_RATE, fpsNumerator, + fpsDenominator); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } + + RefPtr<IMFMediaType> outputType; + hr = wmf::MFCreateMediaType(getter_AddRefs(outputType)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = MFSetAttributeSize(outputType, MF_MT_FRAME_SIZE, + mVideoInfo.ImageRect().width, + mVideoInfo.ImageRect().height); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + if (fpsNumerator > 0) { + hr = MFSetAttributeRatio(outputType, MF_MT_FRAME_RATE, fpsNumerator, + fpsDenominator); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } + + GUID outputSubType = [&]() { + switch (mVideoInfo.mColorDepth) { + case gfx::ColorDepth::COLOR_8: + return mUseHwAccel ? MFVideoFormat_NV12 : MFVideoFormat_YV12; + case gfx::ColorDepth::COLOR_10: + return MFVideoFormat_P010; + case gfx::ColorDepth::COLOR_12: + case gfx::ColorDepth::COLOR_16: + return MFVideoFormat_P016; + default: + MOZ_ASSERT_UNREACHABLE("Unexpected color depth"); + } + }(); + hr = outputType->SetGUID(MF_MT_SUBTYPE, outputSubType); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + if (mZeroCopyNV12Texture) { + RefPtr<IMFAttributes> attr(mDecoder->GetOutputStreamAttributes()); + if (attr) { + hr = attr->SetUINT32(MF_SA_D3D11_SHARED_WITHOUT_MUTEX, TRUE); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + hr = attr->SetUINT32(MF_SA_D3D11_BINDFLAGS, + D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_DECODER); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } + } + + return mDecoder->SetMediaTypes(inputType, outputType); +} + +HRESULT +WMFVideoMFTManager::Input(MediaRawData* aSample) { + if (!mIsValid) { + return E_FAIL; + } + + if (!mDecoder) { + // This can happen during shutdown. + return E_FAIL; + } + + mTrackingId.apply([&](const auto& aId) { + MediaInfoFlag flag = MediaInfoFlag::None; + flag |= (aSample->mKeyframe ? MediaInfoFlag::KeyFrame + : MediaInfoFlag::NonKeyFrame); + flag |= (mUseHwAccel ? 
MediaInfoFlag::HardwareDecoding + : MediaInfoFlag::SoftwareDecoding); + switch (mStreamType) { + case WMFStreamType::H264: + flag |= MediaInfoFlag::VIDEO_H264; + break; + case WMFStreamType::VP8: + flag |= MediaInfoFlag::VIDEO_VP8; + break; + case WMFStreamType::VP9: + flag |= MediaInfoFlag::VIDEO_VP9; + break; + case WMFStreamType::AV1: + flag |= MediaInfoFlag::VIDEO_AV1; + break; + case WMFStreamType::HEVC: + flag |= MediaInfoFlag::VIDEO_HEVC; + break; + default: + break; + }; + mPerformanceRecorder.Start(aSample->mTime.ToMicroseconds(), + "WMFVideoDecoder"_ns, aId, flag); + }); + + RefPtr<IMFSample> inputSample; + HRESULT hr = mDecoder->CreateInputSample( + aSample->Data(), uint32_t(aSample->Size()), + aSample->mTime.ToMicroseconds(), aSample->mDuration.ToMicroseconds(), + &inputSample); + NS_ENSURE_TRUE(SUCCEEDED(hr) && inputSample != nullptr, hr); + + if (!mColorSpace && aSample->mTrackInfo) { + // The colorspace definition is found in the H264 SPS NAL, available out of + // band, while for VP9 it's only available within the VP9 bytestream. + // The info would have been updated by the MediaChangeMonitor. + mColorSpace = aSample->mTrackInfo->GetAsVideoInfo()->mColorSpace; + mColorRange = aSample->mTrackInfo->GetAsVideoInfo()->mColorRange; + } + mLastDuration = aSample->mDuration; + + // Forward sample data to the decoder. + return mDecoder->Input(inputSample); +} + +// The MFTransforms we use for decoding H264 and AV1 video will silently fall +// back to software decoding (even if we've negotiated DXVA) if the GPU +// doesn't support decoding the given codec and resolution. It will then upload +// the software decoded frames into d3d textures to preserve behaviour. +// +// Unfortunately this seems to cause corruption (see bug 1193547) and is +// slow because the upload is done into a non-shareable texture and requires +// us to copy it. +// +// This code tests if the given codec and resolution can be supported directly +// on the GPU, and makes sure we only ask the MFT for DXVA if it can be +// supported properly. +// +// Ideally we'd know the framerate during initialization and would also ensure +// that new decoders are created if the resolution changes. Then we could move +// this check into Init and consolidate the main thread blocking code. +bool WMFVideoMFTManager::CanUseDXVA(IMFMediaType* aInputType, + IMFMediaType* aOutputType) { + MOZ_ASSERT(mDXVA2Manager); + // Check if we're able to use hardware decoding for the current codec config. + return mDXVA2Manager->SupportsConfig(mVideoInfo, aInputType, aOutputType); +} + +TimeUnit WMFVideoMFTManager::GetSampleDurationOrLastKnownDuration( + IMFSample* aSample) const { + TimeUnit duration = GetSampleDuration(aSample); + if (!duration.IsValid()) { + // WMF returned a non-success code (likely duration unknown, but the API + // also allows for other, unspecified codes). + LOG("Got unknown sample duration -- bad return code. Using mLastDuration."); + } else if (duration == TimeUnit::Zero()) { + // Duration is zero. WMF uses this to indicate an unknown duration. + LOG("Got unknown sample duration -- zero duration returned. Using " + "mLastDuration."); + } else if (duration.IsNegative()) { + // A negative duration will cause issues up the stack. It's also unclear + // why this would happen, but the API allows for it by returning a signed + // int, so we handle it here. + LOG("Got negative sample duration: %f seconds. Using mLastDuration " + "instead.", + duration.ToSeconds()); + } else { + // We got a duration without any problems. 
+ return duration; + } + + return mLastDuration; +} + +HRESULT +WMFVideoMFTManager::CreateBasicVideoFrame(IMFSample* aSample, + int64_t aStreamOffset, + VideoData** aOutVideoData) { + NS_ENSURE_TRUE(aSample, E_POINTER); + NS_ENSURE_TRUE(aOutVideoData, E_POINTER); + + *aOutVideoData = nullptr; + + HRESULT hr; + RefPtr<IMFMediaBuffer> buffer; + + // Must convert to contiguous buffer to use IMD2DBuffer interface. + hr = aSample->ConvertToContiguousBuffer(getter_AddRefs(buffer)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + // Try and use the IMF2DBuffer interface if available, otherwise fallback + // to the IMFMediaBuffer interface. Apparently IMF2DBuffer is more efficient, + // but only some systems (Windows 8?) support it. + BYTE* data = nullptr; + LONG stride = 0; + RefPtr<IMF2DBuffer> twoDBuffer; + hr = buffer->QueryInterface( + static_cast<IMF2DBuffer**>(getter_AddRefs(twoDBuffer))); + if (SUCCEEDED(hr)) { + hr = twoDBuffer->Lock2D(&data, &stride); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } else { + hr = buffer->Lock(&data, nullptr, nullptr); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + stride = mVideoStride; + } + + const GUID& subType = mDecoder->GetOutputMediaSubType(); + MOZ_DIAGNOSTIC_ASSERT(subType == MFVideoFormat_YV12 || + subType == MFVideoFormat_P010 || + subType == MFVideoFormat_P016); + const gfx::ColorDepth colorDepth = subType == MFVideoFormat_YV12 + ? gfx::ColorDepth::COLOR_8 + : gfx::ColorDepth::COLOR_16; + + // YV12, planar format (3 planes): [YYYY....][VVVV....][UUUU....] + // i.e., Y, then V, then U. + // P010, P016 planar format (2 planes) [YYYY....][UVUV...] + // See + // https://docs.microsoft.com/en-us/windows/desktop/medfound/10-bit-and-16-bit-yuv-video-formats + VideoData::YCbCrBuffer b; + + const uint32_t videoWidth = mSoftwareImageSize.width; + const uint32_t videoHeight = mSoftwareImageSize.height; + + // Y (Y') plane + b.mPlanes[0].mData = data; + b.mPlanes[0].mStride = stride; + b.mPlanes[0].mHeight = videoHeight; + b.mPlanes[0].mWidth = videoWidth; + b.mPlanes[0].mSkip = 0; + + MOZ_DIAGNOSTIC_ASSERT(mSoftwareImageSize.height % 16 == 0, + "decoded height must be 16 bytes aligned"); + const uint32_t y_size = stride * mSoftwareImageSize.height; + const uint32_t v_size = stride * mSoftwareImageSize.height / 4; + const uint32_t halfStride = (stride + 1) / 2; + const uint32_t halfHeight = (videoHeight + 1) / 2; + const uint32_t halfWidth = (videoWidth + 1) / 2; + + if (subType == MFVideoFormat_YV12) { + // U plane (Cb) + b.mPlanes[1].mData = data + y_size + v_size; + b.mPlanes[1].mStride = halfStride; + b.mPlanes[1].mHeight = halfHeight; + b.mPlanes[1].mWidth = halfWidth; + b.mPlanes[1].mSkip = 0; + + // V plane (Cr) + b.mPlanes[2].mData = data + y_size; + b.mPlanes[2].mStride = halfStride; + b.mPlanes[2].mHeight = halfHeight; + b.mPlanes[2].mWidth = halfWidth; + b.mPlanes[2].mSkip = 0; + } else { + // U plane (Cb) + b.mPlanes[1].mData = data + y_size; + b.mPlanes[1].mStride = stride; + b.mPlanes[1].mHeight = halfHeight; + b.mPlanes[1].mWidth = halfWidth; + b.mPlanes[1].mSkip = 1; + + // V plane (Cr) + b.mPlanes[2].mData = data + y_size + sizeof(short); + b.mPlanes[2].mStride = stride; + b.mPlanes[2].mHeight = halfHeight; + b.mPlanes[2].mWidth = halfWidth; + b.mPlanes[2].mSkip = 1; + } + + b.mChromaSubsampling = gfx::ChromaSubsampling::HALF_WIDTH_AND_HEIGHT; + + // YuvColorSpace + b.mYUVColorSpace = + mColorSpace.refOr(DefaultColorSpace({videoWidth, videoHeight})); + b.mColorDepth = colorDepth; + b.mColorRange = mColorRange; + + TimeUnit pts = GetSampleTime(aSample); + 
NS_ENSURE_TRUE(pts.IsValid(), E_FAIL); + TimeUnit duration = GetSampleDurationOrLastKnownDuration(aSample); + NS_ENSURE_TRUE(duration.IsValid(), E_FAIL); + gfx::IntRect pictureRegion = mVideoInfo.ScaledImageRect( + mSoftwarePictureSize.width, mSoftwarePictureSize.height); + + if (colorDepth != gfx::ColorDepth::COLOR_8 || !mKnowsCompositor || + !mKnowsCompositor->SupportsD3D11() || !mIMFUsable) { + Result<already_AddRefed<VideoData>, MediaResult> r = + VideoData::CreateAndCopyData( + mVideoInfo, mImageContainer, aStreamOffset, pts, duration, b, false, + TimeUnit::FromMicroseconds(-1), pictureRegion, mKnowsCompositor); + RefPtr<VideoData> v = r.unwrapOr(nullptr); + if (twoDBuffer) { + twoDBuffer->Unlock2D(); + } else { + buffer->Unlock(); + } + v.forget(aOutVideoData); + return S_OK; + } + + RefPtr<layers::PlanarYCbCrImage> image = + new IMFYCbCrImage(buffer, twoDBuffer, mKnowsCompositor, mImageContainer); + + VideoData::SetVideoDataToImage(image, mVideoInfo, b, pictureRegion, false); + + RefPtr<VideoData> v = VideoData::CreateFromImage( + mVideoInfo.mDisplay, aStreamOffset, pts, duration, image.forget(), false, + TimeUnit::FromMicroseconds(-1)); + + mPerformanceRecorder.Record(pts.ToMicroseconds(), [&](DecodeStage& aStage) { + aStage.SetColorDepth(b.mColorDepth); + aStage.SetColorRange(b.mColorRange); + aStage.SetYUVColorSpace(b.mYUVColorSpace); + if (subType == MFVideoFormat_NV12) { + aStage.SetImageFormat(DecodeStage::NV12); + } else if (subType == MFVideoFormat_YV12) { + aStage.SetImageFormat(DecodeStage::YV12); + } else if (subType == MFVideoFormat_P010) { + aStage.SetImageFormat(DecodeStage::P010); + } else if (subType == MFVideoFormat_P016) { + aStage.SetImageFormat(DecodeStage::P016); + } + aStage.SetResolution(videoWidth, videoHeight); + }); + + v.forget(aOutVideoData); + return S_OK; +} + +HRESULT +WMFVideoMFTManager::CreateD3DVideoFrame(IMFSample* aSample, + int64_t aStreamOffset, + VideoData** aOutVideoData) { + NS_ENSURE_TRUE(aSample, E_POINTER); + NS_ENSURE_TRUE(aOutVideoData, E_POINTER); + NS_ENSURE_TRUE(mDXVA2Manager, E_ABORT); + NS_ENSURE_TRUE(mUseHwAccel, E_ABORT); + + *aOutVideoData = nullptr; + HRESULT hr; + + gfx::IntRect pictureRegion = + mVideoInfo.ScaledImageRect(mImageSize.width, mImageSize.height); + RefPtr<Image> image; + if (mZeroCopyNV12Texture && mDXVA2Manager->SupportsZeroCopyNV12Texture()) { + hr = mDXVA2Manager->WrapTextureWithImage(aSample, pictureRegion, + getter_AddRefs(image)); + } else { + hr = mDXVA2Manager->CopyToImage(aSample, pictureRegion, + getter_AddRefs(image)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } + NS_ENSURE_TRUE(image, E_FAIL); + + gfx::IntSize size = image->GetSize(); + + TimeUnit pts = GetSampleTime(aSample); + NS_ENSURE_TRUE(pts.IsValid(), E_FAIL); + TimeUnit duration = GetSampleDurationOrLastKnownDuration(aSample); + NS_ENSURE_TRUE(duration.IsValid(), E_FAIL); + RefPtr<VideoData> v = VideoData::CreateFromImage( + mVideoInfo.mDisplay, aStreamOffset, pts, duration, image.forget(), false, + TimeUnit::FromMicroseconds(-1)); + + NS_ENSURE_TRUE(v, E_FAIL); + v.forget(aOutVideoData); + + mPerformanceRecorder.Record(pts.ToMicroseconds(), [&](DecodeStage& aStage) { + aStage.SetColorDepth(mVideoInfo.mColorDepth); + aStage.SetColorRange(mColorRange); + aStage.SetYUVColorSpace(mColorSpace.refOr( + DefaultColorSpace({mImageSize.width, mImageSize.height}))); + const GUID& subType = mDecoder->GetOutputMediaSubType(); + if (subType == MFVideoFormat_NV12) { + aStage.SetImageFormat(DecodeStage::NV12); + } else if (subType == MFVideoFormat_YV12) { + 
aStage.SetImageFormat(DecodeStage::YV12); + } else if (subType == MFVideoFormat_P010) { + aStage.SetImageFormat(DecodeStage::P010); + } else if (subType == MFVideoFormat_P016) { + aStage.SetImageFormat(DecodeStage::P016); + } + aStage.SetResolution(size.width, size.height); + }); + + return S_OK; +} + +// Blocks until decoded sample is produced by the decoder. +HRESULT +WMFVideoMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutData) { + RefPtr<IMFSample> sample; + HRESULT hr; + aOutData = nullptr; + int typeChangeCount = 0; + + // Loop until we decode a sample, or an unexpected error that we can't + // handle occurs. + while (true) { + hr = mDecoder->Output(&sample); + if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { + return MF_E_TRANSFORM_NEED_MORE_INPUT; + } + + if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { + MOZ_ASSERT(!sample); + // Video stream output type change, probably geometric aperture change or + // pixel type. + // We must reconfigure the decoder output type. + + // Attempt to find an appropriate OutputType, trying in order: + // if HW accelerated: NV12, P010, P016 + // if SW: YV12, P010, P016 + if (FAILED( + (hr = (mDecoder->FindDecoderOutputTypeWithSubtype( + mUseHwAccel ? MFVideoFormat_NV12 : MFVideoFormat_YV12)))) && + FAILED((hr = mDecoder->FindDecoderOutputTypeWithSubtype( + MFVideoFormat_P010))) && + FAILED((hr = mDecoder->FindDecoderOutputTypeWithSubtype( + MFVideoFormat_P016)))) { + LOG("No suitable output format found"); + return hr; + } + + RefPtr<IMFMediaType> outputType; + hr = mDecoder->GetOutputMediaType(outputType); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + if (mUseHwAccel) { + hr = mDXVA2Manager->ConfigureForSize( + outputType, + mColorSpace.refOr( + DefaultColorSpace({mImageSize.width, mImageSize.height})), + mColorRange, mVideoInfo.ImageRect().width, + mVideoInfo.ImageRect().height); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + } else { + // The stride may have changed, recheck for it. + hr = GetDefaultStride(outputType, mVideoInfo.ImageRect().width, + &mVideoStride); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + + UINT32 width = 0, height = 0; + hr = MFGetAttributeSize(outputType, MF_MT_FRAME_SIZE, &width, &height); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + NS_ENSURE_TRUE(width <= MAX_VIDEO_WIDTH, E_FAIL); + NS_ENSURE_TRUE(height <= MAX_VIDEO_HEIGHT, E_FAIL); + mSoftwareImageSize = gfx::IntSize(width, height); + + gfx::IntRect picture; + hr = GetPictureRegion(outputType, picture); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + MOZ_ASSERT(picture.width != 0 && picture.height != 0); + mSoftwarePictureSize = gfx::IntSize(picture.width, picture.height); + LOG("Output stream change, image size=[%ux%u], picture=[%u,%u]", + mSoftwareImageSize.width, mSoftwareImageSize.height, + mSoftwarePictureSize.width, mSoftwarePictureSize.height); + } + // Catch infinite loops, but some decoders perform at least 2 stream + // changes on consecutive calls, so be permissive. + // 100 is arbitrarily > 2. + NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE); + // Loop back and try decoding again... + ++typeChangeCount; + continue; + } + + if (SUCCEEDED(hr)) { + if (!sample) { + LOG("Video MFTDecoder returned success but no output!"); + // On some machines/input the MFT returns success but doesn't output + // a video frame. If we detect this, try again, but only up to a + // point; after 250 failures, give up. Note we count all failures + // over the life of the decoder, as we may end up exiting with a + // NEED_MORE_INPUT and coming back to hit the same error. 
So just + // counting with a local variable (like typeChangeCount does) may + // not work in this situation. + ++mNullOutputCount; + if (mNullOutputCount > 250) { + LOG("Excessive Video MFTDecoder returning success but no output; " + "giving up"); + mGotExcessiveNullOutput = true; + return E_FAIL; + } + continue; + } + TimeUnit pts = GetSampleTime(sample); + TimeUnit duration = GetSampleDurationOrLastKnownDuration(sample); + + // AV1 MFT fix: Sample duration after seeking is always equal to the + // sample time, for some reason. Set it to last duration instead. + if (mStreamType == WMFStreamType::AV1 && duration == pts) { + LOG("Video sample duration (%" PRId64 ") matched timestamp (%" PRId64 + "), setting to previous sample duration (%" PRId64 ") instead.", + pts.ToMicroseconds(), duration.ToMicroseconds(), + mLastDuration.ToMicroseconds()); + duration = mLastDuration; + sample->SetSampleDuration(UsecsToHNs(duration.ToMicroseconds())); + } + + if (!pts.IsValid() || !duration.IsValid()) { + return E_FAIL; + } + if (mSeekTargetThreshold.isSome()) { + if ((pts + duration) < mSeekTargetThreshold.ref()) { + LOG("Dropping video frame which pts (%" PRId64 " + %" PRId64 + ") is smaller than seek target (%" PRId64 ").", + pts.ToMicroseconds(), duration.ToMicroseconds(), + mSeekTargetThreshold->ToMicroseconds()); + // It is necessary to clear the pointer to release the previous output + // buffer. + sample = nullptr; + continue; + } + mSeekTargetThreshold.reset(); + } + break; + } + // Else unexpected error so bail. + NS_WARNING("WMFVideoMFTManager::Output() unexpected error"); + return hr; + } + + RefPtr<VideoData> frame; + if (mUseHwAccel) { + hr = CreateD3DVideoFrame(sample, aStreamOffset, getter_AddRefs(frame)); + } else { + hr = CreateBasicVideoFrame(sample, aStreamOffset, getter_AddRefs(frame)); + } + // Frame should be non null only when we succeeded. 
+ MOZ_ASSERT((frame != nullptr) == SUCCEEDED(hr)); + NS_ENSURE_TRUE(SUCCEEDED(hr), hr); + NS_ENSURE_TRUE(frame, E_FAIL); + + aOutData = frame; + + if (mNullOutputCount) { + mGotValidOutputAfterNullOutput = true; + } + + return S_OK; +} + +void WMFVideoMFTManager::Flush() { + MFTManager::Flush(); + mPerformanceRecorder.Record(std::numeric_limits<int64_t>::max()); +} + +void WMFVideoMFTManager::Shutdown() { + if (mDXVA2Manager) { + mDXVA2Manager->BeforeShutdownVideoMFTDecoder(); + } + mDecoder = nullptr; + mDXVA2Manager.reset(); +} + +bool WMFVideoMFTManager::IsHardwareAccelerated( + nsACString& aFailureReason) const { + aFailureReason = mDXVAFailureReason; + return mDecoder && mUseHwAccel; +} + +nsCString WMFVideoMFTManager::GetDescriptionName() const { + nsCString failureReason; + bool hw = IsHardwareAccelerated(failureReason); + + const char* formatName = [&]() { + if (!mDecoder) { + return "not initialized"; + } + GUID format = mDecoder->GetOutputMediaSubType(); + if (format == MFVideoFormat_NV12) { + if (!gfx::DeviceManagerDx::Get()->CanUseNV12()) { + return "nv12->argb32"; + } + return "nv12"; + } + if (format == MFVideoFormat_P010) { + if (!gfx::DeviceManagerDx::Get()->CanUseP010()) { + return "p010->argb32"; + } + return "p010"; + } + if (format == MFVideoFormat_P016) { + if (!gfx::DeviceManagerDx::Get()->CanUseP016()) { + return "p016->argb32"; + } + return "p016"; + } + if (format == MFVideoFormat_YV12) { + return "yv12"; + } + return "unknown"; + }(); + + const char* dxvaName = [&]() { + if (!mDXVA2Manager) { + return "no DXVA"; + } + return "D3D11"; + }(); + + return nsPrintfCString("wmf %s codec %s video decoder - %s, %s", + StreamTypeToString(mStreamType), + hw ? "hardware" : "software", dxvaName, formatName); +} +nsCString WMFVideoMFTManager::GetCodecName() const { + switch (mStreamType) { + case WMFStreamType::H264: + return "h264"_ns; + case WMFStreamType::VP8: + return "vp8"_ns; + case WMFStreamType::VP9: + return "vp9"_ns; + case WMFStreamType::AV1: + return "av1"_ns; + case WMFStreamType::HEVC: + return "hevc"_ns; + default: + return "unknown"_ns; + }; +} + +} // namespace mozilla |