diff options
Diffstat (limited to 'gfx/layers/mlgpu/RenderViewMLGPU.cpp')
-rw-r--r-- | gfx/layers/mlgpu/RenderViewMLGPU.cpp | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/gfx/layers/mlgpu/RenderViewMLGPU.cpp b/gfx/layers/mlgpu/RenderViewMLGPU.cpp new file mode 100644 index 0000000000..f6aceadeb5 --- /dev/null +++ b/gfx/layers/mlgpu/RenderViewMLGPU.cpp @@ -0,0 +1,549 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "RenderViewMLGPU.h" +#include "ContainerLayerMLGPU.h" +#include "FrameBuilder.h" +#include "mozilla/StaticPrefs_layers.h" +#include "LayersHelpers.h" +#include "MLGDevice.h" +#include "RenderPassMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "Units.h" +#include "UnitTransforms.h" +#include "UtilityMLGPU.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + MLGRenderTarget* aTarget, + const nsIntRegion& aInvalidRegion) + : RenderViewMLGPU(aBuilder, nullptr) { + mTarget = aTarget; + mInvalidBounds = aInvalidRegion.GetBounds(); + + // The clear region on the layer manager is the area that must be clear after + // we finish drawing. + mPostClearRegion = aBuilder->GetManager()->GetRegionToClear(); + + // Clamp the post-clear region to the invalid bounds, since clears don't go + // through the scissor rect if using ClearView. + mPostClearRegion.AndWith(mInvalidBounds); + + // Since the post-clear will occlude everything, we include it in the final + // opaque area. + mOccludedRegion.OrWith(ViewAs<LayerPixel>( + mPostClearRegion, + PixelCastJustification::RenderTargetIsParentLayerForRoot)); + + AL_LOG("RenderView %p root with invalid area %s, clear area %s\n", this, + Stringify(mInvalidBounds).c_str(), + Stringify(mPostClearRegion).c_str()); +} + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + ContainerLayerMLGPU* aContainer, + RenderViewMLGPU* aParent) + : RenderViewMLGPU(aBuilder, aParent) { + mContainer = aContainer; + mTargetOffset = aContainer->GetTargetOffset(); + mInvalidBounds = aContainer->GetInvalidRect(); + MOZ_ASSERT(!mInvalidBounds.IsEmpty()); + + AL_LOG("RenderView %p starting with container %p and invalid area %s\n", this, + aContainer->GetLayer(), Stringify(mInvalidBounds).c_str()); + + mContainer->SetRenderView(this); +} + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + RenderViewMLGPU* aParent) + : mBuilder(aBuilder), + mDevice(aBuilder->GetDevice()), + mParent(aParent), + mContainer(nullptr), + mFinishedBuilding(false), + mCurrentLayerBufferIndex(kInvalidResourceIndex), + mCurrentMaskRectBufferIndex(kInvalidResourceIndex), + mCurrentDepthMode(MLGDepthTestMode::Disabled), + mNextSortIndex(1), + mUseDepthBuffer( + StaticPrefs::layers_mlgpu_enable_depth_buffer_AtStartup()), + mDepthBufferNeedsClear(false) { + if (aParent) { + aParent->AddChild(this); + } +} + +RenderViewMLGPU::~RenderViewMLGPU() { + for (const auto& child : mChildren) { + child->mParent = nullptr; + } +} + +IntSize RenderViewMLGPU::GetSize() const { + MOZ_ASSERT(mFinishedBuilding); + return mTarget->GetSize(); +} + +MLGRenderTarget* RenderViewMLGPU::GetRenderTarget() const { + MOZ_ASSERT(mFinishedBuilding); + return mTarget; +} + +void RenderViewMLGPU::AddChild(RenderViewMLGPU* aParent) { + mChildren.push_back(aParent); +} + +void RenderViewMLGPU::Render() { + // We render views depth-first to minimize render target switching. + for (const auto& child : mChildren) { + child->Render(); + } + + // If the view requires a surface copy (of its backdrop), then we delay + // rendering it until it is added to a batch. + if (mContainer && mContainer->NeedsSurfaceCopy()) { + return; + } + ExecuteRendering(); +} + +void RenderViewMLGPU::RenderAfterBackdropCopy() { + MOZ_ASSERT(mContainer && mContainer->NeedsSurfaceCopy()); + + // Update the invalid bounds based on the container's visible region. This + // of course won't affect the prepared pipeline, but it will change the + // scissor rect in SetDeviceState. + mInvalidBounds = mContainer->GetRenderRegion().GetBounds().ToUnknownRect() - + GetTargetOffset(); + + ExecuteRendering(); +} + +void RenderViewMLGPU::FinishBuilding() { + MOZ_ASSERT(!mFinishedBuilding); + mFinishedBuilding = true; + + if (mContainer) { + MOZ_ASSERT(!mTarget); + + MLGRenderTargetFlags flags = MLGRenderTargetFlags::Default; + if (mUseDepthBuffer) { + flags |= MLGRenderTargetFlags::ZBuffer; + } + mTarget = mContainer->UpdateRenderTarget(mDevice, flags); + } +} + +void RenderViewMLGPU::AddItem(LayerMLGPU* aItem, const IntRect& aRect, + Maybe<Polygon>&& aGeometry) { + AL_LOG("RenderView %p analyzing layer %p\n", this, aItem->GetLayer()); + + // If the item is not visible at all, skip it. + if (aItem->GetComputedOpacity() == 0.0f) { + AL_LOG("RenderView %p culling item %p with no opacity\n", this, + aItem->GetLayer()); + return; + } + + // When using the depth buffer, the z-index for items is important. + // + // Sort order starts at 1 and goes to positive infinity, with smaller values + // being closer to the screen. Our viewport is the same, with anything + // outside of [0.0, 1.0] being culled, and lower values occluding higher + // values. To make this work our projection transform scales the z-axis. + // Note that we do not use 0 as a sorting index (when depth-testing is + // enabled) because this would result in a z-value of 1.0, which would be + // culled. + ItemInfo info(mBuilder, this, aItem, mNextSortIndex++, aRect, + std::move(aGeometry)); + + // If the item is not visible, or we can't add it to the layer constant + // buffer for some reason, bail out. + if (!UpdateVisibleRegion(info) || !mBuilder->AddLayerToConstantBuffer(info)) { + AL_LOG("RenderView %p culled item %p!\n", this, aItem->GetLayer()); + return; + } + + // We support all layer types now. + MOZ_ASSERT(info.type != RenderPassType::Unknown); + + if (info.renderOrder == RenderOrder::FrontToBack) { + AddItemFrontToBack(aItem, info); + } else { + AddItemBackToFront(aItem, info); + } +} + +bool RenderViewMLGPU::UpdateVisibleRegion(ItemInfo& aItem) { + // If the item has some kind of complex transform, we perform a very + // simple occlusion test and move on. We using a depth buffer we skip + // CPU-based occlusion culling as well, since the GPU will do most of our + // culling work for us. + if (mUseDepthBuffer || !aItem.translation || + !StaticPrefs::layers_mlgpu_enable_cpu_occlusion_AtStartup()) { + // Update the render region even if we won't compute visibility, since some + // layer types (like Canvas and Image) need to have the visible region + // clamped. + LayerIntRegion region = aItem.layer->GetShadowVisibleRegion(); + aItem.layer->SetRenderRegion(std::move(region)); + + AL_LOG("RenderView %p simple occlusion test, bounds=%s, translation?=%d\n", + this, Stringify(aItem.bounds).c_str(), aItem.translation ? 1 : 0); + return mInvalidBounds.Intersects(aItem.bounds); + } + + MOZ_ASSERT(aItem.rectilinear); + + AL_LOG("RenderView %p starting visibility tests:\n", this); + AL_LOG(" occluded=%s\n", Stringify(mOccludedRegion).c_str()); + + // Compute the translation into render target space. + LayerIntPoint translation = LayerIntPoint::FromUnknownPoint( + aItem.translation.value() - mTargetOffset); + AL_LOG(" translation=%s\n", Stringify(translation).c_str()); + + IntRect clip = aItem.layer->GetComputedClipRect().ToUnknownRect(); + AL_LOG(" clip=%s\n", Stringify(translation).c_str()); + + LayerIntRegion region = aItem.layer->GetShadowVisibleRegion(); + region.MoveBy(translation); + AL_LOG(" effective-visible=%s\n", Stringify(region).c_str()); + + region.SubOut(mOccludedRegion); + region.AndWith(LayerIntRect::FromUnknownRect(mInvalidBounds)); + region.AndWith(LayerIntRect::FromUnknownRect(clip)); + if (region.IsEmpty()) { + return false; + } + + // Move the visible region back into layer space. + region.MoveBy(-translation); + AL_LOG(" new-local-visible=%s\n", Stringify(region).c_str()); + + aItem.layer->SetRenderRegion(std::move(region)); + + // Apply the new occluded area. We do another dance with the translation to + // avoid copying the region. We do this after the SetRegionToRender call to + // accomodate the possiblity of a layer changing its visible region. + if (aItem.opaque) { + mOccludedRegion.MoveBy(-translation); + mOccludedRegion.OrWith(aItem.layer->GetRenderRegion()); + mOccludedRegion.MoveBy(translation); + AL_LOG(" new-occluded=%s\n", Stringify(mOccludedRegion).c_str()); + + // If the occluded region gets too complicated, we reset it. + if (mOccludedRegion.GetNumRects() >= 32) { + mOccludedRegion.SetEmpty(); + AL_LOG(" clear-occluded, too many rects\n"); + } + } + return true; +} + +void RenderViewMLGPU::AddItemFrontToBack(LayerMLGPU* aLayer, ItemInfo& aItem) { + // We receive items in front-to-back order. Ideally we want to push items + // as far back into batches impossible, to ensure the GPU can do a good + // job at culling. However we also want to make sure we actually batch + // items versus drawing one primitive per pass. + // + // As a compromise we look at the most 3 recent batches and then give up. + // This can be tweaked in the future. + static const size_t kMaxSearch = 3; + size_t iterations = 0; + for (auto iter = mFrontToBack.rbegin(); iter != mFrontToBack.rend(); iter++) { + RenderPassMLGPU* pass = (*iter); + if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) { + AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this, + aLayer->GetLayer(), pass, int(pass->GetType())); + return; + } + if (++iterations > kMaxSearch) { + break; + } + } + + RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem); + if (!pass || !pass->AcceptItem(aItem)) { + MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!"); + return; + } + AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this, + aLayer->GetLayer(), pass.get(), int(pass->GetType())); + + mFrontToBack.push_back(pass); +} + +void RenderViewMLGPU::AddItemBackToFront(LayerMLGPU* aLayer, ItemInfo& aItem) { + // We receive layers in front-to-back order, but there are two cases when we + // actually draw back-to-front: when the depth buffer is disabled, or when + // using the depth buffer and the item has transparent pixels (and therefore + // requires blending). In these cases we will build vertex and constant + // buffers in reverse, as well as execute batches in reverse, to ensure the + // correct ordering. + // + // Note: We limit the number of batches we search through, since it's better + // to add new draw calls than spend too much time finding compatible + // batches further down. + static const size_t kMaxSearch = 10; + size_t iterations = 0; + for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) { + RenderPassMLGPU* pass = (*iter); + if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) { + AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this, + aLayer->GetLayer(), pass, int(pass->GetType())); + return; + } + if (pass->Intersects(aItem)) { + break; + } + if (++iterations > kMaxSearch) { + break; + } + } + + RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem); + if (!pass || !pass->AcceptItem(aItem)) { + MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!"); + return; + } + AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this, + aLayer->GetLayer(), pass.get(), int(pass->GetType())); + + mBackToFront.push_front(pass); +} + +void RenderViewMLGPU::Prepare() { + if (!mTarget) { + return; + } + + // Prepare front-to-back passes. These are only present when using the depth + // buffer, and they contain only opaque data. + for (RefPtr<RenderPassMLGPU>& pass : mFrontToBack) { + pass->PrepareForRendering(); + } + + // Prepare the Clear buffer, which will fill the render target with + // transparent pixels. This must happen before we set up world constants, + // since it can create new z-indices. + PrepareClears(); + + // Prepare the world constant buffer. This must be called after we've + // finished allocating all z-indices. + { + WorldConstants vsConstants; + Matrix4x4 projection = Matrix4x4::Translation(-1.0, 1.0, 0.0); + projection.PreScale(2.0 / float(mTarget->GetSize().width), + 2.0 / float(mTarget->GetSize().height), 1.0f); + projection.PreScale(1.0f, -1.0f, 1.0f); + + memcpy(vsConstants.projection, &projection._11, 64); + vsConstants.targetOffset = Point(mTargetOffset); + vsConstants.sortIndexOffset = PrepareDepthBuffer(); + vsConstants.debugFrameNumber = + mBuilder->GetManager()->GetDebugFrameNumber(); + + SharedConstantBuffer* shared = mDevice->GetSharedVSBuffer(); + if (!shared->Allocate(&mWorldConstants, vsConstants)) { + return; + } + } + + // Prepare back-to-front passes. In depth buffer mode, these contain draw + // calls that might produce transparent pixels. When using CPU-based occlusion + // culling, all draw calls are back-to-front. + for (RefPtr<RenderPassMLGPU>& pass : mBackToFront) { + pass->PrepareForRendering(); + } + + // Now, process children. + for (const auto& iter : mChildren) { + iter->Prepare(); + } +} + +void RenderViewMLGPU::ExecuteRendering() { + if (!mTarget) { + return; + } + if (!mWorldConstants.IsValid()) { + gfxWarning() << "Failed to allocate constant buffer for world transform"; + return; + } + + SetDeviceState(); + + // If using the depth buffer, clear it (if needed) and enable writes. + if (mUseDepthBuffer) { + if (mDepthBufferNeedsClear) { + mDevice->ClearDepthBuffer(mTarget); + } + SetDepthTestMode(MLGDepthTestMode::Write); + } + + // Opaque items, rendered front-to-back. + for (auto iter = mFrontToBack.begin(); iter != mFrontToBack.end(); iter++) { + ExecutePass(*iter); + } + + if (mUseDepthBuffer) { + // From now on we might be rendering transparent pixels, so we disable + // writing to the z-buffer. + SetDepthTestMode(MLGDepthTestMode::ReadOnly); + } + + // Clear any pixels that are not occluded, and therefore might require + // blending. + mDevice->DrawClearRegion(mPreClear); + + // Render back-to-front passes. + for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) { + ExecutePass(*iter); + } + + // Make sure the post-clear area has no pixels. + if (!mPostClearRegion.IsEmpty()) { + mDevice->DrawClearRegion(mPostClear); + } + + // We repaint the entire invalid region, even if it is partially occluded. + // Thus it's safe for us to clear the invalid area here. If we ever switch + // to nsIntRegions, we will have to take the difference between the paitned + // area and the invalid area. + if (mContainer) { + mContainer->ClearInvalidRect(); + } +} + +void RenderViewMLGPU::ExecutePass(RenderPassMLGPU* aPass) { + if (!aPass->IsPrepared()) { + return; + } + + // Change the layer buffer if needed. + if (aPass->GetLayerBufferIndex() != mCurrentLayerBufferIndex) { + mCurrentLayerBufferIndex = aPass->GetLayerBufferIndex(); + + ConstantBufferSection section = + mBuilder->GetLayerBufferByIndex(mCurrentLayerBufferIndex); + mDevice->SetVSConstantBuffer(kLayerBufferSlot, §ion); + } + + // Change the mask rect buffer if needed. + if (aPass->GetMaskRectBufferIndex() && + aPass->GetMaskRectBufferIndex().value() != mCurrentMaskRectBufferIndex) { + mCurrentMaskRectBufferIndex = aPass->GetMaskRectBufferIndex().value(); + + ConstantBufferSection section = + mBuilder->GetMaskRectBufferByIndex(mCurrentMaskRectBufferIndex); + mDevice->SetVSConstantBuffer(kMaskBufferSlot, §ion); + } + + aPass->ExecuteRendering(); +} + +void RenderViewMLGPU::SetDeviceState() { + // Note: we unbind slot 0 (which is where the render target could have been + // bound on a previous frame). Otherwise we trigger + // D3D11_DEVICE_PSSETSHADERRESOURCES_HAZARD. + mDevice->UnsetPSTexture(0); + mDevice->SetRenderTarget(mTarget); + mDevice->SetViewport(IntRect(IntPoint(0, 0), mTarget->GetSize())); + mDevice->SetScissorRect(Some(mInvalidBounds)); + mDevice->SetVSConstantBuffer(kWorldConstantBufferSlot, &mWorldConstants); +} + +void RenderViewMLGPU::SetDepthTestMode(MLGDepthTestMode aMode) { + mDevice->SetDepthTestMode(aMode); + mCurrentDepthMode = aMode; +} + +void RenderViewMLGPU::RestoreDeviceState() { + SetDeviceState(); + mDevice->SetDepthTestMode(mCurrentDepthMode); + mCurrentLayerBufferIndex = kInvalidResourceIndex; + mCurrentMaskRectBufferIndex = kInvalidResourceIndex; +} + +int32_t RenderViewMLGPU::PrepareDepthBuffer() { + if (!mUseDepthBuffer) { + return 0; + } + + // Rather than clear the depth buffer every frame, we offset z-indices each + // frame, starting with indices far away from the screen and moving toward + // the user each successive frame. This ensures that frames can re-use the + // depth buffer but never collide with previously written values. + // + // Once a frame runs out of sort indices, we finally clear the depth buffer + // and start over again. + + // Note: the lowest sort index (kDepthLimit) is always occluded since it will + // resolve to the clear value - kDepthLimit / kDepthLimit == 1.0. + // + // If we don't have any more indices to allocate, we need to clear the depth + // buffer and start fresh. + int32_t highestIndex = mTarget->GetLastDepthStart(); + if (highestIndex < mNextSortIndex) { + mDepthBufferNeedsClear = true; + highestIndex = kDepthLimit; + } + + // We should not have more than kDepthLimit layers to draw. The last known + // sort index might appear in the depth buffer and occlude something, so + // we subtract 1. This ensures all our indices will compare less than all + // old indices. + int32_t sortOffset = highestIndex - mNextSortIndex - 1; + MOZ_ASSERT(sortOffset >= 0); + + mTarget->SetLastDepthStart(sortOffset); + return sortOffset; +} + +void RenderViewMLGPU::PrepareClears() { + // We don't do any clearing if we're copying from a source backdrop. + if (mContainer && mContainer->NeedsSurfaceCopy()) { + return; + } + + // Get the list of rects to clear. If using the depth buffer, we don't + // care if it's accurate since the GPU will do occlusion testing for us. + // If not using the depth buffer, we subtract out the occluded region. + LayerIntRegion region = LayerIntRect::FromUnknownRect(mInvalidBounds); + if (!mUseDepthBuffer) { + // Don't let the clear region become too complicated. + region.SubOut(mOccludedRegion); + region.SimplifyOutward(kMaxClearViewRects); + } + + Maybe<int32_t> sortIndex; + if (mUseDepthBuffer) { + // Note that we use the lowest available sorting index, to ensure that when + // using the z-buffer, we don't draw over already-drawn content. + sortIndex = Some(mNextSortIndex++); + } + + nsTArray<IntRect> rects = ToRectArray(region); + mDevice->PrepareClearRegion(&mPreClear, std::move(rects), sortIndex); + + if (!mPostClearRegion.IsEmpty()) { + // Prepare the final clear as well. Note that we always do this clear at the + // very end, even when the depth buffer is enabled, so we don't bother + // setting a useful sorting index. If and when we try to ship the depth + // buffer, we would execute this clear earlier in the pipeline and give it + // the closest possible z-ordering to the screen. + nsTArray<IntRect> rects = ToRectArray(mPostClearRegion); + mDevice->PrepareClearRegion(&mPostClear, std::move(rects), Nothing()); + } +} + +} // namespace layers +} // namespace mozilla |