/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "RenderViewMLGPU.h"
#include "ContainerLayerMLGPU.h"
#include "FrameBuilder.h"
#include "mozilla/StaticPrefs_layers.h"
#include "LayersHelpers.h"
#include "MLGDevice.h"
#include "RenderPassMLGPU.h"
#include "ShaderDefinitionsMLGPU.h"
#include "Units.h"
#include "UnitTransforms.h"
#include "UtilityMLGPU.h"

namespace mozilla {
namespace layers {

using namespace gfx;

RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder,
                                 MLGRenderTarget* aTarget,
                                 const nsIntRegion& aInvalidRegion)
    : RenderViewMLGPU(aBuilder, nullptr) {
  mTarget = aTarget;
  mInvalidBounds = aInvalidRegion.GetBounds();

  // The clear region on the layer manager is the area that must be clear
  // after we finish drawing.
  mPostClearRegion = aBuilder->GetManager()->GetRegionToClear();

  // Clamp the post-clear region to the invalid bounds, since clears don't go
  // through the scissor rect if using ClearView.
  mPostClearRegion.AndWith(mInvalidBounds);

  // Since the post-clear will occlude everything, we include it in the final
  // opaque area.
  mOccludedRegion.OrWith(ViewAs<LayerPixel>(
      mPostClearRegion,
      PixelCastJustification::RenderTargetIsParentLayerForRoot));

  AL_LOG("RenderView %p root with invalid area %s, clear area %s\n", this,
         Stringify(mInvalidBounds).c_str(),
         Stringify(mPostClearRegion).c_str());
}

RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder,
                                 ContainerLayerMLGPU* aContainer,
                                 RenderViewMLGPU* aParent)
    : RenderViewMLGPU(aBuilder, aParent) {
  mContainer = aContainer;
  mTargetOffset = aContainer->GetTargetOffset();
  mInvalidBounds = aContainer->GetInvalidRect();
  MOZ_ASSERT(!mInvalidBounds.IsEmpty());

  AL_LOG("RenderView %p starting with container %p and invalid area %s\n",
         this, aContainer->GetLayer(), Stringify(mInvalidBounds).c_str());

  mContainer->SetRenderView(this);
}

RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder,
                                 RenderViewMLGPU* aParent)
    : mBuilder(aBuilder),
      mDevice(aBuilder->GetDevice()),
      mParent(aParent),
      mContainer(nullptr),
      mFinishedBuilding(false),
      mCurrentLayerBufferIndex(kInvalidResourceIndex),
      mCurrentMaskRectBufferIndex(kInvalidResourceIndex),
      mCurrentDepthMode(MLGDepthTestMode::Disabled),
      mNextSortIndex(1),
      mUseDepthBuffer(
          StaticPrefs::layers_mlgpu_enable_depth_buffer_AtStartup()),
      mDepthBufferNeedsClear(false) {
  if (aParent) {
    aParent->AddChild(this);
  }
}

RenderViewMLGPU::~RenderViewMLGPU() {
  for (const auto& child : mChildren) {
    child->mParent = nullptr;
  }
}

IntSize RenderViewMLGPU::GetSize() const {
  MOZ_ASSERT(mFinishedBuilding);
  return mTarget->GetSize();
}

MLGRenderTarget* RenderViewMLGPU::GetRenderTarget() const {
  MOZ_ASSERT(mFinishedBuilding);
  return mTarget;
}

void RenderViewMLGPU::AddChild(RenderViewMLGPU* aChild) {
  mChildren.push_back(aChild);
}

void RenderViewMLGPU::Render() {
  // We render views depth-first to minimize render target switching.
  for (const auto& child : mChildren) {
    child->Render();
  }

  // If the view requires a surface copy (of its backdrop), then we delay
  // rendering it until it is added to a batch.
  if (mContainer && mContainer->NeedsSurfaceCopy()) {
    return;
  }
  ExecuteRendering();
}
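// Illustrative note (not from the original source): because Render() recurses
// into children before executing this view's own passes, intermediate render
// targets are finished before their parents consume them. For a hypothetical
// view tree
//   root -> {A, B}, A -> {C}
// the execution order is C, A, B, root, so each container's texture is ready
// by the time the parent view draws a quad sampling from it. Views that need
// a backdrop copy are the exception: they are deferred, per the early return
// above, and rendered later via RenderAfterBackdropCopy().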
void RenderViewMLGPU::RenderAfterBackdropCopy() {
  MOZ_ASSERT(mContainer && mContainer->NeedsSurfaceCopy());

  // Update the invalid bounds based on the container's visible region. This
  // of course won't affect the prepared pipeline, but it will change the
  // scissor rect in SetDeviceState.
  mInvalidBounds = mContainer->GetRenderRegion().GetBounds().ToUnknownRect() -
                   GetTargetOffset();

  ExecuteRendering();
}

void RenderViewMLGPU::FinishBuilding() {
  MOZ_ASSERT(!mFinishedBuilding);
  mFinishedBuilding = true;

  if (mContainer) {
    MOZ_ASSERT(!mTarget);

    MLGRenderTargetFlags flags = MLGRenderTargetFlags::Default;
    if (mUseDepthBuffer) {
      flags |= MLGRenderTargetFlags::ZBuffer;
    }
    mTarget = mContainer->UpdateRenderTarget(mDevice, flags);
  }
}

void RenderViewMLGPU::AddItem(LayerMLGPU* aItem, const IntRect& aRect,
                              Maybe<Polygon>&& aGeometry) {
  AL_LOG("RenderView %p analyzing layer %p\n", this, aItem->GetLayer());

  // If the item is fully transparent, skip it.
  if (aItem->GetComputedOpacity() == 0.0f) {
    AL_LOG("RenderView %p culling item %p with no opacity\n", this,
           aItem->GetLayer());
    return;
  }

  // When using the depth buffer, the z-index for items is important.
  //
  // Sort order starts at 1 and goes to positive infinity, with smaller values
  // being closer to the screen. Our viewport is the same, with anything
  // outside of [0.0, 1.0] being culled, and lower values occluding higher
  // values. To make this work our projection transform scales the z-axis.
  // Note that we do not use 0 as a sorting index (when depth-testing is
  // enabled) because this would result in a z-value of 1.0, which would be
  // culled.
  ItemInfo info(mBuilder, this, aItem, mNextSortIndex++, aRect,
                std::move(aGeometry));

  // If the item is not visible, or we can't add it to the layer constant
  // buffer for some reason, bail out.
  if (!UpdateVisibleRegion(info) ||
      !mBuilder->AddLayerToConstantBuffer(info)) {
    AL_LOG("RenderView %p culled item %p!\n", this, aItem->GetLayer());
    return;
  }

  // We support all layer types now.
  MOZ_ASSERT(info.type != RenderPassType::Unknown);

  if (info.renderOrder == RenderOrder::FrontToBack) {
    AddItemFrontToBack(aItem, info);
  } else {
    AddItemBackToFront(aItem, info);
  }
}

bool RenderViewMLGPU::UpdateVisibleRegion(ItemInfo& aItem) {
  // If the item has some kind of complex transform, we perform a very
  // simple occlusion test and move on. If we're using a depth buffer, we
  // skip CPU-based occlusion culling as well, since the GPU will do most of
  // our culling work for us.
  if (mUseDepthBuffer || !aItem.translation ||
      !StaticPrefs::layers_mlgpu_enable_cpu_occlusion_AtStartup()) {
    // Update the render region even if we won't compute visibility, since
    // some layer types (like Canvas and Image) need to have the visible
    // region clamped.
    LayerIntRegion region = aItem.layer->GetShadowVisibleRegion();
    aItem.layer->SetRenderRegion(std::move(region));

    AL_LOG("RenderView %p simple occlusion test, bounds=%s, translation?=%d\n",
           this, Stringify(aItem.bounds).c_str(), aItem.translation ? 1 : 0);
    return mInvalidBounds.Intersects(aItem.bounds);
  }

  MOZ_ASSERT(aItem.rectilinear);

  AL_LOG("RenderView %p starting visibility tests:\n", this);
  AL_LOG("  occluded=%s\n", Stringify(mOccludedRegion).c_str());

  // Compute the translation into render target space.
  LayerIntPoint translation = LayerIntPoint::FromUnknownPoint(
      aItem.translation.value() - mTargetOffset);
  AL_LOG("  translation=%s\n", Stringify(translation).c_str());

  IntRect clip = aItem.layer->GetComputedClipRect().ToUnknownRect();
  AL_LOG("  clip=%s\n", Stringify(clip).c_str());

  LayerIntRegion region = aItem.layer->GetShadowVisibleRegion();
  region.MoveBy(translation);
  AL_LOG("  effective-visible=%s\n", Stringify(region).c_str());

  region.SubOut(mOccludedRegion);
  region.AndWith(LayerIntRect::FromUnknownRect(mInvalidBounds));
  region.AndWith(LayerIntRect::FromUnknownRect(clip));
  if (region.IsEmpty()) {
    return false;
  }

  // Move the visible region back into layer space.
  region.MoveBy(-translation);
  AL_LOG("  new-local-visible=%s\n", Stringify(region).c_str());

  aItem.layer->SetRenderRegion(std::move(region));

  // Apply the new occluded area. We do another dance with the translation to
  // avoid copying the region. We do this after the SetRenderRegion call to
  // accommodate the possibility of a layer changing its visible region.
  if (aItem.opaque) {
    mOccludedRegion.MoveBy(-translation);
    mOccludedRegion.OrWith(aItem.layer->GetRenderRegion());
    mOccludedRegion.MoveBy(translation);
    AL_LOG("  new-occluded=%s\n", Stringify(mOccludedRegion).c_str());

    // If the occluded region gets too complicated, we reset it.
    if (mOccludedRegion.GetNumRects() >= 32) {
      mOccludedRegion.SetEmpty();
      AL_LOG("  clear-occluded, too many rects\n");
    }
  }
  return true;
}
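// Worked example of the culling above (hypothetical numbers, not from the
// original source): suppose a layer's visible region is (0, 0, 100, 100) in
// layer space, its translation into render target space is (10, 10), and the
// current occluded region is (0, 0, 60, 60). After MoveBy, the effective
// visible region covers (10, 10, 100, 100); SubOut removes the occluded part,
// leaving an L-shaped region made of (60, 10, 50, 100) and (10, 60, 50, 50).
// If that survives the invalid-bounds and clip intersections, it is moved
// back by (-10, -10) and becomes the layer's render region, and, if the item
// is opaque, it is also merged into mOccludedRegion to cull later items.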
void RenderViewMLGPU::AddItemFrontToBack(LayerMLGPU* aLayer, ItemInfo& aItem) {
  // We receive items in front-to-back order. Ideally we want to push items
  // as far back into batches as possible, to ensure the GPU can do a good
  // job at culling. However we also want to make sure we actually batch
  // items versus drawing one primitive per pass.
  //
  // As a compromise we look at the 3 most recent batches and then give up.
  // This can be tweaked in the future.
  static const size_t kMaxSearch = 3;
  size_t iterations = 0;
  for (auto iter = mFrontToBack.rbegin(); iter != mFrontToBack.rend();
       iter++) {
    RenderPassMLGPU* pass = (*iter);
    if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) {
      AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this,
             aLayer->GetLayer(), pass, int(pass->GetType()));
      return;
    }
    if (++iterations > kMaxSearch) {
      break;
    }
  }

  RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem);
  if (!pass || !pass->AcceptItem(aItem)) {
    MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!");
    return;
  }
  AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this,
         aLayer->GetLayer(), pass.get(), int(pass->GetType()));

  mFrontToBack.push_back(pass);
}

void RenderViewMLGPU::AddItemBackToFront(LayerMLGPU* aLayer, ItemInfo& aItem) {
  // We receive layers in front-to-back order, but there are two cases when we
  // actually draw back-to-front: when the depth buffer is disabled, or when
  // using the depth buffer and the item has transparent pixels (and therefore
  // requires blending). In these cases we will build vertex and constant
  // buffers in reverse, as well as execute batches in reverse, to ensure the
  // correct ordering.
  //
  // Note: We limit the number of batches we search through, since it's better
  // to add new draw calls than spend too much time finding compatible
  // batches further down.
  static const size_t kMaxSearch = 10;
  size_t iterations = 0;
  for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) {
    RenderPassMLGPU* pass = (*iter);
    if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) {
      AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this,
             aLayer->GetLayer(), pass, int(pass->GetType()));
      return;
    }
    if (pass->Intersects(aItem)) {
      break;
    }
    if (++iterations > kMaxSearch) {
      break;
    }
  }

  RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem);
  if (!pass || !pass->AcceptItem(aItem)) {
    MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!");
    return;
  }
  AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this,
         aLayer->GetLayer(), pass.get(), int(pass->GetType()));

  mBackToFront.push_front(pass);
}

void RenderViewMLGPU::Prepare() {
  if (!mTarget) {
    return;
  }

  // Prepare front-to-back passes. These are only present when using the depth
  // buffer, and they contain only opaque data.
  for (RefPtr<RenderPassMLGPU>& pass : mFrontToBack) {
    pass->PrepareForRendering();
  }

  // Prepare the clear buffer, which will fill the render target with
  // transparent pixels. This must happen before we set up world constants,
  // since it can create new z-indices.
  PrepareClears();

  // Prepare the world constant buffer. This must be called after we've
  // finished allocating all z-indices.
  {
    WorldConstants vsConstants;
    Matrix4x4 projection = Matrix4x4::Translation(-1.0, 1.0, 0.0);
    projection.PreScale(2.0 / float(mTarget->GetSize().width),
                        2.0 / float(mTarget->GetSize().height), 1.0f);
    projection.PreScale(1.0f, -1.0f, 1.0f);

    memcpy(vsConstants.projection, &projection._11, 64);
    vsConstants.targetOffset = Point(mTargetOffset);
    vsConstants.sortIndexOffset = PrepareDepthBuffer();
    vsConstants.debugFrameNumber =
        mBuilder->GetManager()->GetDebugFrameNumber();

    SharedConstantBuffer* shared = mDevice->GetSharedVSBuffer();
    if (!shared->Allocate(&mWorldConstants, vsConstants)) {
      return;
    }
  }

  // Prepare back-to-front passes. In depth buffer mode, these contain draw
  // calls that might produce transparent pixels. When using CPU-based
  // occlusion culling, all draw calls are back-to-front.
  for (RefPtr<RenderPassMLGPU>& pass : mBackToFront) {
    pass->PrepareForRendering();
  }

  // Now, process children.
  for (const auto& iter : mChildren) {
    iter->Prepare();
  }
}
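// Worked example of the projection above (illustrative, not from the original
// source): for an 800x600 target, a vertex at target-space pixel (x, y) is
// first scaled by (2/800, -2/600) via the two PreScale calls, then translated
// by (-1, +1), giving normalized device coordinates
//   x_ndc = 2x/800 - 1,   y_ndc = 1 - 2y/600.
// So pixel (0, 0) maps to (-1, +1), the top-left of clip space, and
// (800, 600) maps to (+1, -1), the bottom-right, flipping y as D3D-style clip
// space expects. Sort indices are mapped separately into the [0.0, 1.0] depth
// range, as described in AddItem, using the sortIndexOffset computed by
// PrepareDepthBuffer.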
void RenderViewMLGPU::ExecuteRendering() {
  if (!mTarget) {
    return;
  }
  if (!mWorldConstants.IsValid()) {
    gfxWarning() << "Failed to allocate constant buffer for world transform";
    return;
  }

  SetDeviceState();

  // If using the depth buffer, clear it (if needed) and enable writes.
  if (mUseDepthBuffer) {
    if (mDepthBufferNeedsClear) {
      mDevice->ClearDepthBuffer(mTarget);
    }
    SetDepthTestMode(MLGDepthTestMode::Write);
  }

  // Opaque items, rendered front-to-back.
  for (auto iter = mFrontToBack.begin(); iter != mFrontToBack.end(); iter++) {
    ExecutePass(*iter);
  }

  if (mUseDepthBuffer) {
    // From now on we might be rendering transparent pixels, so we disable
    // writing to the z-buffer.
    SetDepthTestMode(MLGDepthTestMode::ReadOnly);
  }

  // Clear any pixels that are not occluded, and therefore might require
  // blending.
  mDevice->DrawClearRegion(mPreClear);

  // Render back-to-front passes.
  for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) {
    ExecutePass(*iter);
  }

  // Make sure the post-clear area has no pixels.
  if (!mPostClearRegion.IsEmpty()) {
    mDevice->DrawClearRegion(mPostClear);
  }

  // We repaint the entire invalid region, even if it is partially occluded.
  // Thus it's safe for us to clear the invalid area here. If we ever switch
  // to nsIntRegions, we will have to take the difference between the painted
  // area and the invalid area.
  if (mContainer) {
    mContainer->ClearInvalidRect();
  }
}

void RenderViewMLGPU::ExecutePass(RenderPassMLGPU* aPass) {
  if (!aPass->IsPrepared()) {
    return;
  }

  // Change the layer buffer if needed.
  if (aPass->GetLayerBufferIndex() != mCurrentLayerBufferIndex) {
    mCurrentLayerBufferIndex = aPass->GetLayerBufferIndex();

    ConstantBufferSection section =
        mBuilder->GetLayerBufferByIndex(mCurrentLayerBufferIndex);
    mDevice->SetVSConstantBuffer(kLayerBufferSlot, &section);
  }

  // Change the mask rect buffer if needed.
  if (aPass->GetMaskRectBufferIndex() &&
      aPass->GetMaskRectBufferIndex().value() !=
          mCurrentMaskRectBufferIndex) {
    mCurrentMaskRectBufferIndex = aPass->GetMaskRectBufferIndex().value();

    ConstantBufferSection section =
        mBuilder->GetMaskRectBufferByIndex(mCurrentMaskRectBufferIndex);
    mDevice->SetVSConstantBuffer(kMaskBufferSlot, &section);
  }

  aPass->ExecuteRendering();
}

void RenderViewMLGPU::SetDeviceState() {
  // Note: we unbind slot 0 (which is where the render target could have been
  // bound on a previous frame). Otherwise we trigger
  // D3D11_DEVICE_PSSETSHADERRESOURCES_HAZARD.
  mDevice->UnsetPSTexture(0);
  mDevice->SetRenderTarget(mTarget);
  mDevice->SetViewport(IntRect(IntPoint(0, 0), mTarget->GetSize()));
  mDevice->SetScissorRect(Some(mInvalidBounds));
  mDevice->SetVSConstantBuffer(kWorldConstantBufferSlot, &mWorldConstants);
}

void RenderViewMLGPU::SetDepthTestMode(MLGDepthTestMode aMode) {
  mDevice->SetDepthTestMode(aMode);
  mCurrentDepthMode = aMode;
}

void RenderViewMLGPU::RestoreDeviceState() {
  SetDeviceState();
  mDevice->SetDepthTestMode(mCurrentDepthMode);
  mCurrentLayerBufferIndex = kInvalidResourceIndex;
  mCurrentMaskRectBufferIndex = kInvalidResourceIndex;
}

int32_t RenderViewMLGPU::PrepareDepthBuffer() {
  if (!mUseDepthBuffer) {
    return 0;
  }

  // Rather than clear the depth buffer every frame, we offset z-indices each
  // frame, starting with indices far away from the screen and moving toward
  // the user each successive frame. This ensures that frames can re-use the
  // depth buffer but never collide with previously written values.
  //
  // Once a frame runs out of sort indices, we finally clear the depth buffer
  // and start over again.

  // Note: the lowest sort index (kDepthLimit) is always occluded, since it
  // will resolve to the clear value: kDepthLimit / kDepthLimit == 1.0.
  //
  // If we don't have any more indices to allocate, we need to clear the depth
  // buffer and start fresh.
  int32_t highestIndex = mTarget->GetLastDepthStart();
  if (highestIndex < mNextSortIndex) {
    mDepthBufferNeedsClear = true;
    highestIndex = kDepthLimit;
  }

  // We should not have more than kDepthLimit layers to draw. The last known
  // sort index might appear in the depth buffer and occlude something, so
  // we subtract 1. This ensures all our indices will compare less than all
  // old indices.
  int32_t sortOffset = highestIndex - mNextSortIndex - 1;
  MOZ_ASSERT(sortOffset >= 0);

  mTarget->SetLastDepthStart(sortOffset);
  return sortOffset;
}
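// Worked example of the offset scheme above (hypothetical numbers, not from
// the original source, assuming depth resolves to index / kDepthLimit as the
// comments suggest): suppose kDepthLimit is 1,000,000 and this frame
// allocated sort indices 1..50, so mNextSortIndex == 51. On a fresh buffer,
// GetLastDepthStart() returns kDepthLimit, giving
//   sortOffset = 1,000,000 - 51 - 1 = 999,948,
// so the frame's items occupy offset indices 999,949..999,998, all strictly
// less than (closer than) the clear value of 1,000,000. The next frame starts
// from 999,948 and works toward the screen; only when a frame's indices no
// longer fit below the recorded start value is the depth buffer cleared.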
void RenderViewMLGPU::PrepareClears() {
  // We don't do any clearing if we're copying from a source backdrop.
  if (mContainer && mContainer->NeedsSurfaceCopy()) {
    return;
  }

  // Get the list of rects to clear. If using the depth buffer, we don't
  // care if it's accurate since the GPU will do occlusion testing for us.
  // If not using the depth buffer, we subtract out the occluded region.
  LayerIntRegion region = LayerIntRect::FromUnknownRect(mInvalidBounds);
  if (!mUseDepthBuffer) {
    // Don't let the clear region become too complicated.
    region.SubOut(mOccludedRegion);
    region.SimplifyOutward(kMaxClearViewRects);
  }

  Maybe<int32_t> sortIndex;
  if (mUseDepthBuffer) {
    // Note that we use the lowest available sorting index, to ensure that
    // when using the z-buffer, we don't draw over already-drawn content.
    sortIndex = Some(mNextSortIndex++);
  }

  nsTArray<IntRect> rects = ToRectArray(region);
  mDevice->PrepareClearRegion(&mPreClear, std::move(rects), sortIndex);

  if (!mPostClearRegion.IsEmpty()) {
    // Prepare the final clear as well. Note that we always do this clear at
    // the very end, even when the depth buffer is enabled, so we don't
    // bother setting a useful sorting index. If and when we try to ship the
    // depth buffer, we would execute this clear earlier in the pipeline and
    // give it the closest possible z-ordering to the screen.
    nsTArray<IntRect> rects = ToRectArray(mPostClearRegion);
    mDevice->PrepareClearRegion(&mPostClear, std::move(rects), Nothing());
  }
}
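// Illustrative note (hypothetical numbers, not from the original source): in
// depth-buffer mode the pre-clear takes the next free sort index, which is
// larger than every item index allocated so far. If items used indices 1..40,
// the clear gets 41; since larger indices resolve to depths farther from the
// screen, the depth test stops the clear from overwriting pixels that opaque
// items already claimed. This serves the same purpose as the explicit
// occluded-region subtraction in the non-depth path above.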
}  // namespace layers
}  // namespace mozilla