diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp')
-rw-r--r-- | gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp | 861 |
1 files changed, 861 insertions, 0 deletions
diff --git a/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp b/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp new file mode 100644 index 0000000000..1c86cafe03 --- /dev/null +++ b/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp @@ -0,0 +1,861 @@ +// +// Copyright 2022 The ANGLE Project Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// + +#include "compiler/translator/tree_ops/RewritePixelLocalStorage.h" + +#include "common/angleutils.h" +#include "compiler/translator/StaticType.h" +#include "compiler/translator/SymbolTable.h" +#include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h" +#include "compiler/translator/tree_util/BuiltIn.h" +#include "compiler/translator/tree_util/FindMain.h" +#include "compiler/translator/tree_util/IntermNode_util.h" +#include "compiler/translator/tree_util/IntermTraverse.h" + +namespace sh +{ +namespace +{ +constexpr static TBasicType DataTypeOfPLSType(TBasicType plsType) +{ + switch (plsType) + { + case EbtPixelLocalANGLE: + return EbtFloat; + case EbtIPixelLocalANGLE: + return EbtInt; + case EbtUPixelLocalANGLE: + return EbtUInt; + default: + UNREACHABLE(); + return EbtVoid; + } +} + +constexpr static TBasicType DataTypeOfImageType(TBasicType imageType) +{ + switch (imageType) + { + case EbtImage2D: + return EbtFloat; + case EbtIImage2D: + return EbtInt; + case EbtUImage2D: + return EbtUInt; + default: + UNREACHABLE(); + return EbtVoid; + } +} + +// Maps PLS symbols to a backing store. +template <typename T> +class PLSBackingStoreMap +{ + public: + // Sets the given variable as the backing storage for the plsSymbol's binding point. An entry + // must not already exist in the map for this binding point. + void insertNew(TIntermSymbol *plsSymbol, const T &backingStore) + { + ASSERT(plsSymbol); + ASSERT(IsPixelLocal(plsSymbol->getBasicType())); + int binding = plsSymbol->getType().getLayoutQualifier().binding; + ASSERT(binding >= 0); + auto result = mMap.insert({binding, backingStore}); + ASSERT(result.second); // Ensure an image didn't already exist for this symbol. + } + + // Looks up the backing store for the given plsSymbol's binding point. An entry must already + // exist in the map for this binding point. + const T &find(TIntermSymbol *plsSymbol) + { + ASSERT(plsSymbol); + ASSERT(IsPixelLocal(plsSymbol->getBasicType())); + int binding = plsSymbol->getType().getLayoutQualifier().binding; + ASSERT(binding >= 0); + auto iter = mMap.find(binding); + ASSERT(iter != mMap.end()); // Ensure PLSImages already exist for this symbol. + return iter->second; + } + + const std::map<int, T> &bindingOrderedMap() const { return mMap; } + + private: + // Use std::map so the backing stores are ordered by binding when we iterate. + std::map<int, T> mMap; +}; + +// Base class for rewriting high level PLS operations to AST operations specified by +// ShPixelLocalStorageType. +class RewritePLSTraverser : public TIntermTraverser +{ + public: + RewritePLSTraverser(TCompiler *compiler, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + int shaderVersion) + : TIntermTraverser(true, false, false, &symbolTable), + mCompiler(compiler), + mCompileOptions(&compileOptions), + mShaderVersion(shaderVersion) + {} + + bool visitDeclaration(Visit, TIntermDeclaration *decl) override + { + TIntermTyped *declVariable = (decl->getSequence())->front()->getAsTyped(); + ASSERT(declVariable); + + if (!IsPixelLocal(declVariable->getBasicType())) + { + return true; + } + + // PLS is not allowed in arrays. + ASSERT(!declVariable->isArray()); + + // This visitDeclaration doesn't get called for function arguments, and opaque types can + // otherwise only be uniforms. + ASSERT(declVariable->getQualifier() == EvqUniform); + + TIntermSymbol *plsSymbol = declVariable->getAsSymbolNode(); + ASSERT(plsSymbol); + + visitPLSDeclaration(plsSymbol); + + return false; + } + + bool visitAggregate(Visit, TIntermAggregate *aggregate) override + { + if (!BuiltInGroup::IsPixelLocal(aggregate->getOp())) + { + return true; + } + + const TIntermSequence &args = *aggregate->getSequence(); + ASSERT(args.size() >= 1); + TIntermSymbol *plsSymbol = args[0]->getAsSymbolNode(); + + // Rewrite pixelLocalLoadANGLE -> imageLoad. + if (aggregate->getOp() == EOpPixelLocalLoadANGLE) + { + visitPLSLoad(plsSymbol); + return false; // No need to recurse since this node is being dropped. + } + + // Rewrite pixelLocalStoreANGLE -> imageStore. + if (aggregate->getOp() == EOpPixelLocalStoreANGLE) + { + // Also hoist the 'value' expression into a temp. In the event of + // "pixelLocalStoreANGLE(..., pixelLocalLoadANGLE(...))", this ensures the load occurs + // _before_ any potential barriers required by the subclass. + // + // NOTE: It is generally unsafe to hoist function arguments due to short circuiting, + // e.g., "if (false && function(...))", but pixelLocalStoreANGLE returns type void, so + // it is safe in this particular case. + TType *valueType = new TType(DataTypeOfPLSType(plsSymbol->getBasicType()), + plsSymbol->getPrecision(), EvqTemporary, 4); + TVariable *valueVar = CreateTempVariable(mSymbolTable, valueType); + TIntermDeclaration *valueDecl = + CreateTempInitDeclarationNode(valueVar, args[1]->getAsTyped()); + valueDecl->traverse(this); // Rewrite any potential pixelLocalLoadANGLEs in valueDecl. + insertStatementInParentBlock(valueDecl); + + visitPLSStore(plsSymbol, valueVar); + return false; // No need to recurse since this node is being dropped. + } + + return true; + } + + // Called after rewrite. Injects one-time setup code that needs to run before any PLS accesses. + virtual void injectSetupCode(TCompiler *, + TSymbolTable &, + const ShCompileOptions &, + TIntermBlock *mainBody, + size_t plsBeginPosition) + {} + + // Called after rewrite. Injects one-time finalization code that needs to run after all PLS. + virtual void injectFinalizeCode(TCompiler *, + TSymbolTable &, + const ShCompileOptions &, + TIntermBlock *mainBody, + size_t plsEndPosition) + {} + + TVariable *globalPixelCoord() const { return mGlobalPixelCoord; } + + protected: + virtual void visitPLSDeclaration(TIntermSymbol *plsSymbol) = 0; + virtual void visitPLSLoad(TIntermSymbol *plsSymbol) = 0; + virtual void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) = 0; + + void ensureGlobalPixelCoordDeclared() + { + // Insert a global to hold the pixel coordinate as soon as we see PLS declared. This will be + // initialized at the beginning of main(). + if (!mGlobalPixelCoord) + { + TType *coordType = new TType(EbtInt, EbpHigh, EvqGlobal, 2); + mGlobalPixelCoord = CreateTempVariable(mSymbolTable, coordType); + insertStatementInParentBlock(CreateTempDeclarationNode(mGlobalPixelCoord)); + } + } + + const TCompiler *const mCompiler; + const ShCompileOptions *const mCompileOptions; + const int mShaderVersion; + + // Stores the shader invocation's pixel coordinate as "ivec2(floor(gl_FragCoord.xy))". + TVariable *mGlobalPixelCoord = nullptr; +}; + +// Rewrites high level PLS operations to shader image operations. +class RewritePLSToImagesTraverser : public RewritePLSTraverser +{ + public: + RewritePLSToImagesTraverser(TCompiler *compiler, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + int shaderVersion) + : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion) + {} + + private: + void visitPLSDeclaration(TIntermSymbol *plsSymbol) override + { + // Replace the PLS declaration with an image2D. + ensureGlobalPixelCoordDeclared(); + TVariable *image2D = createPLSImageReplacement(plsSymbol); + mImages.insertNew(plsSymbol, image2D); + queueReplacement(new TIntermDeclaration({new TIntermSymbol(image2D)}), + OriginalNode::IS_DROPPED); + } + + // Do all PLS formats need to be packed into r32f, r32i, or r32ui image2Ds? + bool needsR32Packing() const + { + return mCompileOptions->pls.type == ShPixelLocalStorageType::ImageStoreR32PackedFormats; + } + + // Creates an image2D that replaces a pixel local storage handle. + TVariable *createPLSImageReplacement(const TIntermSymbol *plsSymbol) + { + ASSERT(plsSymbol); + ASSERT(IsPixelLocal(plsSymbol->getBasicType())); + + TType *imageType = new TType(plsSymbol->getType()); + + TLayoutQualifier layoutQualifier = imageType->getLayoutQualifier(); + switch (layoutQualifier.imageInternalFormat) + { + case TLayoutImageInternalFormat::EiifRGBA8: + if (needsR32Packing()) + { + layoutQualifier.imageInternalFormat = EiifR32UI; + imageType->setPrecision(EbpHigh); + imageType->setBasicType(EbtUImage2D); + } + else + { + imageType->setBasicType(EbtImage2D); + } + break; + case TLayoutImageInternalFormat::EiifRGBA8I: + if (needsR32Packing()) + { + layoutQualifier.imageInternalFormat = EiifR32I; + imageType->setPrecision(EbpHigh); + } + imageType->setBasicType(EbtIImage2D); + break; + case TLayoutImageInternalFormat::EiifRGBA8UI: + if (needsR32Packing()) + { + layoutQualifier.imageInternalFormat = EiifR32UI; + imageType->setPrecision(EbpHigh); + } + imageType->setBasicType(EbtUImage2D); + break; + case TLayoutImageInternalFormat::EiifR32F: + imageType->setBasicType(EbtImage2D); + break; + case TLayoutImageInternalFormat::EiifR32UI: + imageType->setBasicType(EbtUImage2D); + break; + default: + UNREACHABLE(); + } + layoutQualifier.rasterOrdered = mCompileOptions->pls.fragmentSynchronizationType == + ShFragmentSynchronizationType::RasterizerOrderViews_D3D; + imageType->setLayoutQualifier(layoutQualifier); + + TMemoryQualifier memoryQualifier{}; + memoryQualifier.coherent = true; + memoryQualifier.restrictQualifier = true; + memoryQualifier.volatileQualifier = false; + // TODO(anglebug.com/7279): Maybe we could walk the tree first and see which PLS is used + // how. If the PLS is never loaded, we could add a writeonly qualifier, for example. + memoryQualifier.readonly = false; + memoryQualifier.writeonly = false; + imageType->setMemoryQualifier(memoryQualifier); + + const TVariable &plsVar = plsSymbol->variable(); + return new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(), + plsVar.extensions(), imageType); + } + + void visitPLSLoad(TIntermSymbol *plsSymbol) override + { + // Replace the pixelLocalLoadANGLE with imageLoad. + TVariable *image2D = mImages.find(plsSymbol); + ASSERT(mGlobalPixelCoord); + TIntermTyped *pls = CreateBuiltInFunctionCallNode( + "imageLoad", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord)}, + *mSymbolTable, 310); + pls = unpackImageDataIfNecessary(pls, plsSymbol, image2D); + queueReplacement(pls, OriginalNode::IS_DROPPED); + } + + // Unpacks the raw PLS data if the output shader language needs r32* packing. + TIntermTyped *unpackImageDataIfNecessary(TIntermTyped *data, + TIntermSymbol *plsSymbol, + TVariable *image2D) + { + TLayoutImageInternalFormat plsFormat = + plsSymbol->getType().getLayoutQualifier().imageInternalFormat; + TLayoutImageInternalFormat imageFormat = + image2D->getType().getLayoutQualifier().imageInternalFormat; + if (plsFormat == imageFormat) + { + return data; // This PLS storage isn't packed. + } + ASSERT(needsR32Packing()); + switch (plsFormat) + { + case EiifRGBA8: + // Unpack and normalize r,g,b,a from a single 32-bit unsigned int: + // + // unpackUnorm4x8(data.r) + // + data = CreateBuiltInFunctionCallNode("unpackUnorm4x8", {CreateSwizzle(data, 0)}, + *mSymbolTable, 310); + break; + case EiifRGBA8I: + case EiifRGBA8UI: + { + constexpr unsigned shifts[] = {24, 16, 8, 0}; + // Unpack r,g,b,a form a single (signed or unsigned) 32-bit int. Shift left, + // then right, to preserve the sign for ints. (highp integers are exactly + // 32-bit, two's compliment.) + // + // data.rrrr << uvec4(24, 16, 8, 0) >> 24u + // + data = CreateSwizzle(data, 0, 0, 0, 0); + data = new TIntermBinary(EOpBitShiftLeft, data, CreateUVecNode(shifts, 4, EbpHigh)); + data = new TIntermBinary(EOpBitShiftRight, data, CreateUIntNode(24)); + break; + } + default: + UNREACHABLE(); + } + return data; + } + + void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override + { + TVariable *image2D = mImages.find(plsSymbol); + TIntermTyped *packedData = clampAndPackPLSDataIfNecessary(value, plsSymbol, image2D); + + // Surround the store with memoryBarrierImage calls in order to ensure dependent stores and + // loads in a single shader invocation are coherent. From the ES 3.1 spec: + // + // Using variables declared as "coherent" guarantees only that the results of stores will + // be immediately visible to shader invocations using similarly-declared variables; + // calling MemoryBarrier is required to ensure that the stores are visible to other + // operations. + // + insertStatementsInParentBlock( + {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable, + 310)}, // Before. + {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable, + 310)}); // After. + + // Rewrite the pixelLocalStoreANGLE with imageStore. + ASSERT(mGlobalPixelCoord); + queueReplacement( + CreateBuiltInFunctionCallNode( + "imageStore", + {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord), packedData}, + *mSymbolTable, 310), + OriginalNode::IS_DROPPED); + } + + // Packs the PLS to raw data if the output shader language needs r32* packing. + TIntermTyped *clampAndPackPLSDataIfNecessary(TVariable *plsVar, + TIntermSymbol *plsSymbol, + TVariable *image2D) + { + TLayoutImageInternalFormat plsFormat = + plsSymbol->getType().getLayoutQualifier().imageInternalFormat; + // anglebug.com/7524: Storing to integer formats with values larger than can be represented + // is specified differently on different APIs. Clamp integer formats here to make it uniform + // and more GL-like. + switch (plsFormat) + { + case EiifRGBA8I: + { + // Clamp r,g,b,a to their min/max 8-bit values: + // + // plsVar = clamp(plsVar, -128, 127) & 0xff + // + TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode( + "clamp", + {new TIntermSymbol(plsVar), CreateIndexNode(-128), CreateIndexNode(127)}, + *mSymbolTable, mShaderVersion); + insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue)); + break; + } + case EiifRGBA8UI: + { + // Clamp r,g,b,a to their max 8-bit values: + // + // plsVar = min(plsVar, 255) + // + TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode( + "min", {new TIntermSymbol(plsVar), CreateUIntNode(255)}, *mSymbolTable, + mShaderVersion); + insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue)); + break; + } + default: + break; + } + TIntermTyped *result = new TIntermSymbol(plsVar); + TLayoutImageInternalFormat imageFormat = + image2D->getType().getLayoutQualifier().imageInternalFormat; + if (plsFormat == imageFormat) + { + return result; // This PLS storage isn't packed. + } + ASSERT(needsR32Packing()); + switch (plsFormat) + { + case EiifRGBA8: + { + if (mCompileOptions->passHighpToPackUnormSnormBuiltins) + { + // anglebug.com/7527: unpackUnorm4x8 doesn't work on Pixel 4 when passed + // a mediump vec4. Use an intermediate highp vec4. + // + // It's safe to inject a variable here because it happens right before + // pixelLocalStoreANGLE, which returns type void. (See visitAggregate.) + TType *highpType = new TType(EbtFloat, EbpHigh, EvqTemporary, 4); + TVariable *workaroundHighpVar = CreateTempVariable(mSymbolTable, highpType); + insertStatementInParentBlock( + CreateTempInitDeclarationNode(workaroundHighpVar, result)); + result = new TIntermSymbol(workaroundHighpVar); + } + + // Denormalize and pack r,g,b,a into a single 32-bit unsigned int: + // + // packUnorm4x8(workaroundHighpVar) + // + result = + CreateBuiltInFunctionCallNode("packUnorm4x8", {result}, *mSymbolTable, 310); + break; + } + case EiifRGBA8I: + case EiifRGBA8UI: + { + if (plsFormat == EiifRGBA8I) + { + // Mask off extra sign bits beyond 8. + // + // plsVar &= 0xff + // + insertStatementInParentBlock(new TIntermBinary( + EOpBitwiseAndAssign, new TIntermSymbol(plsVar), CreateIndexNode(0xff))); + } + // Pack r,g,b,a into a single 32-bit (signed or unsigned) int: + // + // r | (g << 8) | (b << 16) | (a << 24) + // + auto shiftComponent = [=](int componentIdx) { + return new TIntermBinary(EOpBitShiftLeft, + CreateSwizzle(new TIntermSymbol(plsVar), componentIdx), + CreateUIntNode(componentIdx * 8)); + }; + result = CreateSwizzle(result, 0); + result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(1)); + result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(2)); + result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(3)); + break; + } + default: + UNREACHABLE(); + } + // Convert the packed data to a {u,i}vec4 for imageStore. + TType imageStoreType(DataTypeOfImageType(image2D->getType().getBasicType()), 4); + return TIntermAggregate::CreateConstructor(imageStoreType, {result}); + } + + void injectSetupCode(TCompiler *compiler, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + TIntermBlock *mainBody, + size_t plsBeginPosition) override + { + // When PLS is implemented with images, early_fragment_tests ensure that depth/stencil + // can also block stores to PLS. + compiler->specifyEarlyFragmentTests(); + + // Delimit the beginning of a per-pixel critical section, if supported. This makes pixel + // local storage coherent. + // + // Either: GL_NV_fragment_shader_interlock + // GL_INTEL_fragment_shader_ordering + // GL_ARB_fragment_shader_interlock (may compile to + // SPV_EXT_fragment_shader_interlock) + switch (compileOptions.pls.fragmentSynchronizationType) + { + // ROVs don't need explicit synchronization calls. + case ShFragmentSynchronizationType::RasterizerOrderViews_D3D: + case ShFragmentSynchronizationType::NotSupported: + break; + case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL: + mainBody->insertStatement( + plsBeginPosition, + CreateBuiltInFunctionCallNode("beginInvocationInterlockNV", {}, symbolTable, + kESSLInternalBackendBuiltIns)); + break; + case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL: + mainBody->insertStatement( + plsBeginPosition, + CreateBuiltInFunctionCallNode("beginFragmentShaderOrderingINTEL", {}, + symbolTable, kESSLInternalBackendBuiltIns)); + break; + case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL: + mainBody->insertStatement( + plsBeginPosition, + CreateBuiltInFunctionCallNode("beginInvocationInterlockARB", {}, symbolTable, + kESSLInternalBackendBuiltIns)); + break; + default: + UNREACHABLE(); + } + } + + void injectFinalizeCode(TCompiler *, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + TIntermBlock *mainBody, + size_t plsEndPosition) override + { + // Delimit the end of the PLS critical section, if required. + // + // Either: GL_NV_fragment_shader_interlock + // GL_ARB_fragment_shader_interlock (may compile to + // SPV_EXT_fragment_shader_interlock) + switch (compileOptions.pls.fragmentSynchronizationType) + { + // ROVs don't need explicit synchronization calls. + case ShFragmentSynchronizationType::RasterizerOrderViews_D3D: + // GL_INTEL_fragment_shader_ordering doesn't have an "end()" call. + case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL: + case ShFragmentSynchronizationType::NotSupported: + break; + case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL: + + mainBody->insertStatement( + plsEndPosition, + CreateBuiltInFunctionCallNode("endInvocationInterlockNV", {}, symbolTable, + kESSLInternalBackendBuiltIns)); + break; + case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL: + mainBody->insertStatement( + plsEndPosition, + CreateBuiltInFunctionCallNode("endInvocationInterlockARB", {}, symbolTable, + kESSLInternalBackendBuiltIns)); + break; + default: + UNREACHABLE(); + } + } + + PLSBackingStoreMap<TVariable *> mImages; +}; + +// Rewrites high level PLS operations to framebuffer fetch operations. +class RewritePLSToFramebufferFetchTraverser : public RewritePLSTraverser +{ + public: + RewritePLSToFramebufferFetchTraverser(TCompiler *compiler, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + int shaderVersion) + : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion) + {} + + void visitPLSDeclaration(TIntermSymbol *plsSymbol) override + { + // Replace the PLS declaration with a framebuffer attachment. + PLSAttachment attachment(mCompiler, mSymbolTable, *mCompileOptions, plsSymbol->variable()); + mPLSAttachments.insertNew(plsSymbol, attachment); + insertStatementInParentBlock( + new TIntermDeclaration({new TIntermSymbol(attachment.fragmentVar)})); + queueReplacement(CreateTempDeclarationNode(attachment.accessVar), OriginalNode::IS_DROPPED); + } + + void visitPLSLoad(TIntermSymbol *plsSymbol) override + { + // Read our temporary accessVar. + const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol); + queueReplacement(attachment.expandAccessVar(), OriginalNode::IS_DROPPED); + } + + void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override + { + // Set our temporary accessVar. + const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol); + queueReplacement(CreateTempAssignmentNode(attachment.accessVar, attachment.swizzle(value)), + OriginalNode::IS_DROPPED); + } + + void injectSetupCode(TCompiler *compiler, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + TIntermBlock *mainBody, + size_t plsBeginPosition) override + { + // [OpenGL ES Version 3.0.6, 3.9.2.3 "Shader Output"]: Any colors, or color components, + // associated with a fragment that are not written by the fragment shader are undefined. + // + // [EXT_shader_framebuffer_fetch]: Prior to fragment shading, fragment outputs declared + // inout are populated with the value last written to the framebuffer at the same(x, y, + // sample) position. + // + // It's unclear from the EXT_shader_framebuffer_fetch spec whether inout fragment variables + // become undefined if not explicitly written, but either way, when this compiles to subpass + // loads in Vulkan, we definitely get undefined behavior if PLS variables are not written. + // + // To make sure every PLS variable gets written, we read them all before PLS operations, + // then write them all back out after all PLS is complete. + std::vector<TIntermNode *> plsPreloads; + plsPreloads.reserve(mPLSAttachments.bindingOrderedMap().size()); + for (const auto &entry : mPLSAttachments.bindingOrderedMap()) + { + const PLSAttachment &attachment = entry.second; + plsPreloads.push_back( + CreateTempAssignmentNode(attachment.accessVar, attachment.swizzleFragmentVar())); + } + mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsBeginPosition, + plsPreloads.begin(), plsPreloads.end()); + } + + void injectFinalizeCode(TCompiler *, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + TIntermBlock *mainBody, + size_t plsEndPosition) override + { + std::vector<TIntermNode *> plsWrites; + plsWrites.reserve(mPLSAttachments.bindingOrderedMap().size()); + for (const auto &entry : mPLSAttachments.bindingOrderedMap()) + { + const PLSAttachment &attachment = entry.second; + plsWrites.push_back(new TIntermBinary(EOpAssign, attachment.swizzleFragmentVar(), + new TIntermSymbol(attachment.accessVar))); + } + mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsEndPosition, + plsWrites.begin(), plsWrites.end()); + } + + private: + struct PLSAttachment + { + PLSAttachment(const TCompiler *compiler, + TSymbolTable *symbolTable, + const ShCompileOptions &compileOptions, + const TVariable &plsVar) + { + const TType &plsType = plsVar.getType(); + + TType *accessVarType; + switch (plsType.getLayoutQualifier().imageInternalFormat) + { + default: + UNREACHABLE(); + [[fallthrough]]; + case EiifRGBA8: + accessVarType = new TType(EbtFloat, 4); + break; + case EiifRGBA8I: + accessVarType = new TType(EbtInt, 4); + break; + case EiifRGBA8UI: + accessVarType = new TType(EbtUInt, 4); + break; + case EiifR32F: + accessVarType = new TType(EbtFloat, 1); + break; + case EiifR32UI: + accessVarType = new TType(EbtUInt, 1); + break; + } + accessVarType->setPrecision(plsType.getPrecision()); + accessVar = CreateTempVariable(symbolTable, accessVarType); + + // Qualcomm seems to want fragment outputs to be 4-component vectors, and produces a + // compile error from "inout uint". Our Metal translator also saturates color outputs to + // 4 components. And since the spec also seems silent on how many components an output + // must have, we always use 4. + TType *fragmentVarType = new TType(accessVarType->getBasicType(), 4); + fragmentVarType->setPrecision(plsType.getPrecision()); + fragmentVarType->setQualifier(EvqFragmentInOut); + + // PLS attachments are bound in reverse order from the rear. + TLayoutQualifier layoutQualifier = TLayoutQualifier::Create(); + layoutQualifier.location = + compiler->getResources().MaxCombinedDrawBuffersAndPixelLocalStoragePlanes - + plsType.getLayoutQualifier().binding - 1; + layoutQualifier.locationsSpecified = 1; + if (compileOptions.pls.fragmentSynchronizationType == + ShFragmentSynchronizationType::NotSupported) + { + // We're using EXT_shader_framebuffer_fetch_non_coherent, which requires the + // "noncoherent" qualifier. + layoutQualifier.noncoherent = true; + } + fragmentVarType->setLayoutQualifier(layoutQualifier); + + fragmentVar = new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(), + plsVar.extensions(), fragmentVarType); + } + + // Expands our accessVar to 4 components, regardless of the size of the pixel local storage + // internalformat. + TIntermTyped *expandAccessVar() const + { + TIntermTyped *expanded = new TIntermSymbol(accessVar); + if (accessVar->getType().getNominalSize() == 1) + { + switch (accessVar->getType().getBasicType()) + { + case EbtFloat: + expanded = TIntermAggregate::CreateConstructor( // "vec4(r, 0, 0, 1)" + TType(EbtFloat, 4), + {expanded, CreateFloatNode(0, EbpHigh), CreateFloatNode(0, EbpHigh), + CreateFloatNode(1, EbpHigh)}); + break; + case EbtUInt: + expanded = TIntermAggregate::CreateConstructor( // "uvec4(r, 0, 0, 1)" + TType(EbtUInt, 4), + {expanded, CreateUIntNode(0), CreateUIntNode(0), CreateUIntNode(1)}); + break; + default: + UNREACHABLE(); + break; + } + } + return expanded; + } + + // Swizzles a variable down to the same number of components as the PLS internalformat. + TIntermTyped *swizzle(TVariable *var) const + { + TIntermTyped *swizzled = new TIntermSymbol(var); + if (var->getType().getNominalSize() != accessVar->getType().getNominalSize()) + { + ASSERT(var->getType().getNominalSize() > accessVar->getType().getNominalSize()); + TVector swizzleOffsets{0, 1, 2, 3}; + swizzleOffsets.resize(accessVar->getType().getNominalSize()); + swizzled = new TIntermSwizzle(swizzled, swizzleOffsets); + } + return swizzled; + } + + TIntermTyped *swizzleFragmentVar() const { return swizzle(fragmentVar); } + + TVariable *fragmentVar; + TVariable *accessVar; + }; + + PLSBackingStoreMap<PLSAttachment> mPLSAttachments; +}; +} // anonymous namespace + +bool RewritePixelLocalStorage(TCompiler *compiler, + TIntermBlock *root, + TSymbolTable &symbolTable, + const ShCompileOptions &compileOptions, + int shaderVersion) +{ + // If any functions take PLS arguments, monomorphize the functions by removing said parameters + // and making the PLS calls from main() instead, using the global uniform from the call site + // instead of the function argument. This is necessary because function arguments don't carry + // the necessary "binding" or "format" layout qualifiers. + if (!MonomorphizeUnsupportedFunctions( + compiler, root, &symbolTable, compileOptions, + UnsupportedFunctionArgsBitSet{UnsupportedFunctionArgs::PixelLocalStorage})) + { + return false; + } + + TIntermBlock *mainBody = FindMainBody(root); + + std::unique_ptr<RewritePLSTraverser> traverser; + switch (compileOptions.pls.type) + { + case ShPixelLocalStorageType::ImageStoreR32PackedFormats: + case ShPixelLocalStorageType::ImageStoreNativeFormats: + traverser = std::make_unique<RewritePLSToImagesTraverser>( + compiler, symbolTable, compileOptions, shaderVersion); + break; + case ShPixelLocalStorageType::FramebufferFetch: + traverser = std::make_unique<RewritePLSToFramebufferFetchTraverser>( + compiler, symbolTable, compileOptions, shaderVersion); + break; + default: + UNREACHABLE(); + return false; + } + + // Rewrite PLS operations to image operations. + root->traverse(traverser.get()); + if (!traverser->updateTree(compiler, root)) + { + return false; + } + + // Inject the code that needs to run before and after all PLS operations. + // TODO(anglebug.com/7279): Inject these functions in a tight critical section, instead of + // just locking the entire main() function: + // - Monomorphize all PLS calls into main(). + // - Insert begin/end calls around the first/last PLS calls (and outside of flow control). + traverser->injectSetupCode(compiler, symbolTable, compileOptions, mainBody, 0); + traverser->injectFinalizeCode(compiler, symbolTable, compileOptions, mainBody, + mainBody->getChildCount()); + + if (traverser->globalPixelCoord()) + { + // Initialize the global pixel coord at the beginning of main(): + // + // pixelCoord = ivec2(floor(gl_FragCoord.xy)); + // + TIntermTyped *exp; + exp = ReferenceBuiltInVariable(ImmutableString("gl_FragCoord"), symbolTable, shaderVersion); + exp = CreateSwizzle(exp, 0, 1); + exp = CreateBuiltInFunctionCallNode("floor", {exp}, symbolTable, shaderVersion); + exp = TIntermAggregate::CreateConstructor(TType(EbtInt, 2), {exp}); + exp = CreateTempAssignmentNode(traverser->globalPixelCoord(), exp); + mainBody->insertStatement(0, exp); + } + + return compiler->validateAST(root); +} +} // namespace sh |