summaryrefslogtreecommitdiffstats
path: root/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp')
-rw-r--r--gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp861
1 files changed, 861 insertions, 0 deletions
diff --git a/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp b/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp
new file mode 100644
index 0000000000..1c86cafe03
--- /dev/null
+++ b/gfx/angle/checkout/src/compiler/translator/tree_ops/RewritePixelLocalStorage.cpp
@@ -0,0 +1,861 @@
+//
+// Copyright 2022 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+
+#include "compiler/translator/tree_ops/RewritePixelLocalStorage.h"
+
+#include "common/angleutils.h"
+#include "compiler/translator/StaticType.h"
+#include "compiler/translator/SymbolTable.h"
+#include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h"
+#include "compiler/translator/tree_util/BuiltIn.h"
+#include "compiler/translator/tree_util/FindMain.h"
+#include "compiler/translator/tree_util/IntermNode_util.h"
+#include "compiler/translator/tree_util/IntermTraverse.h"
+
+namespace sh
+{
+namespace
+{
+constexpr static TBasicType DataTypeOfPLSType(TBasicType plsType)
+{
+ switch (plsType)
+ {
+ case EbtPixelLocalANGLE:
+ return EbtFloat;
+ case EbtIPixelLocalANGLE:
+ return EbtInt;
+ case EbtUPixelLocalANGLE:
+ return EbtUInt;
+ default:
+ UNREACHABLE();
+ return EbtVoid;
+ }
+}
+
+constexpr static TBasicType DataTypeOfImageType(TBasicType imageType)
+{
+ switch (imageType)
+ {
+ case EbtImage2D:
+ return EbtFloat;
+ case EbtIImage2D:
+ return EbtInt;
+ case EbtUImage2D:
+ return EbtUInt;
+ default:
+ UNREACHABLE();
+ return EbtVoid;
+ }
+}
+
+// Maps PLS symbols to a backing store.
+template <typename T>
+class PLSBackingStoreMap
+{
+ public:
+ // Sets the given variable as the backing storage for the plsSymbol's binding point. An entry
+ // must not already exist in the map for this binding point.
+ void insertNew(TIntermSymbol *plsSymbol, const T &backingStore)
+ {
+ ASSERT(plsSymbol);
+ ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
+ int binding = plsSymbol->getType().getLayoutQualifier().binding;
+ ASSERT(binding >= 0);
+ auto result = mMap.insert({binding, backingStore});
+ ASSERT(result.second); // Ensure an image didn't already exist for this symbol.
+ }
+
+ // Looks up the backing store for the given plsSymbol's binding point. An entry must already
+ // exist in the map for this binding point.
+ const T &find(TIntermSymbol *plsSymbol)
+ {
+ ASSERT(plsSymbol);
+ ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
+ int binding = plsSymbol->getType().getLayoutQualifier().binding;
+ ASSERT(binding >= 0);
+ auto iter = mMap.find(binding);
+ ASSERT(iter != mMap.end()); // Ensure PLSImages already exist for this symbol.
+ return iter->second;
+ }
+
+ const std::map<int, T> &bindingOrderedMap() const { return mMap; }
+
+ private:
+ // Use std::map so the backing stores are ordered by binding when we iterate.
+ std::map<int, T> mMap;
+};
+
+// Base class for rewriting high level PLS operations to AST operations specified by
+// ShPixelLocalStorageType.
+class RewritePLSTraverser : public TIntermTraverser
+{
+ public:
+ RewritePLSTraverser(TCompiler *compiler,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ int shaderVersion)
+ : TIntermTraverser(true, false, false, &symbolTable),
+ mCompiler(compiler),
+ mCompileOptions(&compileOptions),
+ mShaderVersion(shaderVersion)
+ {}
+
+ bool visitDeclaration(Visit, TIntermDeclaration *decl) override
+ {
+ TIntermTyped *declVariable = (decl->getSequence())->front()->getAsTyped();
+ ASSERT(declVariable);
+
+ if (!IsPixelLocal(declVariable->getBasicType()))
+ {
+ return true;
+ }
+
+ // PLS is not allowed in arrays.
+ ASSERT(!declVariable->isArray());
+
+ // This visitDeclaration doesn't get called for function arguments, and opaque types can
+ // otherwise only be uniforms.
+ ASSERT(declVariable->getQualifier() == EvqUniform);
+
+ TIntermSymbol *plsSymbol = declVariable->getAsSymbolNode();
+ ASSERT(plsSymbol);
+
+ visitPLSDeclaration(plsSymbol);
+
+ return false;
+ }
+
+ bool visitAggregate(Visit, TIntermAggregate *aggregate) override
+ {
+ if (!BuiltInGroup::IsPixelLocal(aggregate->getOp()))
+ {
+ return true;
+ }
+
+ const TIntermSequence &args = *aggregate->getSequence();
+ ASSERT(args.size() >= 1);
+ TIntermSymbol *plsSymbol = args[0]->getAsSymbolNode();
+
+ // Rewrite pixelLocalLoadANGLE -> imageLoad.
+ if (aggregate->getOp() == EOpPixelLocalLoadANGLE)
+ {
+ visitPLSLoad(plsSymbol);
+ return false; // No need to recurse since this node is being dropped.
+ }
+
+ // Rewrite pixelLocalStoreANGLE -> imageStore.
+ if (aggregate->getOp() == EOpPixelLocalStoreANGLE)
+ {
+ // Also hoist the 'value' expression into a temp. In the event of
+ // "pixelLocalStoreANGLE(..., pixelLocalLoadANGLE(...))", this ensures the load occurs
+ // _before_ any potential barriers required by the subclass.
+ //
+ // NOTE: It is generally unsafe to hoist function arguments due to short circuiting,
+ // e.g., "if (false && function(...))", but pixelLocalStoreANGLE returns type void, so
+ // it is safe in this particular case.
+ TType *valueType = new TType(DataTypeOfPLSType(plsSymbol->getBasicType()),
+ plsSymbol->getPrecision(), EvqTemporary, 4);
+ TVariable *valueVar = CreateTempVariable(mSymbolTable, valueType);
+ TIntermDeclaration *valueDecl =
+ CreateTempInitDeclarationNode(valueVar, args[1]->getAsTyped());
+ valueDecl->traverse(this); // Rewrite any potential pixelLocalLoadANGLEs in valueDecl.
+ insertStatementInParentBlock(valueDecl);
+
+ visitPLSStore(plsSymbol, valueVar);
+ return false; // No need to recurse since this node is being dropped.
+ }
+
+ return true;
+ }
+
+ // Called after rewrite. Injects one-time setup code that needs to run before any PLS accesses.
+ virtual void injectSetupCode(TCompiler *,
+ TSymbolTable &,
+ const ShCompileOptions &,
+ TIntermBlock *mainBody,
+ size_t plsBeginPosition)
+ {}
+
+ // Called after rewrite. Injects one-time finalization code that needs to run after all PLS.
+ virtual void injectFinalizeCode(TCompiler *,
+ TSymbolTable &,
+ const ShCompileOptions &,
+ TIntermBlock *mainBody,
+ size_t plsEndPosition)
+ {}
+
+ TVariable *globalPixelCoord() const { return mGlobalPixelCoord; }
+
+ protected:
+ virtual void visitPLSDeclaration(TIntermSymbol *plsSymbol) = 0;
+ virtual void visitPLSLoad(TIntermSymbol *plsSymbol) = 0;
+ virtual void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) = 0;
+
+ void ensureGlobalPixelCoordDeclared()
+ {
+ // Insert a global to hold the pixel coordinate as soon as we see PLS declared. This will be
+ // initialized at the beginning of main().
+ if (!mGlobalPixelCoord)
+ {
+ TType *coordType = new TType(EbtInt, EbpHigh, EvqGlobal, 2);
+ mGlobalPixelCoord = CreateTempVariable(mSymbolTable, coordType);
+ insertStatementInParentBlock(CreateTempDeclarationNode(mGlobalPixelCoord));
+ }
+ }
+
+ const TCompiler *const mCompiler;
+ const ShCompileOptions *const mCompileOptions;
+ const int mShaderVersion;
+
+ // Stores the shader invocation's pixel coordinate as "ivec2(floor(gl_FragCoord.xy))".
+ TVariable *mGlobalPixelCoord = nullptr;
+};
+
+// Rewrites high level PLS operations to shader image operations.
+class RewritePLSToImagesTraverser : public RewritePLSTraverser
+{
+ public:
+ RewritePLSToImagesTraverser(TCompiler *compiler,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ int shaderVersion)
+ : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion)
+ {}
+
+ private:
+ void visitPLSDeclaration(TIntermSymbol *plsSymbol) override
+ {
+ // Replace the PLS declaration with an image2D.
+ ensureGlobalPixelCoordDeclared();
+ TVariable *image2D = createPLSImageReplacement(plsSymbol);
+ mImages.insertNew(plsSymbol, image2D);
+ queueReplacement(new TIntermDeclaration({new TIntermSymbol(image2D)}),
+ OriginalNode::IS_DROPPED);
+ }
+
+ // Do all PLS formats need to be packed into r32f, r32i, or r32ui image2Ds?
+ bool needsR32Packing() const
+ {
+ return mCompileOptions->pls.type == ShPixelLocalStorageType::ImageStoreR32PackedFormats;
+ }
+
+ // Creates an image2D that replaces a pixel local storage handle.
+ TVariable *createPLSImageReplacement(const TIntermSymbol *plsSymbol)
+ {
+ ASSERT(plsSymbol);
+ ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
+
+ TType *imageType = new TType(plsSymbol->getType());
+
+ TLayoutQualifier layoutQualifier = imageType->getLayoutQualifier();
+ switch (layoutQualifier.imageInternalFormat)
+ {
+ case TLayoutImageInternalFormat::EiifRGBA8:
+ if (needsR32Packing())
+ {
+ layoutQualifier.imageInternalFormat = EiifR32UI;
+ imageType->setPrecision(EbpHigh);
+ imageType->setBasicType(EbtUImage2D);
+ }
+ else
+ {
+ imageType->setBasicType(EbtImage2D);
+ }
+ break;
+ case TLayoutImageInternalFormat::EiifRGBA8I:
+ if (needsR32Packing())
+ {
+ layoutQualifier.imageInternalFormat = EiifR32I;
+ imageType->setPrecision(EbpHigh);
+ }
+ imageType->setBasicType(EbtIImage2D);
+ break;
+ case TLayoutImageInternalFormat::EiifRGBA8UI:
+ if (needsR32Packing())
+ {
+ layoutQualifier.imageInternalFormat = EiifR32UI;
+ imageType->setPrecision(EbpHigh);
+ }
+ imageType->setBasicType(EbtUImage2D);
+ break;
+ case TLayoutImageInternalFormat::EiifR32F:
+ imageType->setBasicType(EbtImage2D);
+ break;
+ case TLayoutImageInternalFormat::EiifR32UI:
+ imageType->setBasicType(EbtUImage2D);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ layoutQualifier.rasterOrdered = mCompileOptions->pls.fragmentSynchronizationType ==
+ ShFragmentSynchronizationType::RasterizerOrderViews_D3D;
+ imageType->setLayoutQualifier(layoutQualifier);
+
+ TMemoryQualifier memoryQualifier{};
+ memoryQualifier.coherent = true;
+ memoryQualifier.restrictQualifier = true;
+ memoryQualifier.volatileQualifier = false;
+ // TODO(anglebug.com/7279): Maybe we could walk the tree first and see which PLS is used
+ // how. If the PLS is never loaded, we could add a writeonly qualifier, for example.
+ memoryQualifier.readonly = false;
+ memoryQualifier.writeonly = false;
+ imageType->setMemoryQualifier(memoryQualifier);
+
+ const TVariable &plsVar = plsSymbol->variable();
+ return new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(),
+ plsVar.extensions(), imageType);
+ }
+
+ void visitPLSLoad(TIntermSymbol *plsSymbol) override
+ {
+ // Replace the pixelLocalLoadANGLE with imageLoad.
+ TVariable *image2D = mImages.find(plsSymbol);
+ ASSERT(mGlobalPixelCoord);
+ TIntermTyped *pls = CreateBuiltInFunctionCallNode(
+ "imageLoad", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord)},
+ *mSymbolTable, 310);
+ pls = unpackImageDataIfNecessary(pls, plsSymbol, image2D);
+ queueReplacement(pls, OriginalNode::IS_DROPPED);
+ }
+
+ // Unpacks the raw PLS data if the output shader language needs r32* packing.
+ TIntermTyped *unpackImageDataIfNecessary(TIntermTyped *data,
+ TIntermSymbol *plsSymbol,
+ TVariable *image2D)
+ {
+ TLayoutImageInternalFormat plsFormat =
+ plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
+ TLayoutImageInternalFormat imageFormat =
+ image2D->getType().getLayoutQualifier().imageInternalFormat;
+ if (plsFormat == imageFormat)
+ {
+ return data; // This PLS storage isn't packed.
+ }
+ ASSERT(needsR32Packing());
+ switch (plsFormat)
+ {
+ case EiifRGBA8:
+ // Unpack and normalize r,g,b,a from a single 32-bit unsigned int:
+ //
+ // unpackUnorm4x8(data.r)
+ //
+ data = CreateBuiltInFunctionCallNode("unpackUnorm4x8", {CreateSwizzle(data, 0)},
+ *mSymbolTable, 310);
+ break;
+ case EiifRGBA8I:
+ case EiifRGBA8UI:
+ {
+ constexpr unsigned shifts[] = {24, 16, 8, 0};
+ // Unpack r,g,b,a form a single (signed or unsigned) 32-bit int. Shift left,
+ // then right, to preserve the sign for ints. (highp integers are exactly
+ // 32-bit, two's compliment.)
+ //
+ // data.rrrr << uvec4(24, 16, 8, 0) >> 24u
+ //
+ data = CreateSwizzle(data, 0, 0, 0, 0);
+ data = new TIntermBinary(EOpBitShiftLeft, data, CreateUVecNode(shifts, 4, EbpHigh));
+ data = new TIntermBinary(EOpBitShiftRight, data, CreateUIntNode(24));
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+ return data;
+ }
+
+ void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override
+ {
+ TVariable *image2D = mImages.find(plsSymbol);
+ TIntermTyped *packedData = clampAndPackPLSDataIfNecessary(value, plsSymbol, image2D);
+
+ // Surround the store with memoryBarrierImage calls in order to ensure dependent stores and
+ // loads in a single shader invocation are coherent. From the ES 3.1 spec:
+ //
+ // Using variables declared as "coherent" guarantees only that the results of stores will
+ // be immediately visible to shader invocations using similarly-declared variables;
+ // calling MemoryBarrier is required to ensure that the stores are visible to other
+ // operations.
+ //
+ insertStatementsInParentBlock(
+ {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
+ 310)}, // Before.
+ {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
+ 310)}); // After.
+
+ // Rewrite the pixelLocalStoreANGLE with imageStore.
+ ASSERT(mGlobalPixelCoord);
+ queueReplacement(
+ CreateBuiltInFunctionCallNode(
+ "imageStore",
+ {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord), packedData},
+ *mSymbolTable, 310),
+ OriginalNode::IS_DROPPED);
+ }
+
+ // Packs the PLS to raw data if the output shader language needs r32* packing.
+ TIntermTyped *clampAndPackPLSDataIfNecessary(TVariable *plsVar,
+ TIntermSymbol *plsSymbol,
+ TVariable *image2D)
+ {
+ TLayoutImageInternalFormat plsFormat =
+ plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
+ // anglebug.com/7524: Storing to integer formats with values larger than can be represented
+ // is specified differently on different APIs. Clamp integer formats here to make it uniform
+ // and more GL-like.
+ switch (plsFormat)
+ {
+ case EiifRGBA8I:
+ {
+ // Clamp r,g,b,a to their min/max 8-bit values:
+ //
+ // plsVar = clamp(plsVar, -128, 127) & 0xff
+ //
+ TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
+ "clamp",
+ {new TIntermSymbol(plsVar), CreateIndexNode(-128), CreateIndexNode(127)},
+ *mSymbolTable, mShaderVersion);
+ insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
+ break;
+ }
+ case EiifRGBA8UI:
+ {
+ // Clamp r,g,b,a to their max 8-bit values:
+ //
+ // plsVar = min(plsVar, 255)
+ //
+ TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
+ "min", {new TIntermSymbol(plsVar), CreateUIntNode(255)}, *mSymbolTable,
+ mShaderVersion);
+ insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
+ break;
+ }
+ default:
+ break;
+ }
+ TIntermTyped *result = new TIntermSymbol(plsVar);
+ TLayoutImageInternalFormat imageFormat =
+ image2D->getType().getLayoutQualifier().imageInternalFormat;
+ if (plsFormat == imageFormat)
+ {
+ return result; // This PLS storage isn't packed.
+ }
+ ASSERT(needsR32Packing());
+ switch (plsFormat)
+ {
+ case EiifRGBA8:
+ {
+ if (mCompileOptions->passHighpToPackUnormSnormBuiltins)
+ {
+ // anglebug.com/7527: unpackUnorm4x8 doesn't work on Pixel 4 when passed
+ // a mediump vec4. Use an intermediate highp vec4.
+ //
+ // It's safe to inject a variable here because it happens right before
+ // pixelLocalStoreANGLE, which returns type void. (See visitAggregate.)
+ TType *highpType = new TType(EbtFloat, EbpHigh, EvqTemporary, 4);
+ TVariable *workaroundHighpVar = CreateTempVariable(mSymbolTable, highpType);
+ insertStatementInParentBlock(
+ CreateTempInitDeclarationNode(workaroundHighpVar, result));
+ result = new TIntermSymbol(workaroundHighpVar);
+ }
+
+ // Denormalize and pack r,g,b,a into a single 32-bit unsigned int:
+ //
+ // packUnorm4x8(workaroundHighpVar)
+ //
+ result =
+ CreateBuiltInFunctionCallNode("packUnorm4x8", {result}, *mSymbolTable, 310);
+ break;
+ }
+ case EiifRGBA8I:
+ case EiifRGBA8UI:
+ {
+ if (plsFormat == EiifRGBA8I)
+ {
+ // Mask off extra sign bits beyond 8.
+ //
+ // plsVar &= 0xff
+ //
+ insertStatementInParentBlock(new TIntermBinary(
+ EOpBitwiseAndAssign, new TIntermSymbol(plsVar), CreateIndexNode(0xff)));
+ }
+ // Pack r,g,b,a into a single 32-bit (signed or unsigned) int:
+ //
+ // r | (g << 8) | (b << 16) | (a << 24)
+ //
+ auto shiftComponent = [=](int componentIdx) {
+ return new TIntermBinary(EOpBitShiftLeft,
+ CreateSwizzle(new TIntermSymbol(plsVar), componentIdx),
+ CreateUIntNode(componentIdx * 8));
+ };
+ result = CreateSwizzle(result, 0);
+ result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(1));
+ result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(2));
+ result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(3));
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+ // Convert the packed data to a {u,i}vec4 for imageStore.
+ TType imageStoreType(DataTypeOfImageType(image2D->getType().getBasicType()), 4);
+ return TIntermAggregate::CreateConstructor(imageStoreType, {result});
+ }
+
+ void injectSetupCode(TCompiler *compiler,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ TIntermBlock *mainBody,
+ size_t plsBeginPosition) override
+ {
+ // When PLS is implemented with images, early_fragment_tests ensure that depth/stencil
+ // can also block stores to PLS.
+ compiler->specifyEarlyFragmentTests();
+
+ // Delimit the beginning of a per-pixel critical section, if supported. This makes pixel
+ // local storage coherent.
+ //
+ // Either: GL_NV_fragment_shader_interlock
+ // GL_INTEL_fragment_shader_ordering
+ // GL_ARB_fragment_shader_interlock (may compile to
+ // SPV_EXT_fragment_shader_interlock)
+ switch (compileOptions.pls.fragmentSynchronizationType)
+ {
+ // ROVs don't need explicit synchronization calls.
+ case ShFragmentSynchronizationType::RasterizerOrderViews_D3D:
+ case ShFragmentSynchronizationType::NotSupported:
+ break;
+ case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
+ mainBody->insertStatement(
+ plsBeginPosition,
+ CreateBuiltInFunctionCallNode("beginInvocationInterlockNV", {}, symbolTable,
+ kESSLInternalBackendBuiltIns));
+ break;
+ case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
+ mainBody->insertStatement(
+ plsBeginPosition,
+ CreateBuiltInFunctionCallNode("beginFragmentShaderOrderingINTEL", {},
+ symbolTable, kESSLInternalBackendBuiltIns));
+ break;
+ case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
+ mainBody->insertStatement(
+ plsBeginPosition,
+ CreateBuiltInFunctionCallNode("beginInvocationInterlockARB", {}, symbolTable,
+ kESSLInternalBackendBuiltIns));
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ void injectFinalizeCode(TCompiler *,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ TIntermBlock *mainBody,
+ size_t plsEndPosition) override
+ {
+ // Delimit the end of the PLS critical section, if required.
+ //
+ // Either: GL_NV_fragment_shader_interlock
+ // GL_ARB_fragment_shader_interlock (may compile to
+ // SPV_EXT_fragment_shader_interlock)
+ switch (compileOptions.pls.fragmentSynchronizationType)
+ {
+ // ROVs don't need explicit synchronization calls.
+ case ShFragmentSynchronizationType::RasterizerOrderViews_D3D:
+ // GL_INTEL_fragment_shader_ordering doesn't have an "end()" call.
+ case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
+ case ShFragmentSynchronizationType::NotSupported:
+ break;
+ case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
+
+ mainBody->insertStatement(
+ plsEndPosition,
+ CreateBuiltInFunctionCallNode("endInvocationInterlockNV", {}, symbolTable,
+ kESSLInternalBackendBuiltIns));
+ break;
+ case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
+ mainBody->insertStatement(
+ plsEndPosition,
+ CreateBuiltInFunctionCallNode("endInvocationInterlockARB", {}, symbolTable,
+ kESSLInternalBackendBuiltIns));
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ PLSBackingStoreMap<TVariable *> mImages;
+};
+
+// Rewrites high level PLS operations to framebuffer fetch operations.
+class RewritePLSToFramebufferFetchTraverser : public RewritePLSTraverser
+{
+ public:
+ RewritePLSToFramebufferFetchTraverser(TCompiler *compiler,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ int shaderVersion)
+ : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion)
+ {}
+
+ void visitPLSDeclaration(TIntermSymbol *plsSymbol) override
+ {
+ // Replace the PLS declaration with a framebuffer attachment.
+ PLSAttachment attachment(mCompiler, mSymbolTable, *mCompileOptions, plsSymbol->variable());
+ mPLSAttachments.insertNew(plsSymbol, attachment);
+ insertStatementInParentBlock(
+ new TIntermDeclaration({new TIntermSymbol(attachment.fragmentVar)}));
+ queueReplacement(CreateTempDeclarationNode(attachment.accessVar), OriginalNode::IS_DROPPED);
+ }
+
+ void visitPLSLoad(TIntermSymbol *plsSymbol) override
+ {
+ // Read our temporary accessVar.
+ const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol);
+ queueReplacement(attachment.expandAccessVar(), OriginalNode::IS_DROPPED);
+ }
+
+ void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override
+ {
+ // Set our temporary accessVar.
+ const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol);
+ queueReplacement(CreateTempAssignmentNode(attachment.accessVar, attachment.swizzle(value)),
+ OriginalNode::IS_DROPPED);
+ }
+
+ void injectSetupCode(TCompiler *compiler,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ TIntermBlock *mainBody,
+ size_t plsBeginPosition) override
+ {
+ // [OpenGL ES Version 3.0.6, 3.9.2.3 "Shader Output"]: Any colors, or color components,
+ // associated with a fragment that are not written by the fragment shader are undefined.
+ //
+ // [EXT_shader_framebuffer_fetch]: Prior to fragment shading, fragment outputs declared
+ // inout are populated with the value last written to the framebuffer at the same(x, y,
+ // sample) position.
+ //
+ // It's unclear from the EXT_shader_framebuffer_fetch spec whether inout fragment variables
+ // become undefined if not explicitly written, but either way, when this compiles to subpass
+ // loads in Vulkan, we definitely get undefined behavior if PLS variables are not written.
+ //
+ // To make sure every PLS variable gets written, we read them all before PLS operations,
+ // then write them all back out after all PLS is complete.
+ std::vector<TIntermNode *> plsPreloads;
+ plsPreloads.reserve(mPLSAttachments.bindingOrderedMap().size());
+ for (const auto &entry : mPLSAttachments.bindingOrderedMap())
+ {
+ const PLSAttachment &attachment = entry.second;
+ plsPreloads.push_back(
+ CreateTempAssignmentNode(attachment.accessVar, attachment.swizzleFragmentVar()));
+ }
+ mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsBeginPosition,
+ plsPreloads.begin(), plsPreloads.end());
+ }
+
+ void injectFinalizeCode(TCompiler *,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ TIntermBlock *mainBody,
+ size_t plsEndPosition) override
+ {
+ std::vector<TIntermNode *> plsWrites;
+ plsWrites.reserve(mPLSAttachments.bindingOrderedMap().size());
+ for (const auto &entry : mPLSAttachments.bindingOrderedMap())
+ {
+ const PLSAttachment &attachment = entry.second;
+ plsWrites.push_back(new TIntermBinary(EOpAssign, attachment.swizzleFragmentVar(),
+ new TIntermSymbol(attachment.accessVar)));
+ }
+ mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsEndPosition,
+ plsWrites.begin(), plsWrites.end());
+ }
+
+ private:
+ struct PLSAttachment
+ {
+ PLSAttachment(const TCompiler *compiler,
+ TSymbolTable *symbolTable,
+ const ShCompileOptions &compileOptions,
+ const TVariable &plsVar)
+ {
+ const TType &plsType = plsVar.getType();
+
+ TType *accessVarType;
+ switch (plsType.getLayoutQualifier().imageInternalFormat)
+ {
+ default:
+ UNREACHABLE();
+ [[fallthrough]];
+ case EiifRGBA8:
+ accessVarType = new TType(EbtFloat, 4);
+ break;
+ case EiifRGBA8I:
+ accessVarType = new TType(EbtInt, 4);
+ break;
+ case EiifRGBA8UI:
+ accessVarType = new TType(EbtUInt, 4);
+ break;
+ case EiifR32F:
+ accessVarType = new TType(EbtFloat, 1);
+ break;
+ case EiifR32UI:
+ accessVarType = new TType(EbtUInt, 1);
+ break;
+ }
+ accessVarType->setPrecision(plsType.getPrecision());
+ accessVar = CreateTempVariable(symbolTable, accessVarType);
+
+ // Qualcomm seems to want fragment outputs to be 4-component vectors, and produces a
+ // compile error from "inout uint". Our Metal translator also saturates color outputs to
+ // 4 components. And since the spec also seems silent on how many components an output
+ // must have, we always use 4.
+ TType *fragmentVarType = new TType(accessVarType->getBasicType(), 4);
+ fragmentVarType->setPrecision(plsType.getPrecision());
+ fragmentVarType->setQualifier(EvqFragmentInOut);
+
+ // PLS attachments are bound in reverse order from the rear.
+ TLayoutQualifier layoutQualifier = TLayoutQualifier::Create();
+ layoutQualifier.location =
+ compiler->getResources().MaxCombinedDrawBuffersAndPixelLocalStoragePlanes -
+ plsType.getLayoutQualifier().binding - 1;
+ layoutQualifier.locationsSpecified = 1;
+ if (compileOptions.pls.fragmentSynchronizationType ==
+ ShFragmentSynchronizationType::NotSupported)
+ {
+ // We're using EXT_shader_framebuffer_fetch_non_coherent, which requires the
+ // "noncoherent" qualifier.
+ layoutQualifier.noncoherent = true;
+ }
+ fragmentVarType->setLayoutQualifier(layoutQualifier);
+
+ fragmentVar = new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(),
+ plsVar.extensions(), fragmentVarType);
+ }
+
+ // Expands our accessVar to 4 components, regardless of the size of the pixel local storage
+ // internalformat.
+ TIntermTyped *expandAccessVar() const
+ {
+ TIntermTyped *expanded = new TIntermSymbol(accessVar);
+ if (accessVar->getType().getNominalSize() == 1)
+ {
+ switch (accessVar->getType().getBasicType())
+ {
+ case EbtFloat:
+ expanded = TIntermAggregate::CreateConstructor( // "vec4(r, 0, 0, 1)"
+ TType(EbtFloat, 4),
+ {expanded, CreateFloatNode(0, EbpHigh), CreateFloatNode(0, EbpHigh),
+ CreateFloatNode(1, EbpHigh)});
+ break;
+ case EbtUInt:
+ expanded = TIntermAggregate::CreateConstructor( // "uvec4(r, 0, 0, 1)"
+ TType(EbtUInt, 4),
+ {expanded, CreateUIntNode(0), CreateUIntNode(0), CreateUIntNode(1)});
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ }
+ return expanded;
+ }
+
+ // Swizzles a variable down to the same number of components as the PLS internalformat.
+ TIntermTyped *swizzle(TVariable *var) const
+ {
+ TIntermTyped *swizzled = new TIntermSymbol(var);
+ if (var->getType().getNominalSize() != accessVar->getType().getNominalSize())
+ {
+ ASSERT(var->getType().getNominalSize() > accessVar->getType().getNominalSize());
+ TVector swizzleOffsets{0, 1, 2, 3};
+ swizzleOffsets.resize(accessVar->getType().getNominalSize());
+ swizzled = new TIntermSwizzle(swizzled, swizzleOffsets);
+ }
+ return swizzled;
+ }
+
+ TIntermTyped *swizzleFragmentVar() const { return swizzle(fragmentVar); }
+
+ TVariable *fragmentVar;
+ TVariable *accessVar;
+ };
+
+ PLSBackingStoreMap<PLSAttachment> mPLSAttachments;
+};
+} // anonymous namespace
+
+bool RewritePixelLocalStorage(TCompiler *compiler,
+ TIntermBlock *root,
+ TSymbolTable &symbolTable,
+ const ShCompileOptions &compileOptions,
+ int shaderVersion)
+{
+ // If any functions take PLS arguments, monomorphize the functions by removing said parameters
+ // and making the PLS calls from main() instead, using the global uniform from the call site
+ // instead of the function argument. This is necessary because function arguments don't carry
+ // the necessary "binding" or "format" layout qualifiers.
+ if (!MonomorphizeUnsupportedFunctions(
+ compiler, root, &symbolTable, compileOptions,
+ UnsupportedFunctionArgsBitSet{UnsupportedFunctionArgs::PixelLocalStorage}))
+ {
+ return false;
+ }
+
+ TIntermBlock *mainBody = FindMainBody(root);
+
+ std::unique_ptr<RewritePLSTraverser> traverser;
+ switch (compileOptions.pls.type)
+ {
+ case ShPixelLocalStorageType::ImageStoreR32PackedFormats:
+ case ShPixelLocalStorageType::ImageStoreNativeFormats:
+ traverser = std::make_unique<RewritePLSToImagesTraverser>(
+ compiler, symbolTable, compileOptions, shaderVersion);
+ break;
+ case ShPixelLocalStorageType::FramebufferFetch:
+ traverser = std::make_unique<RewritePLSToFramebufferFetchTraverser>(
+ compiler, symbolTable, compileOptions, shaderVersion);
+ break;
+ default:
+ UNREACHABLE();
+ return false;
+ }
+
+ // Rewrite PLS operations to image operations.
+ root->traverse(traverser.get());
+ if (!traverser->updateTree(compiler, root))
+ {
+ return false;
+ }
+
+ // Inject the code that needs to run before and after all PLS operations.
+ // TODO(anglebug.com/7279): Inject these functions in a tight critical section, instead of
+ // just locking the entire main() function:
+ // - Monomorphize all PLS calls into main().
+ // - Insert begin/end calls around the first/last PLS calls (and outside of flow control).
+ traverser->injectSetupCode(compiler, symbolTable, compileOptions, mainBody, 0);
+ traverser->injectFinalizeCode(compiler, symbolTable, compileOptions, mainBody,
+ mainBody->getChildCount());
+
+ if (traverser->globalPixelCoord())
+ {
+ // Initialize the global pixel coord at the beginning of main():
+ //
+ // pixelCoord = ivec2(floor(gl_FragCoord.xy));
+ //
+ TIntermTyped *exp;
+ exp = ReferenceBuiltInVariable(ImmutableString("gl_FragCoord"), symbolTable, shaderVersion);
+ exp = CreateSwizzle(exp, 0, 1);
+ exp = CreateBuiltInFunctionCallNode("floor", {exp}, symbolTable, shaderVersion);
+ exp = TIntermAggregate::CreateConstructor(TType(EbtInt, 2), {exp});
+ exp = CreateTempAssignmentNode(traverser->globalPixelCoord(), exp);
+ mainBody->insertStatement(0, exp);
+ }
+
+ return compiler->validateAST(root);
+}
+} // namespace sh