// // Copyright 2022 The ANGLE Project Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // #include "compiler/translator/tree_ops/RewritePixelLocalStorage.h" #include "common/angleutils.h" #include "compiler/translator/StaticType.h" #include "compiler/translator/SymbolTable.h" #include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h" #include "compiler/translator/tree_util/BuiltIn.h" #include "compiler/translator/tree_util/FindMain.h" #include "compiler/translator/tree_util/IntermNode_util.h" #include "compiler/translator/tree_util/IntermTraverse.h" namespace sh { namespace { constexpr static TBasicType DataTypeOfPLSType(TBasicType plsType) { switch (plsType) { case EbtPixelLocalANGLE: return EbtFloat; case EbtIPixelLocalANGLE: return EbtInt; case EbtUPixelLocalANGLE: return EbtUInt; default: UNREACHABLE(); return EbtVoid; } } constexpr static TBasicType DataTypeOfImageType(TBasicType imageType) { switch (imageType) { case EbtImage2D: return EbtFloat; case EbtIImage2D: return EbtInt; case EbtUImage2D: return EbtUInt; default: UNREACHABLE(); return EbtVoid; } } // Maps PLS symbols to a backing store. template class PLSBackingStoreMap { public: // Sets the given variable as the backing storage for the plsSymbol's binding point. An entry // must not already exist in the map for this binding point. void insertNew(TIntermSymbol *plsSymbol, const T &backingStore) { ASSERT(plsSymbol); ASSERT(IsPixelLocal(plsSymbol->getBasicType())); int binding = plsSymbol->getType().getLayoutQualifier().binding; ASSERT(binding >= 0); auto result = mMap.insert({binding, backingStore}); ASSERT(result.second); // Ensure an image didn't already exist for this symbol. } // Looks up the backing store for the given plsSymbol's binding point. An entry must already // exist in the map for this binding point. const T &find(TIntermSymbol *plsSymbol) { ASSERT(plsSymbol); ASSERT(IsPixelLocal(plsSymbol->getBasicType())); int binding = plsSymbol->getType().getLayoutQualifier().binding; ASSERT(binding >= 0); auto iter = mMap.find(binding); ASSERT(iter != mMap.end()); // Ensure PLSImages already exist for this symbol. return iter->second; } const std::map &bindingOrderedMap() const { return mMap; } private: // Use std::map so the backing stores are ordered by binding when we iterate. std::map mMap; }; // Base class for rewriting high level PLS operations to AST operations specified by // ShPixelLocalStorageType. class RewritePLSTraverser : public TIntermTraverser { public: RewritePLSTraverser(TCompiler *compiler, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, int shaderVersion) : TIntermTraverser(true, false, false, &symbolTable), mCompiler(compiler), mCompileOptions(&compileOptions), mShaderVersion(shaderVersion) {} bool visitDeclaration(Visit, TIntermDeclaration *decl) override { TIntermTyped *declVariable = (decl->getSequence())->front()->getAsTyped(); ASSERT(declVariable); if (!IsPixelLocal(declVariable->getBasicType())) { return true; } // PLS is not allowed in arrays. ASSERT(!declVariable->isArray()); // This visitDeclaration doesn't get called for function arguments, and opaque types can // otherwise only be uniforms. ASSERT(declVariable->getQualifier() == EvqUniform); TIntermSymbol *plsSymbol = declVariable->getAsSymbolNode(); ASSERT(plsSymbol); visitPLSDeclaration(plsSymbol); return false; } bool visitAggregate(Visit, TIntermAggregate *aggregate) override { if (!BuiltInGroup::IsPixelLocal(aggregate->getOp())) { return true; } const TIntermSequence &args = *aggregate->getSequence(); ASSERT(args.size() >= 1); TIntermSymbol *plsSymbol = args[0]->getAsSymbolNode(); // Rewrite pixelLocalLoadANGLE -> imageLoad. if (aggregate->getOp() == EOpPixelLocalLoadANGLE) { visitPLSLoad(plsSymbol); return false; // No need to recurse since this node is being dropped. } // Rewrite pixelLocalStoreANGLE -> imageStore. if (aggregate->getOp() == EOpPixelLocalStoreANGLE) { // Also hoist the 'value' expression into a temp. In the event of // "pixelLocalStoreANGLE(..., pixelLocalLoadANGLE(...))", this ensures the load occurs // _before_ any potential barriers required by the subclass. // // NOTE: It is generally unsafe to hoist function arguments due to short circuiting, // e.g., "if (false && function(...))", but pixelLocalStoreANGLE returns type void, so // it is safe in this particular case. TType *valueType = new TType(DataTypeOfPLSType(plsSymbol->getBasicType()), plsSymbol->getPrecision(), EvqTemporary, 4); TVariable *valueVar = CreateTempVariable(mSymbolTable, valueType); TIntermDeclaration *valueDecl = CreateTempInitDeclarationNode(valueVar, args[1]->getAsTyped()); valueDecl->traverse(this); // Rewrite any potential pixelLocalLoadANGLEs in valueDecl. insertStatementInParentBlock(valueDecl); visitPLSStore(plsSymbol, valueVar); return false; // No need to recurse since this node is being dropped. } return true; } // Called after rewrite. Injects one-time setup code that needs to run before any PLS accesses. virtual void injectSetupCode(TCompiler *, TSymbolTable &, const ShCompileOptions &, TIntermBlock *mainBody, size_t plsBeginPosition) {} // Called after rewrite. Injects one-time finalization code that needs to run after all PLS. virtual void injectFinalizeCode(TCompiler *, TSymbolTable &, const ShCompileOptions &, TIntermBlock *mainBody, size_t plsEndPosition) {} TVariable *globalPixelCoord() const { return mGlobalPixelCoord; } protected: virtual void visitPLSDeclaration(TIntermSymbol *plsSymbol) = 0; virtual void visitPLSLoad(TIntermSymbol *plsSymbol) = 0; virtual void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) = 0; void ensureGlobalPixelCoordDeclared() { // Insert a global to hold the pixel coordinate as soon as we see PLS declared. This will be // initialized at the beginning of main(). if (!mGlobalPixelCoord) { TType *coordType = new TType(EbtInt, EbpHigh, EvqGlobal, 2); mGlobalPixelCoord = CreateTempVariable(mSymbolTable, coordType); insertStatementInParentBlock(CreateTempDeclarationNode(mGlobalPixelCoord)); } } const TCompiler *const mCompiler; const ShCompileOptions *const mCompileOptions; const int mShaderVersion; // Stores the shader invocation's pixel coordinate as "ivec2(floor(gl_FragCoord.xy))". TVariable *mGlobalPixelCoord = nullptr; }; // Rewrites high level PLS operations to shader image operations. class RewritePLSToImagesTraverser : public RewritePLSTraverser { public: RewritePLSToImagesTraverser(TCompiler *compiler, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, int shaderVersion) : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion) {} private: void visitPLSDeclaration(TIntermSymbol *plsSymbol) override { // Replace the PLS declaration with an image2D. ensureGlobalPixelCoordDeclared(); TVariable *image2D = createPLSImageReplacement(plsSymbol); mImages.insertNew(plsSymbol, image2D); queueReplacement(new TIntermDeclaration({new TIntermSymbol(image2D)}), OriginalNode::IS_DROPPED); } // Do all PLS formats need to be packed into r32f, r32i, or r32ui image2Ds? bool needsR32Packing() const { return mCompileOptions->pls.type == ShPixelLocalStorageType::ImageStoreR32PackedFormats; } // Creates an image2D that replaces a pixel local storage handle. TVariable *createPLSImageReplacement(const TIntermSymbol *plsSymbol) { ASSERT(plsSymbol); ASSERT(IsPixelLocal(plsSymbol->getBasicType())); TType *imageType = new TType(plsSymbol->getType()); TLayoutQualifier layoutQualifier = imageType->getLayoutQualifier(); switch (layoutQualifier.imageInternalFormat) { case TLayoutImageInternalFormat::EiifRGBA8: if (needsR32Packing()) { layoutQualifier.imageInternalFormat = EiifR32UI; imageType->setPrecision(EbpHigh); imageType->setBasicType(EbtUImage2D); } else { imageType->setBasicType(EbtImage2D); } break; case TLayoutImageInternalFormat::EiifRGBA8I: if (needsR32Packing()) { layoutQualifier.imageInternalFormat = EiifR32I; imageType->setPrecision(EbpHigh); } imageType->setBasicType(EbtIImage2D); break; case TLayoutImageInternalFormat::EiifRGBA8UI: if (needsR32Packing()) { layoutQualifier.imageInternalFormat = EiifR32UI; imageType->setPrecision(EbpHigh); } imageType->setBasicType(EbtUImage2D); break; case TLayoutImageInternalFormat::EiifR32F: imageType->setBasicType(EbtImage2D); break; case TLayoutImageInternalFormat::EiifR32UI: imageType->setBasicType(EbtUImage2D); break; default: UNREACHABLE(); } layoutQualifier.rasterOrdered = mCompileOptions->pls.fragmentSynchronizationType == ShFragmentSynchronizationType::RasterizerOrderViews_D3D; imageType->setLayoutQualifier(layoutQualifier); TMemoryQualifier memoryQualifier{}; memoryQualifier.coherent = true; memoryQualifier.restrictQualifier = true; memoryQualifier.volatileQualifier = false; // TODO(anglebug.com/7279): Maybe we could walk the tree first and see which PLS is used // how. If the PLS is never loaded, we could add a writeonly qualifier, for example. memoryQualifier.readonly = false; memoryQualifier.writeonly = false; imageType->setMemoryQualifier(memoryQualifier); const TVariable &plsVar = plsSymbol->variable(); return new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(), plsVar.extensions(), imageType); } void visitPLSLoad(TIntermSymbol *plsSymbol) override { // Replace the pixelLocalLoadANGLE with imageLoad. TVariable *image2D = mImages.find(plsSymbol); ASSERT(mGlobalPixelCoord); TIntermTyped *pls = CreateBuiltInFunctionCallNode( "imageLoad", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord)}, *mSymbolTable, 310); pls = unpackImageDataIfNecessary(pls, plsSymbol, image2D); queueReplacement(pls, OriginalNode::IS_DROPPED); } // Unpacks the raw PLS data if the output shader language needs r32* packing. TIntermTyped *unpackImageDataIfNecessary(TIntermTyped *data, TIntermSymbol *plsSymbol, TVariable *image2D) { TLayoutImageInternalFormat plsFormat = plsSymbol->getType().getLayoutQualifier().imageInternalFormat; TLayoutImageInternalFormat imageFormat = image2D->getType().getLayoutQualifier().imageInternalFormat; if (plsFormat == imageFormat) { return data; // This PLS storage isn't packed. } ASSERT(needsR32Packing()); switch (plsFormat) { case EiifRGBA8: // Unpack and normalize r,g,b,a from a single 32-bit unsigned int: // // unpackUnorm4x8(data.r) // data = CreateBuiltInFunctionCallNode("unpackUnorm4x8", {CreateSwizzle(data, 0)}, *mSymbolTable, 310); break; case EiifRGBA8I: case EiifRGBA8UI: { constexpr unsigned shifts[] = {24, 16, 8, 0}; // Unpack r,g,b,a form a single (signed or unsigned) 32-bit int. Shift left, // then right, to preserve the sign for ints. (highp integers are exactly // 32-bit, two's compliment.) // // data.rrrr << uvec4(24, 16, 8, 0) >> 24u // data = CreateSwizzle(data, 0, 0, 0, 0); data = new TIntermBinary(EOpBitShiftLeft, data, CreateUVecNode(shifts, 4, EbpHigh)); data = new TIntermBinary(EOpBitShiftRight, data, CreateUIntNode(24)); break; } default: UNREACHABLE(); } return data; } void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override { TVariable *image2D = mImages.find(plsSymbol); TIntermTyped *packedData = clampAndPackPLSDataIfNecessary(value, plsSymbol, image2D); // Surround the store with memoryBarrierImage calls in order to ensure dependent stores and // loads in a single shader invocation are coherent. From the ES 3.1 spec: // // Using variables declared as "coherent" guarantees only that the results of stores will // be immediately visible to shader invocations using similarly-declared variables; // calling MemoryBarrier is required to ensure that the stores are visible to other // operations. // insertStatementsInParentBlock( {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable, 310)}, // Before. {CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable, 310)}); // After. // Rewrite the pixelLocalStoreANGLE with imageStore. ASSERT(mGlobalPixelCoord); queueReplacement( CreateBuiltInFunctionCallNode( "imageStore", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord), packedData}, *mSymbolTable, 310), OriginalNode::IS_DROPPED); } // Packs the PLS to raw data if the output shader language needs r32* packing. TIntermTyped *clampAndPackPLSDataIfNecessary(TVariable *plsVar, TIntermSymbol *plsSymbol, TVariable *image2D) { TLayoutImageInternalFormat plsFormat = plsSymbol->getType().getLayoutQualifier().imageInternalFormat; // anglebug.com/7524: Storing to integer formats with values larger than can be represented // is specified differently on different APIs. Clamp integer formats here to make it uniform // and more GL-like. switch (plsFormat) { case EiifRGBA8I: { // Clamp r,g,b,a to their min/max 8-bit values: // // plsVar = clamp(plsVar, -128, 127) & 0xff // TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode( "clamp", {new TIntermSymbol(plsVar), CreateIndexNode(-128), CreateIndexNode(127)}, *mSymbolTable, mShaderVersion); insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue)); break; } case EiifRGBA8UI: { // Clamp r,g,b,a to their max 8-bit values: // // plsVar = min(plsVar, 255) // TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode( "min", {new TIntermSymbol(plsVar), CreateUIntNode(255)}, *mSymbolTable, mShaderVersion); insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue)); break; } default: break; } TIntermTyped *result = new TIntermSymbol(plsVar); TLayoutImageInternalFormat imageFormat = image2D->getType().getLayoutQualifier().imageInternalFormat; if (plsFormat == imageFormat) { return result; // This PLS storage isn't packed. } ASSERT(needsR32Packing()); switch (plsFormat) { case EiifRGBA8: { if (mCompileOptions->passHighpToPackUnormSnormBuiltins) { // anglebug.com/7527: unpackUnorm4x8 doesn't work on Pixel 4 when passed // a mediump vec4. Use an intermediate highp vec4. // // It's safe to inject a variable here because it happens right before // pixelLocalStoreANGLE, which returns type void. (See visitAggregate.) TType *highpType = new TType(EbtFloat, EbpHigh, EvqTemporary, 4); TVariable *workaroundHighpVar = CreateTempVariable(mSymbolTable, highpType); insertStatementInParentBlock( CreateTempInitDeclarationNode(workaroundHighpVar, result)); result = new TIntermSymbol(workaroundHighpVar); } // Denormalize and pack r,g,b,a into a single 32-bit unsigned int: // // packUnorm4x8(workaroundHighpVar) // result = CreateBuiltInFunctionCallNode("packUnorm4x8", {result}, *mSymbolTable, 310); break; } case EiifRGBA8I: case EiifRGBA8UI: { if (plsFormat == EiifRGBA8I) { // Mask off extra sign bits beyond 8. // // plsVar &= 0xff // insertStatementInParentBlock(new TIntermBinary( EOpBitwiseAndAssign, new TIntermSymbol(plsVar), CreateIndexNode(0xff))); } // Pack r,g,b,a into a single 32-bit (signed or unsigned) int: // // r | (g << 8) | (b << 16) | (a << 24) // auto shiftComponent = [=](int componentIdx) { return new TIntermBinary(EOpBitShiftLeft, CreateSwizzle(new TIntermSymbol(plsVar), componentIdx), CreateUIntNode(componentIdx * 8)); }; result = CreateSwizzle(result, 0); result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(1)); result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(2)); result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(3)); break; } default: UNREACHABLE(); } // Convert the packed data to a {u,i}vec4 for imageStore. TType imageStoreType(DataTypeOfImageType(image2D->getType().getBasicType()), 4); return TIntermAggregate::CreateConstructor(imageStoreType, {result}); } void injectSetupCode(TCompiler *compiler, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, TIntermBlock *mainBody, size_t plsBeginPosition) override { // When PLS is implemented with images, early_fragment_tests ensure that depth/stencil // can also block stores to PLS. compiler->specifyEarlyFragmentTests(); // Delimit the beginning of a per-pixel critical section, if supported. This makes pixel // local storage coherent. // // Either: GL_NV_fragment_shader_interlock // GL_INTEL_fragment_shader_ordering // GL_ARB_fragment_shader_interlock (may compile to // SPV_EXT_fragment_shader_interlock) switch (compileOptions.pls.fragmentSynchronizationType) { // ROVs don't need explicit synchronization calls. case ShFragmentSynchronizationType::RasterizerOrderViews_D3D: case ShFragmentSynchronizationType::NotSupported: break; case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL: mainBody->insertStatement( plsBeginPosition, CreateBuiltInFunctionCallNode("beginInvocationInterlockNV", {}, symbolTable, kESSLInternalBackendBuiltIns)); break; case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL: mainBody->insertStatement( plsBeginPosition, CreateBuiltInFunctionCallNode("beginFragmentShaderOrderingINTEL", {}, symbolTable, kESSLInternalBackendBuiltIns)); break; case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL: mainBody->insertStatement( plsBeginPosition, CreateBuiltInFunctionCallNode("beginInvocationInterlockARB", {}, symbolTable, kESSLInternalBackendBuiltIns)); break; default: UNREACHABLE(); } } void injectFinalizeCode(TCompiler *, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, TIntermBlock *mainBody, size_t plsEndPosition) override { // Delimit the end of the PLS critical section, if required. // // Either: GL_NV_fragment_shader_interlock // GL_ARB_fragment_shader_interlock (may compile to // SPV_EXT_fragment_shader_interlock) switch (compileOptions.pls.fragmentSynchronizationType) { // ROVs don't need explicit synchronization calls. case ShFragmentSynchronizationType::RasterizerOrderViews_D3D: // GL_INTEL_fragment_shader_ordering doesn't have an "end()" call. case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL: case ShFragmentSynchronizationType::NotSupported: break; case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL: mainBody->insertStatement( plsEndPosition, CreateBuiltInFunctionCallNode("endInvocationInterlockNV", {}, symbolTable, kESSLInternalBackendBuiltIns)); break; case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL: mainBody->insertStatement( plsEndPosition, CreateBuiltInFunctionCallNode("endInvocationInterlockARB", {}, symbolTable, kESSLInternalBackendBuiltIns)); break; default: UNREACHABLE(); } } PLSBackingStoreMap mImages; }; // Rewrites high level PLS operations to framebuffer fetch operations. class RewritePLSToFramebufferFetchTraverser : public RewritePLSTraverser { public: RewritePLSToFramebufferFetchTraverser(TCompiler *compiler, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, int shaderVersion) : RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion) {} void visitPLSDeclaration(TIntermSymbol *plsSymbol) override { // Replace the PLS declaration with a framebuffer attachment. PLSAttachment attachment(mCompiler, mSymbolTable, *mCompileOptions, plsSymbol->variable()); mPLSAttachments.insertNew(plsSymbol, attachment); insertStatementInParentBlock( new TIntermDeclaration({new TIntermSymbol(attachment.fragmentVar)})); queueReplacement(CreateTempDeclarationNode(attachment.accessVar), OriginalNode::IS_DROPPED); } void visitPLSLoad(TIntermSymbol *plsSymbol) override { // Read our temporary accessVar. const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol); queueReplacement(attachment.expandAccessVar(), OriginalNode::IS_DROPPED); } void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override { // Set our temporary accessVar. const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol); queueReplacement(CreateTempAssignmentNode(attachment.accessVar, attachment.swizzle(value)), OriginalNode::IS_DROPPED); } void injectSetupCode(TCompiler *compiler, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, TIntermBlock *mainBody, size_t plsBeginPosition) override { // [OpenGL ES Version 3.0.6, 3.9.2.3 "Shader Output"]: Any colors, or color components, // associated with a fragment that are not written by the fragment shader are undefined. // // [EXT_shader_framebuffer_fetch]: Prior to fragment shading, fragment outputs declared // inout are populated with the value last written to the framebuffer at the same(x, y, // sample) position. // // It's unclear from the EXT_shader_framebuffer_fetch spec whether inout fragment variables // become undefined if not explicitly written, but either way, when this compiles to subpass // loads in Vulkan, we definitely get undefined behavior if PLS variables are not written. // // To make sure every PLS variable gets written, we read them all before PLS operations, // then write them all back out after all PLS is complete. std::vector plsPreloads; plsPreloads.reserve(mPLSAttachments.bindingOrderedMap().size()); for (const auto &entry : mPLSAttachments.bindingOrderedMap()) { const PLSAttachment &attachment = entry.second; plsPreloads.push_back( CreateTempAssignmentNode(attachment.accessVar, attachment.swizzleFragmentVar())); } mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsBeginPosition, plsPreloads.begin(), plsPreloads.end()); } void injectFinalizeCode(TCompiler *, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, TIntermBlock *mainBody, size_t plsEndPosition) override { std::vector plsWrites; plsWrites.reserve(mPLSAttachments.bindingOrderedMap().size()); for (const auto &entry : mPLSAttachments.bindingOrderedMap()) { const PLSAttachment &attachment = entry.second; plsWrites.push_back(new TIntermBinary(EOpAssign, attachment.swizzleFragmentVar(), new TIntermSymbol(attachment.accessVar))); } mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsEndPosition, plsWrites.begin(), plsWrites.end()); } private: struct PLSAttachment { PLSAttachment(const TCompiler *compiler, TSymbolTable *symbolTable, const ShCompileOptions &compileOptions, const TVariable &plsVar) { const TType &plsType = plsVar.getType(); TType *accessVarType; switch (plsType.getLayoutQualifier().imageInternalFormat) { default: UNREACHABLE(); [[fallthrough]]; case EiifRGBA8: accessVarType = new TType(EbtFloat, 4); break; case EiifRGBA8I: accessVarType = new TType(EbtInt, 4); break; case EiifRGBA8UI: accessVarType = new TType(EbtUInt, 4); break; case EiifR32F: accessVarType = new TType(EbtFloat, 1); break; case EiifR32UI: accessVarType = new TType(EbtUInt, 1); break; } accessVarType->setPrecision(plsType.getPrecision()); accessVar = CreateTempVariable(symbolTable, accessVarType); // Qualcomm seems to want fragment outputs to be 4-component vectors, and produces a // compile error from "inout uint". Our Metal translator also saturates color outputs to // 4 components. And since the spec also seems silent on how many components an output // must have, we always use 4. TType *fragmentVarType = new TType(accessVarType->getBasicType(), 4); fragmentVarType->setPrecision(plsType.getPrecision()); fragmentVarType->setQualifier(EvqFragmentInOut); // PLS attachments are bound in reverse order from the rear. TLayoutQualifier layoutQualifier = TLayoutQualifier::Create(); layoutQualifier.location = compiler->getResources().MaxCombinedDrawBuffersAndPixelLocalStoragePlanes - plsType.getLayoutQualifier().binding - 1; layoutQualifier.locationsSpecified = 1; if (compileOptions.pls.fragmentSynchronizationType == ShFragmentSynchronizationType::NotSupported) { // We're using EXT_shader_framebuffer_fetch_non_coherent, which requires the // "noncoherent" qualifier. layoutQualifier.noncoherent = true; } fragmentVarType->setLayoutQualifier(layoutQualifier); fragmentVar = new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(), plsVar.extensions(), fragmentVarType); } // Expands our accessVar to 4 components, regardless of the size of the pixel local storage // internalformat. TIntermTyped *expandAccessVar() const { TIntermTyped *expanded = new TIntermSymbol(accessVar); if (accessVar->getType().getNominalSize() == 1) { switch (accessVar->getType().getBasicType()) { case EbtFloat: expanded = TIntermAggregate::CreateConstructor( // "vec4(r, 0, 0, 1)" TType(EbtFloat, 4), {expanded, CreateFloatNode(0, EbpHigh), CreateFloatNode(0, EbpHigh), CreateFloatNode(1, EbpHigh)}); break; case EbtUInt: expanded = TIntermAggregate::CreateConstructor( // "uvec4(r, 0, 0, 1)" TType(EbtUInt, 4), {expanded, CreateUIntNode(0), CreateUIntNode(0), CreateUIntNode(1)}); break; default: UNREACHABLE(); break; } } return expanded; } // Swizzles a variable down to the same number of components as the PLS internalformat. TIntermTyped *swizzle(TVariable *var) const { TIntermTyped *swizzled = new TIntermSymbol(var); if (var->getType().getNominalSize() != accessVar->getType().getNominalSize()) { ASSERT(var->getType().getNominalSize() > accessVar->getType().getNominalSize()); TVector swizzleOffsets{0, 1, 2, 3}; swizzleOffsets.resize(accessVar->getType().getNominalSize()); swizzled = new TIntermSwizzle(swizzled, swizzleOffsets); } return swizzled; } TIntermTyped *swizzleFragmentVar() const { return swizzle(fragmentVar); } TVariable *fragmentVar; TVariable *accessVar; }; PLSBackingStoreMap mPLSAttachments; }; } // anonymous namespace bool RewritePixelLocalStorage(TCompiler *compiler, TIntermBlock *root, TSymbolTable &symbolTable, const ShCompileOptions &compileOptions, int shaderVersion) { // If any functions take PLS arguments, monomorphize the functions by removing said parameters // and making the PLS calls from main() instead, using the global uniform from the call site // instead of the function argument. This is necessary because function arguments don't carry // the necessary "binding" or "format" layout qualifiers. if (!MonomorphizeUnsupportedFunctions( compiler, root, &symbolTable, compileOptions, UnsupportedFunctionArgsBitSet{UnsupportedFunctionArgs::PixelLocalStorage})) { return false; } TIntermBlock *mainBody = FindMainBody(root); std::unique_ptr traverser; switch (compileOptions.pls.type) { case ShPixelLocalStorageType::ImageStoreR32PackedFormats: case ShPixelLocalStorageType::ImageStoreNativeFormats: traverser = std::make_unique( compiler, symbolTable, compileOptions, shaderVersion); break; case ShPixelLocalStorageType::FramebufferFetch: traverser = std::make_unique( compiler, symbolTable, compileOptions, shaderVersion); break; default: UNREACHABLE(); return false; } // Rewrite PLS operations to image operations. root->traverse(traverser.get()); if (!traverser->updateTree(compiler, root)) { return false; } // Inject the code that needs to run before and after all PLS operations. // TODO(anglebug.com/7279): Inject these functions in a tight critical section, instead of // just locking the entire main() function: // - Monomorphize all PLS calls into main(). // - Insert begin/end calls around the first/last PLS calls (and outside of flow control). traverser->injectSetupCode(compiler, symbolTable, compileOptions, mainBody, 0); traverser->injectFinalizeCode(compiler, symbolTable, compileOptions, mainBody, mainBody->getChildCount()); if (traverser->globalPixelCoord()) { // Initialize the global pixel coord at the beginning of main(): // // pixelCoord = ivec2(floor(gl_FragCoord.xy)); // TIntermTyped *exp; exp = ReferenceBuiltInVariable(ImmutableString("gl_FragCoord"), symbolTable, shaderVersion); exp = CreateSwizzle(exp, 0, 1); exp = CreateBuiltInFunctionCallNode("floor", {exp}, symbolTable, shaderVersion); exp = TIntermAggregate::CreateConstructor(TType(EbtInt, 2), {exp}); exp = CreateTempAssignmentNode(traverser->globalPixelCoord(), exp); mainBody->insertStatement(0, exp); } return compiler->validateAST(root); } } // namespace sh