summaryrefslogtreecommitdiffstats
path: root/src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp')
-rw-r--r--src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp7905
1 files changed, 7905 insertions, 0 deletions
diff --git a/src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp b/src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp
new file mode 100644
index 00000000..f4f3951e
--- /dev/null
+++ b/src/libs/dxvk-native-1.9.2a/src/dxbc/dxbc_compiler.cpp
@@ -0,0 +1,7905 @@
+#include "dxbc_compiler.h"
+
+namespace dxvk {
+
+ // Constants related to the immediate constant buffer (ICB). Their use
+ // is not visible in this part of the file - presumably the binding
+ // slot reserved for the ICB and the dword limit up to which it gets
+ // baked into the shader; TODO confirm against the ICB code.
+ constexpr uint32_t Icb_BindingSlotId { 14u };
+ constexpr uint32_t Icb_MaxBakedDwords { 16u };
+
+ // Presumably member indices into a per-vertex built-in block
+ // (position, cull distance, clip distance) - verify against
+ // the code that declares that block.
+ constexpr uint32_t PerVertex_Position { 0u };
+ constexpr uint32_t PerVertex_CullDist { 1u };
+ constexpr uint32_t PerVertex_ClipDist { 2u };
+
+ // Sets up the compiler: stores the module/program info and the three
+ // signatures, allocates the entry point ID, registers the file name as
+ // debug source, logs the signatures when debug logging is active,
+ // selects the memory model, clears all interface registers and finally
+ // runs emitInit().
+ //
+ // Only the address of 'analysis' is stored, so the referenced object
+ // has to remain valid for as long as the compiler reads m_analysis.
+ DxbcCompiler::DxbcCompiler(
+   const std::string& fileName,
+   const DxbcModuleInfo& moduleInfo,
+   const DxbcProgramInfo& programInfo,
+   const Rc<DxbcIsgn>& isgn,
+   const Rc<DxbcIsgn>& osgn,
+   const Rc<DxbcIsgn>& psgn,
+   const DxbcAnalysisInfo& analysis)
+ : m_moduleInfo (moduleInfo),
+   m_programInfo(programInfo),
+   m_module     (spvVersion(1, 3)),
+   m_isgn       (isgn),
+   m_osgn       (osgn),
+   m_psgn       (psgn),
+   m_analysis   (&analysis) {
+   // The entry point ID is required early, since execution
+   // modes are attached to it while the shader is initialized.
+   m_entryPointId = m_module.allocateId();
+
+   // Expose the file name as debug source so that the
+   // shader can be recognized in tools such as renderdoc.
+   m_module.setDebugSource(
+     spv::SourceLanguageUnknown, 0,
+     m_module.addDebugString(fileName.c_str()),
+     nullptr);
+
+   if (Logger::logLevel() <= LogLevel::Debug) {
+     // Prints one signature, preceded by a header line, if it exists
+     const auto logSignature = [&fileName] (const char* header, const Rc<DxbcIsgn>& sgn) {
+       if (sgn != nullptr) {
+         Logger::debug(str::format(header, fileName.c_str(), "\n"));
+         sgn->printEntries();
+       }
+     };
+
+     logSignature("Input Signature for - ",          m_isgn);
+     logSignature("Output Signature for - ",         m_osgn);
+     logSignature("Patch Constant Signature for - ", m_psgn);
+   }
+
+   // All shader types use the same memory model
+   m_module.setMemoryModel(
+     spv::AddressingModelLogical,
+     spv::MemoryModelGLSL450);
+
+   // Reset every input and output interface register
+   for (uint32_t reg = 0; reg < DxbcMaxInterfaceRegs; ++reg) {
+     m_vRegs.at(reg) = DxbcRegisterPointer { };
+     m_oRegs.at(reg) = DxbcRegisterPointer { };
+   }
+
+   this->emitInit();
+ }
+
+
+ // Nothing to release explicitly; members clean up after themselves.
+ DxbcCompiler::~DxbcCompiler() = default;
+
+
+ // Forwards one decoded instruction to the emit* method that handles
+ // its instruction class. NoOperation instructions are ignored, and
+ // any class without a handler only produces a warning.
+ void DxbcCompiler::processInstruction(const DxbcShaderInstruction& ins) {
+   switch (ins.opClass) {
+     case DxbcInstClass::Declaration:       emitDcl(ins);               break;
+     case DxbcInstClass::CustomData:        emitCustomData(ins);        break;
+     case DxbcInstClass::Atomic:            emitAtomic(ins);            break;
+     case DxbcInstClass::AtomicCounter:     emitAtomicCounter(ins);     break;
+     case DxbcInstClass::Barrier:           emitBarrier(ins);           break;
+     case DxbcInstClass::BitExtract:        emitBitExtract(ins);        break;
+     case DxbcInstClass::BitInsert:         emitBitInsert(ins);         break;
+     case DxbcInstClass::BitScan:           emitBitScan(ins);           break;
+     case DxbcInstClass::BufferQuery:       emitBufferQuery(ins);       break;
+     case DxbcInstClass::BufferLoad:        emitBufferLoad(ins);        break;
+     case DxbcInstClass::BufferStore:       emitBufferStore(ins);       break;
+     case DxbcInstClass::ConvertFloat16:    emitConvertFloat16(ins);    break;
+     case DxbcInstClass::ConvertFloat64:    emitConvertFloat64(ins);    break;
+     case DxbcInstClass::ControlFlow:       emitControlFlow(ins);       break;
+     case DxbcInstClass::GeometryEmit:      emitGeometryEmit(ins);      break;
+     case DxbcInstClass::HullShaderPhase:   emitHullShaderPhase(ins);   break;
+     case DxbcInstClass::HullShaderInstCnt: emitHullShaderInstCnt(ins); break;
+     case DxbcInstClass::Interpolate:       emitInterpolate(ins);       break;
+     case DxbcInstClass::NoOperation:       /* nothing to emit */       break;
+     case DxbcInstClass::TextureQuery:      emitTextureQuery(ins);      break;
+     case DxbcInstClass::TextureQueryLod:   emitTextureQueryLod(ins);   break;
+     case DxbcInstClass::TextureQueryMs:    emitTextureQueryMs(ins);    break;
+     case DxbcInstClass::TextureQueryMsPos: emitTextureQueryMsPos(ins); break;
+     case DxbcInstClass::TextureFetch:      emitTextureFetch(ins);      break;
+     case DxbcInstClass::TextureGather:     emitTextureGather(ins);     break;
+     case DxbcInstClass::TextureSample:     emitTextureSample(ins);     break;
+     case DxbcInstClass::TypedUavLoad:      emitTypedUavLoad(ins);      break;
+     case DxbcInstClass::TypedUavStore:     emitTypedUavStore(ins);     break;
+     case DxbcInstClass::VectorAlu:         emitVectorAlu(ins);         break;
+     case DxbcInstClass::VectorCmov:        emitVectorCmov(ins);        break;
+     case DxbcInstClass::VectorCmp:         emitVectorCmp(ins);         break;
+     case DxbcInstClass::VectorDeriv:       emitVectorDeriv(ins);       break;
+     case DxbcInstClass::VectorDot:         emitVectorDot(ins);         break;
+     case DxbcInstClass::VectorIdiv:        emitVectorIdiv(ins);        break;
+     case DxbcInstClass::VectorImul:        emitVectorImul(ins);        break;
+     case DxbcInstClass::VectorMsad:        emitVectorMsad(ins);        break;
+     case DxbcInstClass::VectorShift:       emitVectorShift(ins);       break;
+     case DxbcInstClass::VectorSinCos:      emitVectorSinCos(ins);      break;
+
+     default:
+       // The message names the opcode, since that is what gets logged
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled opcode class: ", ins.op));
+       break;
+   }
+ }
+
+
+ // Builds a point-in/point-out shader with a single output vertex and
+ // a single invocation: every input signature entry is declared as a
+ // plain input, and for each stream referenced by m_xfbVars the xfb
+ // outputs are set up and one vertex is emitted. Ends the main function.
+ void DxbcCompiler::processXfbPassthrough() {
+   m_module.setExecutionMode (m_entryPointId, spv::ExecutionModeInputPoints);
+   m_module.setExecutionMode (m_entryPointId, spv::ExecutionModeOutputPoints);
+   m_module.setOutputVertices(m_entryPointId, 1);
+   m_module.setInvocations   (m_entryPointId, 1);
+
+   // Declare all signature entries as non-system-value inputs
+   auto entry = m_isgn->begin();
+
+   while (entry != m_isgn->end()) {
+     emitDclInput(entry->registerId, 1,
+       entry->componentMask, DxbcSystemValue::None,
+       DxbcInterpolationMode::Undefined);
+     entry++;
+   }
+
+   // Collect one bit per stream that any xfb variable writes to
+   uint32_t enabledStreams = 0;
+
+   for (size_t var = 0; var < m_xfbVars.size(); var++)
+     enabledStreams |= 1u << m_xfbVars[var].streamId;
+
+   // Write the outputs and emit a vertex on each of those streams
+   for (uint32_t stream : bit::BitMask(enabledStreams)) {
+     emitXfbOutputSetup(stream, true);
+     m_module.opEmitVertex(m_module.constu32(stream));
+   }
+
+   // Close the main function
+   emitFunctionEnd();
+ }
+
+
+ // Completes the SPIR-V module and wraps it in a DxvkShader object.
+ // Runs the stage-specific finalize step, emits the float control
+ // setup, declares the "main" entry point with all collected
+ // interface variables and copies the transform feedback options
+ // (if any) into the shader options.
+ Rc<DxvkShader> DxbcCompiler::finalize() {
+   // Stage-specific epilogue; program types not
+   // listed here do not get any extra code.
+   switch (m_programInfo.type()) {
+     case DxbcProgramType::VertexShader:
+       emitVsFinalize();
+       break;
+     case DxbcProgramType::HullShader:
+       emitHsFinalize();
+       break;
+     case DxbcProgramType::DomainShader:
+       emitDsFinalize();
+       break;
+     case DxbcProgramType::GeometryShader:
+       emitGsFinalize();
+       break;
+     case DxbcProgramType::PixelShader:
+       emitPsFinalize();
+       break;
+     case DxbcProgramType::ComputeShader:
+       emitCsFinalize();
+       break;
+   }
+
+   // Float control mode, emitted if the extension is supported
+   emitFloatControl();
+
+   // By now the full interface is known, so the
+   // entry point can be declared and named.
+   m_module.addEntryPoint(m_entryPointId,
+     m_programInfo.executionModel(), "main",
+     m_entryPointInterfaces.size(),
+     m_entryPointInterfaces.data());
+   m_module.setDebugName(m_entryPointId, "main");
+
+   DxvkShaderOptions options = { };
+
+   if (m_moduleInfo.xfb != nullptr) {
+     options.rasterizedStream = m_moduleInfo.xfb->rasterizedStream;
+
+     for (uint32_t stream = 0; stream < 4; stream++)
+       options.xfbStrides[stream] = m_moduleInfo.xfb->strides[stream];
+   }
+
+   // Hand the compiled code and all metadata to the shader object
+   return new DxvkShader(
+     m_programInfo.shaderStage(),
+     m_resourceSlots.size(),
+     m_resourceSlots.data(),
+     m_interfaceSlots,
+     m_module.compile(),
+     options,
+     std::move(m_immConstData));
+ }
+
+
+ // Routes a declaration instruction to the matching emitDcl* method
+ // based on its opcode. dcl_index_range is accepted but ignored, and
+ // opcodes without a handler only produce a warning.
+ void DxbcCompiler::emitDcl(const DxbcShaderInstruction& ins) {
+   switch (ins.op) {
+     case DxbcOpcode::DclGlobalFlags:
+       emitDclGlobalFlags(ins);
+       break;
+
+     case DxbcOpcode::DclIndexRange:
+       // not needed for anything
+       break;
+
+     case DxbcOpcode::DclTemps:
+       emitDclTemps(ins);
+       break;
+
+     case DxbcOpcode::DclIndexableTemp:
+       emitDclIndexableTemp(ins);
+       break;
+
+     // Input and output registers, with or without system values
+     case DxbcOpcode::DclInput:
+     case DxbcOpcode::DclInputSgv:
+     case DxbcOpcode::DclInputSiv:
+     case DxbcOpcode::DclInputPs:
+     case DxbcOpcode::DclInputPsSgv:
+     case DxbcOpcode::DclInputPsSiv:
+     case DxbcOpcode::DclOutput:
+     case DxbcOpcode::DclOutputSgv:
+     case DxbcOpcode::DclOutputSiv:
+       emitDclInterfaceReg(ins);
+       break;
+
+     case DxbcOpcode::DclConstantBuffer:
+       emitDclConstantBuffer(ins);
+       break;
+
+     case DxbcOpcode::DclSampler:
+       emitDclSampler(ins);
+       break;
+
+     case DxbcOpcode::DclStream:
+       emitDclStream(ins);
+       break;
+
+     // Typed resources and typed UAVs share one code path
+     case DxbcOpcode::DclUavTyped:
+     case DxbcOpcode::DclResource:
+       emitDclResourceTyped(ins);
+       break;
+
+     // Raw and structured buffers, both as resource and as UAV
+     case DxbcOpcode::DclUavRaw:
+     case DxbcOpcode::DclResourceRaw:
+     case DxbcOpcode::DclUavStructured:
+     case DxbcOpcode::DclResourceStructured:
+       emitDclResourceRawStructured(ins);
+       break;
+
+     case DxbcOpcode::DclThreadGroupSharedMemoryRaw:
+     case DxbcOpcode::DclThreadGroupSharedMemoryStructured:
+       emitDclThreadGroupSharedMemory(ins);
+       break;
+
+     case DxbcOpcode::DclGsInputPrimitive:
+       emitDclGsInputPrimitive(ins);
+       break;
+
+     case DxbcOpcode::DclGsOutputPrimitiveTopology:
+       emitDclGsOutputTopology(ins);
+       break;
+
+     case DxbcOpcode::DclMaxOutputVertexCount:
+       emitDclMaxOutputVertexCount(ins);
+       break;
+
+     case DxbcOpcode::DclInputControlPointCount:
+       emitDclInputControlPointCount(ins);
+       break;
+
+     case DxbcOpcode::DclOutputControlPointCount:
+       emitDclOutputControlPointCount(ins);
+       break;
+
+     case DxbcOpcode::DclHsMaxTessFactor:
+       emitDclHsMaxTessFactor(ins);
+       break;
+
+     case DxbcOpcode::DclTessDomain:
+       emitDclTessDomain(ins);
+       break;
+
+     case DxbcOpcode::DclTessPartitioning:
+       emitDclTessPartitioning(ins);
+       break;
+
+     case DxbcOpcode::DclTessOutputPrimitive:
+       emitDclTessOutputPrimitive(ins);
+       break;
+
+     case DxbcOpcode::DclThreadGroup:
+       emitDclThreadGroup(ins);
+       break;
+
+     case DxbcOpcode::DclGsInstanceCount:
+       emitDclGsInstanceCount(ins);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled opcode: ", ins.op));
+       break;
+   }
+ }
+
+
+ // Applies the global flags of a dcl_globalFlags instruction:
+ // RefactoringAllowed clears m_precise, and EarlyFragmentTests
+ // sets the corresponding execution mode on the entry point.
+ void DxbcCompiler::emitDclGlobalFlags(const DxbcShaderInstruction& ins) {
+   const DxbcGlobalFlags globalFlags = ins.controls.globalFlags();
+
+   if (globalFlags.test(DxbcGlobalFlag::RefactoringAllowed)) {
+     m_precise = false;
+   }
+
+   if (globalFlags.test(DxbcGlobalFlag::EarlyFragmentTests)) {
+     m_module.setExecutionMode(m_entryPointId,
+       spv::ExecutionModeEarlyFragmentTests);
+   }
+ }
+
+
+ // Handler for dcl_temps. Intentionally does nothing.
+ void DxbcCompiler::emitDclTemps(const DxbcShaderInstruction& ins) {
+   // The single operand of dcl_temps, (imm0), is the number of
+   // temp registers. It is ignored here because temps are
+   // declared on demand instead.
+ }
+
+
+ // Declares an indexable temp array (x#) as a private float variable
+ // and records its component count, array length and variable ID in
+ // m_xRegs, growing that container when the register index requires it.
+ void DxbcCompiler::emitDclIndexableTemp(const DxbcShaderInstruction& ins) {
+   // Operands of dcl_indexable_temps:
+   //    (imm0) Array register index (x#)
+   //    (imm1) Number of vectors stored in the array
+   //    (imm2) Component count of each individual vector
+   const uint32_t arrayIdx = ins.imm[0].u32;
+
+   DxbcRegisterInfo varInfo;
+   varInfo.type.ctype   = DxbcScalarType::Float32;
+   varInfo.type.ccount  = ins.imm[2].u32;
+   varInfo.type.alength = ins.imm[1].u32;
+   varInfo.sclass       = spv::StorageClassPrivate;
+
+   // Make room for the register if it has not been seen yet
+   if (m_xRegs.size() <= arrayIdx)
+     m_xRegs.resize(arrayIdx + 1);
+
+   m_xRegs.at(arrayIdx).ccount  = varInfo.type.ccount;
+   m_xRegs.at(arrayIdx).alength = varInfo.type.alength;
+
+   const uint32_t arrayVarId = emitNewVariable(varInfo);
+   m_xRegs.at(arrayIdx).varId = arrayVarId;
+
+   m_module.setDebugName(arrayVarId,
+     str::format("x", arrayIdx).c_str());
+ }
+
+
+ // Handles declarations of interface registers. Regular inputs and
+ // outputs (and, in hull shaders only, input control points) are
+ // forwarded to emitDclInput / emitDclOutput together with the decoded
+ // register index, array size, system value and interpolation mode.
+ // The remaining operand types declare a built-in variable, set an
+ // execution mode, or need no work at all. Unsupported operand types
+ // and malformed declarations are reported through Logger::err.
+ void DxbcCompiler::emitDclInterfaceReg(const DxbcShaderInstruction& ins) {
+   switch (ins.dst[0].type) {
+     case DxbcOperandType::InputControlPoint:
+       // Only hull shaders declare these like regular inputs
+       if (m_programInfo.type() != DxbcProgramType::HullShader)
+         break;
+       /* fall through */
+
+     case DxbcOperandType::Input:
+     case DxbcOperandType::Output: {
+       // dcl_input and dcl_output instructions
+       // have the following operands:
+       //    (dst0) The register to declare
+       //    (imm0) The system value (optional)
+       uint32_t regDim = 0;
+       uint32_t regIdx = 0;
+
+       // In the vertex and fragment shader stage, the
+       // operand indices will have the following format:
+       //    (0) Register index
+       //
+       // In other stages, the input and output registers
+       // may be declared as arrays of a fixed size:
+       //    (0) Array length
+       //    (1) Register index
+       if (ins.dst[0].idxDim == 2) {
+         regDim = ins.dst[0].idx[0].offset;
+         regIdx = ins.dst[0].idx[1].offset;
+       } else if (ins.dst[0].idxDim == 1) {
+         regIdx = ins.dst[0].idx[0].offset;
+       } else {
+         Logger::err(str::format(
+           "DxbcCompiler: ", ins.op,
+           ": Invalid index dimension"));
+         return;
+       }
+
+       // This declaration may map an output register to a system
+       // value. If that is the case, the system value type will
+       // be stored in the second operand.
+       const bool hasSv =
+            ins.op == DxbcOpcode::DclInputSgv
+         || ins.op == DxbcOpcode::DclInputSiv
+         || ins.op == DxbcOpcode::DclInputPsSgv
+         || ins.op == DxbcOpcode::DclInputPsSiv
+         || ins.op == DxbcOpcode::DclOutputSgv
+         || ins.op == DxbcOpcode::DclOutputSiv;
+
+       DxbcSystemValue sv = DxbcSystemValue::None;
+
+       if (hasSv)
+         sv = static_cast<DxbcSystemValue>(ins.imm[0].u32);
+
+       // In the pixel shader, inputs are declared with an
+       // interpolation mode that is part of the op token.
+       const bool hasInterpolationMode =
+            ins.op == DxbcOpcode::DclInputPs
+         || ins.op == DxbcOpcode::DclInputPsSiv;
+
+       DxbcInterpolationMode im = DxbcInterpolationMode::Undefined;
+
+       if (hasInterpolationMode)
+         im = ins.controls.interpolation();
+
+       // Declare the actual input/output variable
+       switch (ins.op) {
+         case DxbcOpcode::DclInput:
+         case DxbcOpcode::DclInputSgv:
+         case DxbcOpcode::DclInputSiv:
+         case DxbcOpcode::DclInputPs:
+         case DxbcOpcode::DclInputPsSgv:
+         case DxbcOpcode::DclInputPsSiv:
+           this->emitDclInput(regIdx, regDim, ins.dst[0].mask, sv, im);
+           break;
+
+         case DxbcOpcode::DclOutput:
+         case DxbcOpcode::DclOutputSgv:
+         case DxbcOpcode::DclOutputSiv:
+           this->emitDclOutput(regIdx, regDim, ins.dst[0].mask, sv, im);
+           break;
+
+         default:
+           Logger::err(str::format(
+             "DxbcCompiler: Unexpected opcode: ",
+             ins.op));
+       }
+     } break;
+
+     case DxbcOperandType::InputThreadId: {
+       m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 3, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInGlobalInvocationId,
+         "vThreadId");
+     } break;
+
+     case DxbcOperandType::InputThreadGroupId: {
+       m_cs.builtinWorkgroupId = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 3, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInWorkgroupId,
+         "vThreadGroupId");
+     } break;
+
+     case DxbcOperandType::InputThreadIdInGroup: {
+       m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 3, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInLocalInvocationId,
+         "vThreadIdInGroup");
+     } break;
+
+     case DxbcOperandType::InputThreadIndexInGroup: {
+       m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInLocalInvocationIndex,
+         "vThreadIndexInGroup");
+     } break;
+
+     case DxbcOperandType::InputCoverageMask: {
+       m_ps.builtinSampleMaskIn = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 1 },
+         spv::StorageClassInput },
+         spv::BuiltInSampleMask,
+         "vCoverage");
+     } break;
+
+     case DxbcOperandType::OutputCoverageMask: {
+       m_ps.builtinSampleMaskOut = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 1 },
+         spv::StorageClassOutput },
+         spv::BuiltInSampleMask,
+         "oMask");
+     } break;
+
+     case DxbcOperandType::OutputDepth: {
+       m_module.setExecutionMode(m_entryPointId,
+         spv::ExecutionModeDepthReplacing);
+       m_ps.builtinDepth = emitNewBuiltinVariable({
+         { DxbcScalarType::Float32, 1, 0 },
+         spv::StorageClassOutput },
+         spv::BuiltInFragDepth,
+         "oDepth");
+     } break;
+
+     case DxbcOperandType::OutputStencilRef: {
+       m_module.enableExtension("SPV_EXT_shader_stencil_export");
+       m_module.enableCapability(spv::CapabilityStencilExportEXT);
+       m_module.setExecutionMode(m_entryPointId,
+         spv::ExecutionModeStencilRefReplacingEXT);
+       m_ps.builtinStencilRef = emitNewBuiltinVariable({
+         { DxbcScalarType::Sint32, 1, 0 },
+         spv::StorageClassOutput },
+         spv::BuiltInFragStencilRefEXT,
+         "oStencilRef");
+     } break;
+
+     case DxbcOperandType::OutputDepthGe: {
+       m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthReplacing);
+       m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthGreater);
+       m_ps.builtinDepth = emitNewBuiltinVariable({
+         { DxbcScalarType::Float32, 1, 0 },
+         spv::StorageClassOutput },
+         spv::BuiltInFragDepth,
+         "oDepthGe");
+     } break;
+
+     case DxbcOperandType::OutputDepthLe: {
+       m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthReplacing);
+       m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthLess);
+       m_ps.builtinDepth = emitNewBuiltinVariable({
+         { DxbcScalarType::Float32, 1, 0 },
+         spv::StorageClassOutput },
+         spv::BuiltInFragDepth,
+         "oDepthLe");
+     } break;
+
+     case DxbcOperandType::InputPrimitiveId: {
+       m_primitiveIdIn = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInPrimitiveId,
+         "vPrim");
+     } break;
+
+     case DxbcOperandType::InputDomainPoint: {
+       m_ds.builtinTessCoord = emitNewBuiltinVariable({
+         { DxbcScalarType::Float32, 3, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInTessCoord,
+         "vDomain");
+     } break;
+
+     case DxbcOperandType::InputForkInstanceId:
+     case DxbcOperandType::InputJoinInstanceId: {
+       auto phase = this->getCurrentHsForkJoinPhase();
+
+       // There may be no current fork/join phase (emitDclOutput
+       // checks for that as well). Report the malformed declaration
+       // instead of dereferencing a null pointer.
+       if (phase == nullptr) {
+         Logger::err(str::format(
+           "DxbcCompiler: ", ins.op,
+           ": Instance ID declared outside of a fork/join phase"));
+         break;
+       }
+
+       // Copy the instance ID into a function-local
+       // variable that the phase can refer to later.
+       phase->instanceIdPtr = m_module.newVar(
+         m_module.defPointerType(
+           m_module.defIntType(32, 0),
+           spv::StorageClassFunction),
+         spv::StorageClassFunction);
+
+       m_module.opStore(phase->instanceIdPtr, phase->instanceId);
+       m_module.setDebugName(phase->instanceIdPtr,
+         ins.dst[0].type == DxbcOperandType::InputForkInstanceId
+           ? "vForkInstanceId" : "vJoinInstanceId");
+     } break;
+
+     case DxbcOperandType::OutputControlPointId: {
+       // This system value maps to the invocation
+       // ID, which has been declared already.
+     } break;
+
+     case DxbcOperandType::InputPatchConstant:
+     case DxbcOperandType::OutputControlPoint: {
+       // These have been declared as global input and
+       // output arrays, so there's nothing left to do.
+     } break;
+
+     case DxbcOperandType::InputGsInstanceId: {
+       m_gs.builtinInvocationId = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInInvocationId,
+         "vInstanceID");
+     } break;
+
+     default:
+       Logger::err(str::format(
+         "DxbcCompiler: Unsupported operand type declaration: ",
+         ins.dst[0].type));
+
+   }
+ }
+
+
+ // Declares the input register v<regIdx>, or records a system value
+ // mapping for it.
+ //
+ //  regIdx   Register index, also used as the location decoration
+ //  regDim   Array length of the input variable
+ //  regMask  Component mask stored with a system value mapping
+ //  sv       System value, or None for a plain input
+ //  im       Interpolation mode that selects the decorations
+ //
+ // A plain input is only declared once per register. System values do
+ // not create a variable here; they are added to m_vMappings, except
+ // for clip and cull distances, which are skipped.
+ void DxbcCompiler::emitDclInput(
+   uint32_t regIdx,
+   uint32_t regDim,
+   DxbcRegMask regMask,
+   DxbcSystemValue sv,
+   DxbcInterpolationMode im) {
+   // Look the register up first so that an invalid
+   // index is always caught by the bounds check.
+   const bool isUndeclared = m_vRegs.at(regIdx).id == 0;
+
+   if (sv != DxbcSystemValue::None) {
+     // System value: add a new mapping if needed
+     const bool isClipOrCull =
+          sv == DxbcSystemValue::ClipDistance
+       || sv == DxbcSystemValue::CullDistance;
+
+     if (!isClipOrCull)
+       m_vMappings.push_back({ regIdx, regMask, sv });
+
+     return;
+   }
+
+   // The same register can be declared several times, e.g. when
+   // multiple system values share it. Only declare it once.
+   if (!isUndeclared)
+     return;
+
+   const DxbcVectorType regType = getInputRegType(regIdx);
+
+   DxbcRegisterInfo varInfo;
+   varInfo.type.ctype   = regType.ctype;
+   varInfo.type.ccount  = regType.ccount;
+   varInfo.type.alength = regDim;
+   varInfo.sclass       = spv::StorageClassInput;
+
+   const uint32_t inputVarId = emitNewVariable(varInfo);
+
+   m_module.decorateLocation(inputVarId, regIdx);
+   m_module.setDebugName(inputVarId, str::format("v", regIdx).c_str());
+   m_entryPointInterfaces.push_back(inputVarId);
+
+   m_vRegs.at(regIdx) = { regType, inputVarId };
+
+   // Translate the interpolation mode (pixel
+   // shader inputs) into variable decorations.
+   switch (im) {
+     case DxbcInterpolationMode::Constant:
+       m_module.decorate(inputVarId, spv::DecorationFlat);
+       break;
+
+     case DxbcInterpolationMode::LinearCentroid:
+       m_module.decorate(inputVarId, spv::DecorationCentroid);
+       break;
+
+     case DxbcInterpolationMode::LinearNoPerspective:
+       m_module.decorate(inputVarId, spv::DecorationNoPerspective);
+       break;
+
+     case DxbcInterpolationMode::LinearNoPerspectiveCentroid:
+       m_module.decorate(inputVarId, spv::DecorationCentroid);
+       m_module.decorate(inputVarId, spv::DecorationNoPerspective);
+       break;
+
+     case DxbcInterpolationMode::LinearSample:
+       m_module.enableCapability(spv::CapabilitySampleRateShading);
+       m_module.decorate(inputVarId, spv::DecorationSample);
+       break;
+
+     case DxbcInterpolationMode::LinearNoPerspectiveSample:
+       m_module.decorate(inputVarId, spv::DecorationNoPerspective);
+       m_module.enableCapability(spv::CapabilitySampleRateShading);
+       m_module.decorate(inputVarId, spv::DecorationSample);
+       break;
+
+     default:
+       // No decoration for any other mode
+       break;
+   }
+
+   // Mark the input slot as defined
+   m_interfaceSlots.inputSlots |= 1u << regIdx;
+   m_vArrayLength = std::max(m_vArrayLength, regIdx + 1);
+ }
+
+
+ // Declares the output register o<regIdx> and records a system value
+ // mapping for it where applicable.
+ //
+ //  regIdx   Register index, also used as the location decoration
+ //  regDim   Array length of the output variable
+ //  regMask  Component mask stored with a system value mapping
+ //  sv       System value, or None for a plain output
+ //  im       Not used by this function
+ //
+ // In hull shaders no variable is created; the register is only
+ // flagged in the per-patch output mask while a fork/join phase is
+ // active. In all other stages the variable is declared once per
+ // register.
+ void DxbcCompiler::emitDclOutput(
+   uint32_t regIdx,
+   uint32_t regDim,
+   DxbcRegMask regMask,
+   DxbcSystemValue sv,
+   DxbcInterpolationMode im) {
+   // Add a new system value mapping if needed. Clip
+   // and cull distances are handled separately.
+   if (sv != DxbcSystemValue::None
+    && sv != DxbcSystemValue::ClipDistance
+    && sv != DxbcSystemValue::CullDistance)
+     m_oMappings.push_back({ regIdx, regMask, sv });
+
+   if (m_programInfo.type() == DxbcProgramType::HullShader) {
+     // Hull shaders don't use standard outputs. Use an unsigned
+     // literal for the mask bit, like the other slot masks in this
+     // file, so that bit 31 is not produced by a signed shift.
+     if (getCurrentHsForkJoinPhase() != nullptr)
+       m_hs.outputPerPatchMask |= 1u << regIdx;
+   } else if (m_oRegs.at(regIdx).id == 0) {
+     // Avoid declaring the same variable multiple times.
+     // This may happen when multiple system values are
+     // mapped to different parts of the same register.
+     const DxbcVectorType regType = getOutputRegType(regIdx);
+
+     DxbcRegisterInfo info;
+     info.type.ctype   = regType.ctype;
+     info.type.ccount  = regType.ccount;
+     info.type.alength = regDim;
+     info.sclass       = spv::StorageClassOutput;
+
+     // In xfb mode, we set up the actual
+     // output vars when emitting a vertex
+     if (m_moduleInfo.xfb != nullptr)
+       info.sclass = spv::StorageClassPrivate;
+
+     // In geometry shaders, don't duplicate system value outputs
+     // to stay within device limits. The pixel shader will read
+     // all GS system value outputs as system value inputs.
+     if (m_programInfo.type() == DxbcProgramType::GeometryShader && sv != DxbcSystemValue::None)
+       info.sclass = spv::StorageClassPrivate;
+
+     const uint32_t varId = this->emitNewVariable(info);
+     m_module.setDebugName(varId, str::format("o", regIdx).c_str());
+
+     // Only real outputs get a location and become
+     // part of the entry point interface.
+     if (info.sclass == spv::StorageClassOutput) {
+       m_module.decorateLocation(varId, regIdx);
+       m_entryPointInterfaces.push_back(varId);
+
+       // Add index decoration for potential dual-source blending
+       if (m_programInfo.type() == DxbcProgramType::PixelShader)
+         m_module.decorateIndex(varId, 0);
+
+       // Declare vertex positions in all stages as invariant, even if
+       // this is not the last stage, to help with potential Z fighting.
+       if (sv == DxbcSystemValue::Position && m_moduleInfo.options.invariantPosition)
+         m_module.decorate(varId, spv::DecorationInvariant);
+     }
+
+     m_oRegs.at(regIdx) = { regType, varId };
+
+     // Declare the output slot as defined
+     m_interfaceSlots.outputSlots |= 1u << regIdx;
+   }
+ }
+
+
+ // Declares the constant buffer described by a dcl_constant_buffer
+ // instruction. The buffer becomes a storage buffer only if the
+ // dynamicIndexedConstantBufferAsSsbo option is enabled and the
+ // declaration states that the buffer is dynamically indexed.
+ void DxbcCompiler::emitDclConstantBuffer(const DxbcShaderInstruction& ins) {
+   // The single operand of dcl_constant_buffer has two indices:
+   //    (0) Constant buffer register ID (cb#)
+   //    (1) Number of constants in the buffer
+   const auto& operand = ins.dst[0];
+
+   const uint32_t cbRegister = operand.idx[0].offset;
+   const uint32_t cbLength   = operand.idx[1].offset;
+
+   bool useSsbo = false;
+
+   if (m_moduleInfo.options.dynamicIndexedConstantBufferAsSsbo)
+     useSsbo = ins.controls.accessType() == DxbcConstantBufferAccessType::DynamicallyIndexed;
+
+   this->emitDclConstantBufferVar(cbRegister, cbLength,
+     str::format("cb", cbRegister).c_str(), useSsbo);
+ }
+
+
+ // Creates the SPIR-V variable for a constant buffer and registers it
+ // with the compiler.
+ //
+ //  regIdx        Constant buffer register, index into m_constantBuffers
+ //  numConstants  Number of 4x32-bit vectors in the buffer
+ //  name          Debug name; also the prefix of the derived names
+ //  asSsbo        Declare a read-only storage buffer rather than a
+ //                uniform buffer
+ //
+ // Besides the variable itself, this emits a boolean specialization
+ // constant (using the binding index as its spec ID) that stores
+ // whether the resource is bound, and adds a resource slot entry.
+ void DxbcCompiler::emitDclConstantBufferVar(
+   uint32_t regIdx,
+   uint32_t numConstants,
+   const char* name,
+   bool asSsbo) {
+   // The data is laid out as a fixed-size array of 4x32-bit
+   // vectors. SPIR-V wants the stride to be stated explicitly.
+   const uint32_t vecArrayTypeId = m_module.defArrayTypeUnique(
+     getVectorTypeId({ DxbcScalarType::Float32, 4 }),
+     m_module.constu32(numConstants));
+   m_module.decorateArrayStride(vecArrayTypeId, 16);
+
+   // The array has to be wrapped in a struct
+   // which is then decorated as a block.
+   const uint32_t blockTypeId = m_module.defStructTypeUnique(1, &vecArrayTypeId);
+
+   if (asSsbo)
+     m_module.decorate(blockTypeId, spv::DecorationBufferBlock);
+   else
+     m_module.decorate(blockTypeId, spv::DecorationBlock);
+
+   m_module.memberDecorateOffset(blockTypeId, 0, 0);
+
+   m_module.setDebugName(blockTypeId, str::format(name, "_t").c_str());
+   m_module.setDebugMemberName(blockTypeId, 0, "m");
+
+   // The variable through which the buffer gets accessed
+   const uint32_t blockPtrTypeId = m_module.defPointerType(
+     blockTypeId, spv::StorageClassUniform);
+   const uint32_t bufferVarId = m_module.newVar(
+     blockPtrTypeId, spv::StorageClassUniform);
+
+   m_module.setDebugName(bufferVarId, name);
+
+   // DXVK binding slot for this buffer. D3D11
+   // has to bind the actual buffer to that slot.
+   const uint32_t bindingId = computeConstantBufferBinding(
+     m_programInfo.type(), regIdx);
+
+   m_module.decorateDescriptorSet(bufferVarId, 0);
+   m_module.decorateBinding(bufferVarId, bindingId);
+
+   if (asSsbo)
+     m_module.decorate(bufferVarId, spv::DecorationNonWritable);
+
+   // Specialization constant that stores
+   // whether or not the resource is bound.
+   const uint32_t boundSpecId = m_module.specConstBool(true);
+   m_module.decorateSpecId(boundSpecId, bindingId);
+   m_module.setDebugName(boundSpecId,
+     str::format(name, "_bound").c_str());
+
+   DxbcConstantBuffer cbInfo;
+   cbInfo.varId = bufferVarId;
+   cbInfo.size  = numConstants;
+   m_constantBuffers.at(regIdx) = cbInfo;
+
+   // Descriptor info for the shader interface
+   DxvkResourceSlot slotInfo;
+   slotInfo.slot = bindingId;
+
+   if (asSsbo)
+     slotInfo.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+   else
+     slotInfo.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+
+   slotInfo.view   = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
+   slotInfo.access = VK_ACCESS_UNIFORM_READ_BIT;
+   m_resourceSlots.push_back(slotInfo);
+ }
+
+
+ // Declares the sampler register s# named by a dcl_sampler
+ // instruction: creates the sampler variable, stores its variable
+ // and type IDs in m_samplers, assigns descriptor set 0 plus the
+ // computed binding, and adds a resource slot entry.
+ void DxbcCompiler::emitDclSampler(const DxbcShaderInstruction& ins) {
+   // The only operand of dclSampler:
+   //    (dst0) The sampler register to declare
+   const uint32_t regIndex = ins.dst[0].idx[0].offset;
+
+   // Although the sampler type is opaque, a pointer type
+   // and a variable are still needed in order to use it.
+   const uint32_t typeId = m_module.defSamplerType();
+   const uint32_t ptrTypeId = m_module.defPointerType(
+     typeId, spv::StorageClassUniformConstant);
+
+   // The sampler variable itself
+   const uint32_t samplerVarId = m_module.newVar(
+     ptrTypeId, spv::StorageClassUniformConstant);
+   m_module.setDebugName(samplerVarId,
+     str::format("s", regIndex).c_str());
+
+   auto& samplerEntry = m_samplers.at(regIndex);
+   samplerEntry.varId  = samplerVarId;
+   samplerEntry.typeId = typeId;
+
+   // Binding slot index for the sampler
+   const uint32_t bindingId = computeSamplerBinding(
+     m_programInfo.type(), regIndex);
+
+   m_module.decorateDescriptorSet(samplerVarId, 0);
+   m_module.decorateBinding(samplerVarId, bindingId);
+
+   // Descriptor info for the shader interface
+   DxvkResourceSlot slotInfo;
+   slotInfo.slot   = bindingId;
+   slotInfo.type   = VK_DESCRIPTOR_TYPE_SAMPLER;
+   slotInfo.view   = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
+   slotInfo.access = 0;
+   m_resourceSlots.push_back(slotInfo);
+ }
+
+
+ void DxbcCompiler::emitDclStream(const DxbcShaderInstruction& ins) {
+ if (ins.dst[0].idx[0].offset != 0 && m_moduleInfo.xfb == nullptr)
+ Logger::err("Dxbc: Multiple streams not supported");
+ }
+
+
+ void DxbcCompiler::emitDclResourceTyped(const DxbcShaderInstruction& ins) {
+ // dclResource takes two operands:
+ // (dst0) The resource register ID
+ // (imm0) The resource return type
+ const uint32_t registerId = ins.dst[0].idx[0].offset;
+
+ // We also handle unordered access views here
+ const bool isUav = ins.op == DxbcOpcode::DclUavTyped;
+
+ if (isUav) {
+ if (m_moduleInfo.options.useStorageImageReadWithoutFormat)
+ m_module.enableCapability(spv::CapabilityStorageImageReadWithoutFormat);
+ m_module.enableCapability(spv::CapabilityStorageImageWriteWithoutFormat);
+ }
+
+ // Defines the type of the resource (texture2D, ...)
+ const DxbcResourceDim resourceType = ins.controls.resourceDim();
+
+ // Defines the type of a read operation. DXBC has the ability
+ // to define four different types whereas SPIR-V only allows
+ // one, but in practice this should not be much of a problem.
+ auto xType = static_cast<DxbcResourceReturnType>(
+ bit::extract(ins.imm[0].u32, 0, 3));
+ auto yType = static_cast<DxbcResourceReturnType>(
+ bit::extract(ins.imm[0].u32, 4, 7));
+ auto zType = static_cast<DxbcResourceReturnType>(
+ bit::extract(ins.imm[0].u32, 8, 11));
+ auto wType = static_cast<DxbcResourceReturnType>(
+ bit::extract(ins.imm[0].u32, 12, 15));
+
+ if ((xType != yType) || (xType != zType) || (xType != wType))
+ Logger::warn("DxbcCompiler: dcl_resource: Ignoring resource return types");
+
+ // Declare the actual sampled type
+ const DxbcScalarType sampledType = [xType] {
+ switch (xType) {
+ // FIXME is this correct? There's no documentation about it
+ case DxbcResourceReturnType::Mixed: return DxbcScalarType::Uint32;
+ // FIXME do we have to manually clamp writes to SNORM/UNORM resources?
+ case DxbcResourceReturnType::Snorm: return DxbcScalarType::Float32;
+ case DxbcResourceReturnType::Unorm: return DxbcScalarType::Float32;
+ case DxbcResourceReturnType::Float: return DxbcScalarType::Float32;
+ case DxbcResourceReturnType::Sint: return DxbcScalarType::Sint32;
+ case DxbcResourceReturnType::Uint: return DxbcScalarType::Uint32;
+ default: throw DxvkError(str::format("DxbcCompiler: Invalid sampled type: ", xType));
+ }
+ }();
+
+ // Declare the resource type
+ const uint32_t sampledTypeId = getScalarTypeId(sampledType);
+ const DxbcImageInfo typeInfo = getResourceType(resourceType, isUav);
+
+ // Declare additional capabilities if necessary
+ switch (resourceType) {
+ case DxbcResourceDim::Buffer:
+ m_module.enableCapability(isUav
+ ? spv::CapabilityImageBuffer
+ : spv::CapabilitySampledBuffer);
+ break;
+
+ case DxbcResourceDim::Texture1D:
+ case DxbcResourceDim::Texture1DArr:
+ m_module.enableCapability(isUav
+ ? spv::CapabilityImage1D
+ : spv::CapabilitySampled1D);
+ break;
+
+ case DxbcResourceDim::TextureCubeArr:
+ m_module.enableCapability(
+ spv::CapabilitySampledCubeArray);
+ break;
+
+ default:
+ // No additional capabilities required
+ break;
+ }
+
+ // If the read-without-format capability is not set and this
+ // image is access via a typed load, or if atomic operations
+ // are used,, we must define the image format explicitly.
+ spv::ImageFormat imageFormat = spv::ImageFormatUnknown;
+
+ if (isUav) {
+ if ((m_analysis->uavInfos[registerId].accessAtomicOp)
+ || (m_analysis->uavInfos[registerId].accessTypedLoad
+ && !m_moduleInfo.options.useStorageImageReadWithoutFormat))
+ imageFormat = getScalarImageFormat(sampledType);
+ }
+
+ // We do not know whether the image is going to be used as
+ // a color image or a depth image yet, but we can pick the
+ // correct type when creating a sampled image object.
+ const uint32_t imageTypeId = m_module.defImageType(sampledTypeId,
+ typeInfo.dim, 0, typeInfo.array, typeInfo.ms, typeInfo.sampled,
+ imageFormat);
+
+ // We'll declare the texture variable with the color type
+ // and decide which one to use when the texture is sampled.
+ const uint32_t resourcePtrType = m_module.defPointerType(
+ imageTypeId, spv::StorageClassUniformConstant);
+
+ const uint32_t varId = m_module.newVar(resourcePtrType,
+ spv::StorageClassUniformConstant);
+
+ m_module.setDebugName(varId,
+ str::format(isUav ? "u" : "t", registerId).c_str());
+
+ // Compute the DXVK binding slot index for the resource.
+ // D3D11 needs to bind the actual resource to this slot.
+ uint32_t bindingId = isUav
+ ? computeUavBinding(m_programInfo.type(), registerId)
+ : computeSrvBinding(m_programInfo.type(), registerId);
+
+ m_module.decorateDescriptorSet(varId, 0);
+ m_module.decorateBinding(varId, bindingId);
+
+ if (ins.controls.uavFlags().test(DxbcUavFlag::GloballyCoherent))
+ m_module.decorate(varId, spv::DecorationCoherent);
+
+ // Declare a specialization constant which will
+ // store whether or not the resource is bound.
+ const uint32_t specConstId = m_module.specConstBool(true);
+ m_module.decorateSpecId(specConstId, bindingId);
+ m_module.setDebugName(specConstId,
+ str::format(isUav ? "u" : "t", registerId, "_bound").c_str());
+
+ if (isUav) {
+ DxbcUav uav;
+ uav.type = DxbcResourceType::Typed;
+ uav.imageInfo = typeInfo;
+ uav.varId = varId;
+ uav.ctrId = 0;
+ uav.specId = specConstId;
+ uav.sampledType = sampledType;
+ uav.sampledTypeId = sampledTypeId;
+ uav.imageTypeId = imageTypeId;
+ uav.structStride = 0;
+ uav.structAlign = 0;
+ m_uavs.at(registerId) = uav;
+ } else {
+ DxbcShaderResource res;
+ res.type = DxbcResourceType::Typed;
+ res.imageInfo = typeInfo;
+ res.varId = varId;
+ res.specId = specConstId;
+ res.sampledType = sampledType;
+ res.sampledTypeId = sampledTypeId;
+ res.imageTypeId = imageTypeId;
+ res.colorTypeId = imageTypeId;
+ res.depthTypeId = 0;
+ res.structStride = 0;
+ res.structAlign = 0;
+
+ if ((sampledType == DxbcScalarType::Float32)
+ && (resourceType == DxbcResourceDim::Texture2D
+ || resourceType == DxbcResourceDim::Texture2DArr
+ || resourceType == DxbcResourceDim::TextureCube
+ || resourceType == DxbcResourceDim::TextureCubeArr)) {
+ res.depthTypeId = m_module.defImageType(sampledTypeId,
+ typeInfo.dim, 1, typeInfo.array, typeInfo.ms, typeInfo.sampled,
+ spv::ImageFormatUnknown);
+ }
+
+ m_textures.at(registerId) = res;
+ }
+
+ // Store descriptor info for the shader interface
+ DxvkResourceSlot resource;
+ resource.slot = bindingId;
+ resource.view = typeInfo.vtype;
+
+ if (isUav) {
+ resource.type = resourceType == DxbcResourceDim::Buffer
+ ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
+ : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ resource.access = m_analysis->uavInfos[registerId].accessFlags;
+
+ if (!(resource.access & VK_ACCESS_SHADER_WRITE_BIT))
+ m_module.decorate(varId, spv::DecorationNonWritable);
+ if (!(resource.access & VK_ACCESS_SHADER_READ_BIT))
+ m_module.decorate(varId, spv::DecorationNonReadable);
+ } else {
+ resource.type = resourceType == DxbcResourceDim::Buffer
+ ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
+ : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ resource.access = VK_ACCESS_SHADER_READ_BIT;
+ }
+
+ m_resourceSlots.push_back(resource);
+ }
+
+
+ void DxbcCompiler::emitDclResourceRawStructured(const DxbcShaderInstruction& ins) {
+   // Shared handler for four declarations:
+   //   dcl_resource_raw, dcl_uav_raw
+   //     (dst0) The resource register ID
+   //   dcl_resource_structured, dcl_uav_structured
+   //     (dst0) The resource register ID
+   //     (imm0) Structure stride, in bytes
+   const uint32_t registerId = ins.dst[0].idx[0].offset;
+
+   // Classify the declaration by its opcode
+   bool isUav        = false;
+   bool isStructured = false;
+
+   switch (ins.op) {
+     case DxbcOpcode::DclUavRaw:
+       isUav = true;
+       break;
+
+     case DxbcOpcode::DclUavStructured:
+       isUav        = true;
+       isStructured = true;
+       break;
+
+     case DxbcOpcode::DclResourceStructured:
+       isStructured = true;
+       break;
+
+     default:
+       break;
+   }
+
+   // Prefix used for every debug name emitted below
+   const char* const regPrefix = isUav ? "u" : "t";
+
+   // The buffer contents are always accessed as 32-bit unsigned integers
+   const DxbcScalarType sampledType   = DxbcScalarType::Uint32;
+   const uint32_t       sampledTypeId = getScalarTypeId(sampledType);
+
+   const DxbcImageInfo typeInfo = { spv::DimBuffer, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_MAX_ENUM };
+
+   // Resource properties that get written back to the register
+   // table. For structured buffers, the alignment is the lowest
+   // set bit of the stride; raw buffers use a fixed value of 16.
+   DxbcResourceType resType   = DxbcResourceType::Raw;
+   uint32_t         resStride = 0;
+   uint32_t         resAlign  = 16;
+
+   if (isStructured) {
+     resType   = DxbcResourceType::Structured;
+     resStride = ins.imm[0].u32;
+     resAlign  = resStride & -resStride;
+   }
+
+   // Compute the DXVK binding slot index for the resource.
+   uint32_t bindingId = 0;
+
+   if (isUav)
+     bindingId = computeUavBinding(m_programInfo.type(), registerId);
+   else
+     bindingId = computeSrvBinding(m_programInfo.type(), registerId);
+
+   // A raw SSBO is only used if the resource alignment is at
+   // least the minimum SSBO alignment from the module options.
+   const bool useRawSsbo = m_moduleInfo.options.minSsboAlignment <= resAlign;
+
+   uint32_t resTypeId = 0;
+   uint32_t varId     = 0;
+
+   if (!useRawSsbo) {
+     // Fall back to a texel buffer of 32-bit unsigned integers
+     m_module.enableCapability(isUav
+       ? spv::CapabilityImageBuffer
+       : spv::CapabilitySampledBuffer);
+
+     resTypeId = m_module.defImageType(sampledTypeId,
+       typeInfo.dim, 0, typeInfo.array, typeInfo.ms, typeInfo.sampled,
+       spv::ImageFormatR32ui);
+
+     const uint32_t imagePtrType = m_module.defPointerType(
+       resTypeId, spv::StorageClassUniformConstant);
+
+     varId = m_module.newVar(imagePtrType,
+       spv::StorageClassUniformConstant);
+   } else {
+     // Buffer block wrapping a runtime array of 32-bit unsigned integers
+     uint32_t elemType   = getScalarTypeId(DxbcScalarType::Uint32);
+     uint32_t arrayType  = m_module.defRuntimeArrayTypeUnique(elemType);
+     uint32_t structType = m_module.defStructTypeUnique(1, &arrayType);
+     uint32_t ptrType    = m_module.defPointerType(structType, spv::StorageClassUniform);
+
+     resTypeId = m_module.defPointerType(elemType, spv::StorageClassUniform);
+     varId     = m_module.newVar(ptrType, spv::StorageClassUniform);
+
+     m_module.decorateArrayStride(arrayType, sizeof(uint32_t));
+     m_module.decorate(structType, spv::DecorationBufferBlock);
+     m_module.memberDecorateOffset(structType, 0, 0);
+
+     m_module.setDebugName(structType,
+       str::format(regPrefix, registerId, "_t").c_str());
+     m_module.setDebugMemberName(structType, 0, "m");
+   }
+
+   // Name the variable and attach it to its descriptor binding
+   m_module.setDebugName(varId,
+     str::format(regPrefix, registerId).c_str());
+
+   m_module.decorateDescriptorSet(varId, 0);
+   m_module.decorateBinding(varId, bindingId);
+
+   if (ins.controls.uavFlags().test(DxbcUavFlag::GloballyCoherent))
+     m_module.decorate(varId, spv::DecorationCoherent);
+
+   // Boolean specialization constant, keyed by the binding
+   // index, that stores whether the resource is bound.
+   const uint32_t specConstId = m_module.specConstBool(true);
+   m_module.decorateSpecId(specConstId, bindingId);
+   m_module.setDebugName(specConstId,
+     str::format(regPrefix, registerId, "_bound").c_str());
+
+   // Write the resource info back to the matching register table
+   if (!isUav) {
+     DxbcShaderResource res;
+     res.type          = resType;
+     res.imageInfo     = typeInfo;
+     res.varId         = varId;
+     res.specId        = specConstId;
+     res.sampledType   = sampledType;
+     res.sampledTypeId = sampledTypeId;
+     res.imageTypeId   = resTypeId;
+     res.colorTypeId   = resTypeId;
+     res.depthTypeId   = 0;
+     res.structStride  = resStride;
+     res.structAlign   = resAlign;
+     m_textures.at(registerId) = res;
+   } else {
+     DxbcUav uav;
+     uav.type          = resType;
+     uav.imageInfo     = typeInfo;
+     uav.varId         = varId;
+     uav.ctrId         = 0;
+     uav.specId        = specConstId;
+     uav.sampledType   = sampledType;
+     uav.sampledTypeId = sampledTypeId;
+     uav.imageTypeId   = resTypeId;
+     uav.structStride  = resStride;
+     uav.structAlign   = resAlign;
+     m_uavs.at(registerId) = uav;
+   }
+
+   // Store descriptor info for the shader interface
+   DxvkResourceSlot resource;
+   resource.slot = bindingId;
+   resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
+
+   if (useRawSsbo)
+     resource.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+   else if (isUav)
+     resource.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+   else
+     resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+
+   if (isUav)
+     resource.access = m_analysis->uavInfos[registerId].accessFlags;
+   else
+     resource.access = VK_ACCESS_SHADER_READ_BIT;
+
+   // Storage resources get access decorations matching their usage
+   if (useRawSsbo || isUav) {
+     const bool isWritten = (resource.access & VK_ACCESS_SHADER_WRITE_BIT) != 0;
+     const bool isRead    = (resource.access & VK_ACCESS_SHADER_READ_BIT)  != 0;
+
+     if (!isWritten)
+       m_module.decorate(varId, spv::DecorationNonWritable);
+     if (!isRead)
+       m_module.decorate(varId, spv::DecorationNonReadable);
+   }
+
+   m_resourceSlots.push_back(resource);
+ }
+
+
+ void DxbcCompiler::emitDclThreadGroupSharedMemory(const DxbcShaderInstruction& ins) {
+   // dcl_tgsm_raw:
+   //   (dst0) The resource register ID
+   //   (imm0) Block size, in bytes
+   // dcl_tgsm_structured:
+   //   (dst0) The resource register ID
+   //   (imm0) Structure stride, in bytes
+   //   (imm1) Structure count
+   const uint32_t regId = ins.dst[0].idx[0].offset;
+   const bool isStructured = ins.op == DxbcOpcode::DclThreadGroupSharedMemoryStructured;
+
+   // Grow the register table on demand
+   if (regId >= m_gRegs.size())
+     m_gRegs.resize(regId + 1);
+
+   uint32_t elementStride = 0;
+   uint32_t elementCount  = ins.imm[0].u32;
+
+   if (isStructured) {
+     elementStride = ins.imm[0].u32;
+     elementCount  = ins.imm[1].u32;
+   }
+
+   // The memory is declared as a workgroup-local array of 32-bit
+   // unsigned integers, so byte sizes are divided by four.
+   DxbcRegisterInfo varInfo;
+   varInfo.type.ctype  = DxbcScalarType::Uint32;
+   varInfo.type.ccount = 1;
+   varInfo.sclass      = spv::StorageClassWorkgroup;
+
+   if (isStructured)
+     varInfo.type.alength = elementCount * elementStride / 4;
+   else
+     varInfo.type.alength = elementCount / 4;
+
+   auto& reg = m_gRegs[regId];
+   reg.type = isStructured
+     ? DxbcResourceType::Structured
+     : DxbcResourceType::Raw;
+   reg.elementStride = elementStride;
+   reg.elementCount  = elementCount;
+   reg.varId         = emitNewVariable(varInfo);
+
+   m_module.setDebugName(reg.varId,
+     str::format("g", regId).c_str());
+ }
+
+
+ void DxbcCompiler::emitDclGsInputPrimitive(const DxbcShaderInstruction& ins) {
+   // The input primitive type is stored in the control bits
+   // of the opcode token. In SPIR-V, it is expressed as an
+   // execution mode on the entry point.
+   const auto primitive = ins.controls.primitive();
+
+   spv::ExecutionMode mode;
+
+   switch (primitive) {
+     case DxbcPrimitive::Point:
+       mode = spv::ExecutionModeInputPoints;
+       break;
+     case DxbcPrimitive::Line:
+       mode = spv::ExecutionModeInputLines;
+       break;
+     case DxbcPrimitive::Triangle:
+       mode = spv::ExecutionModeTriangles;
+       break;
+     case DxbcPrimitive::LineAdj:
+       mode = spv::ExecutionModeInputLinesAdjacency;
+       break;
+     case DxbcPrimitive::TriangleAdj:
+       mode = spv::ExecutionModeInputTrianglesAdjacency;
+       break;
+     default:
+       throw DxvkError("DxbcCompiler: Unsupported primitive type");
+   }
+
+   m_gs.inputPrimitive = primitive;
+   m_module.setExecutionMode(m_entryPointId, mode);
+
+   // Declare the input arrays, sized by the
+   // vertex count of the input primitive
+   const uint32_t vertexCount = primitiveVertexCount(m_gs.inputPrimitive);
+
+   emitDclInputArray(vertexCount);
+   emitDclInputPerVertex(vertexCount, "gs_vertex_in");
+ }
+
+
+ void DxbcCompiler::emitDclGsOutputTopology(const DxbcShaderInstruction& ins) {
+   // The output primitive topology is stored in the control
+   // bits of the opcode token. In SPIR-V, it is expressed
+   // as an execution mode on the entry point.
+   spv::ExecutionMode mode;
+
+   switch (ins.controls.primitiveTopology()) {
+     case DxbcPrimitiveTopology::PointList:
+       mode = spv::ExecutionModeOutputPoints;
+       break;
+     case DxbcPrimitiveTopology::LineStrip:
+       mode = spv::ExecutionModeOutputLineStrip;
+       break;
+     case DxbcPrimitiveTopology::TriangleStrip:
+       mode = spv::ExecutionModeOutputTriangleStrip;
+       break;
+     default:
+       throw DxvkError("DxbcCompiler: Unsupported primitive topology");
+   }
+
+   m_module.setExecutionMode(m_entryPointId, mode);
+ }
+
+
+ void DxbcCompiler::emitDclMaxOutputVertexCount(const DxbcShaderInstruction& ins) {
+   // dcl_max_output_vertex_count:
+   //   (imm0) The maximum number of vertices
+   // The value is remembered in the geometry shader state
+   // and also set as the entry point's output vertex count.
+   const auto& maxVertices = ins.imm[0];
+   m_gs.outputVertexCount = maxVertices.u32;
+
+   m_module.setOutputVertices(
+     m_entryPointId,
+     m_gs.outputVertexCount);
+ }
+
+
+ void DxbcCompiler::emitDclInputControlPointCount(const DxbcShaderInstruction& ins) {
+   // dcl_input_control_points carries the control point
+   // count within the opcode token itself.
+   const bool isHullShader = m_programInfo.type() == DxbcProgramType::HullShader;
+
+   if (isHullShader) {
+     // Hull shaders only need the input array to be declared
+     m_hs.vertexCountIn = ins.controls.controlPointCount();
+     emitDclInputArray(m_hs.vertexCountIn);
+     return;
+   }
+
+   // Any other program type goes through the domain shader
+   // state, which sets up both tessellation input interfaces.
+   m_ds.vertexCountIn = ins.controls.controlPointCount();
+
+   m_ds.inputPerPatch = emitTessInterfacePerPatch(
+     spv::StorageClassInput);
+   m_ds.inputPerVertex = emitTessInterfacePerVertex(
+     spv::StorageClassInput, m_ds.vertexCountIn);
+ }
+
+
+ void DxbcCompiler::emitDclOutputControlPointCount(const DxbcShaderInstruction& ins) {
+   // dcl_output_control_points carries the control point
+   // count within the opcode token itself.
+   m_hs.vertexCountOut = ins.controls.controlPointCount();
+
+   // The per-patch interface is declared in private storage,
+   // the per-vertex interface in output storage.
+   m_hs.outputPerPatch = emitTessInterfacePerPatch(
+     spv::StorageClassPrivate);
+   m_hs.outputPerVertex = emitTessInterfacePerVertex(
+     spv::StorageClassOutput, m_hs.vertexCountOut);
+
+   m_module.setOutputVertices(
+     m_entryPointId,
+     m_hs.vertexCountOut);
+ }
+
+
+ void DxbcCompiler::emitDclHsMaxTessFactor(const DxbcShaderInstruction& ins) {
+   // Records the first immediate operand, read as a 32-bit float,
+   // as the hull shader's maximum tessellation factor. Nothing is
+   // emitted into the module at this point.
+   const auto& factor = ins.imm[0];
+   m_hs.maxTessFactor = factor.f32;
+ }
+
+
+ void DxbcCompiler::emitDclTessDomain(const DxbcShaderInstruction& ins) {
+   // Translates the tessellation domain from the opcode
+   // controls into the matching SPIR-V execution mode.
+   spv::ExecutionMode domainMode;
+
+   switch (ins.controls.tessDomain()) {
+     case DxbcTessDomain::Isolines:
+       domainMode = spv::ExecutionModeIsolines;
+       break;
+     case DxbcTessDomain::Triangles:
+       domainMode = spv::ExecutionModeTriangles;
+       break;
+     case DxbcTessDomain::Quads:
+       domainMode = spv::ExecutionModeQuads;
+       break;
+     default:
+       throw DxvkError("Dxbc: Invalid tess domain");
+   }
+
+   m_module.setExecutionMode(m_entryPointId, domainMode);
+ }
+
+
+ void DxbcCompiler::emitDclTessPartitioning(const DxbcShaderInstruction& ins) {
+   // Translates the tessellation partitioning from the opcode
+   // controls into a SPIR-V spacing execution mode. Pow2 and
+   // Integer partitioning both map to equal spacing.
+   spv::ExecutionMode spacingMode;
+
+   switch (ins.controls.tessPartitioning()) {
+     case DxbcTessPartitioning::Pow2:
+     case DxbcTessPartitioning::Integer:
+       spacingMode = spv::ExecutionModeSpacingEqual;
+       break;
+     case DxbcTessPartitioning::FractOdd:
+       spacingMode = spv::ExecutionModeSpacingFractionalOdd;
+       break;
+     case DxbcTessPartitioning::FractEven:
+       spacingMode = spv::ExecutionModeSpacingFractionalEven;
+       break;
+     default:
+       throw DxvkError("Dxbc: Invalid tess partitioning");
+   }
+
+   m_module.setExecutionMode(m_entryPointId, spacingMode);
+ }
+
+
+ void DxbcCompiler::emitDclTessOutputPrimitive(const DxbcShaderInstruction& ins) {
+   // Translates the tessellator output primitive into an
+   // execution mode. Line output sets no execution mode.
+   spv::ExecutionMode primitiveMode;
+
+   switch (ins.controls.tessOutputPrimitive()) {
+     case DxbcTessOutputPrimitive::Line:
+       return;
+
+     case DxbcTessOutputPrimitive::Point:
+       primitiveMode = spv::ExecutionModePointMode;
+       break;
+
+     case DxbcTessOutputPrimitive::TriangleCw:
+       primitiveMode = spv::ExecutionModeVertexOrderCw;
+       break;
+
+     case DxbcTessOutputPrimitive::TriangleCcw:
+       primitiveMode = spv::ExecutionModeVertexOrderCcw;
+       break;
+
+     default:
+       throw DxvkError("Dxbc: Invalid tess output primitive");
+   }
+
+   m_module.setExecutionMode(m_entryPointId, primitiveMode);
+ }
+
+
+ void DxbcCompiler::emitDclThreadGroup(const DxbcShaderInstruction& ins) {
+   // dcl_thread_group:
+   //   (imm0) Number of threads in X dimension
+   //   (imm1) Number of threads in Y dimension
+   //   (imm2) Number of threads in Z dimension
+   const uint32_t sizeX = ins.imm[0].u32;
+   const uint32_t sizeY = ins.imm[1].u32;
+   const uint32_t sizeZ = ins.imm[2].u32;
+
+   // Keep a copy in the compute shader state
+   m_cs.workgroupSizeX = sizeX;
+   m_cs.workgroupSizeY = sizeY;
+   m_cs.workgroupSizeZ = sizeZ;
+
+   m_module.setLocalSize(m_entryPointId, sizeX, sizeY, sizeZ);
+ }
+
+
+ void DxbcCompiler::emitDclGsInstanceCount(const DxbcShaderInstruction& ins) {
+   // dcl_gs_instance_count:
+   //   (imm0) Number of geometry shader invocations
+   // The count is set on the entry point and also
+   // remembered in the geometry shader state.
+   const uint32_t invocations = ins.imm[0].u32;
+
+   m_module.setInvocations(m_entryPointId, invocations);
+   m_gs.invocationCount = invocations;
+ }
+
+
+ uint32_t DxbcCompiler::emitDclUavCounter(uint32_t regId) {
+   // Declares the counter buffer variable for UAV register
+   // regId, registers its resource slot and returns the ID
+   // of the new variable.
+   //
+   // The struct and pointer types are created on first use and
+   // cached: a buffer block holding one 32-bit unsigned integer
+   // at offset 0.
+   if (!m_uavCtrStructType) {
+     const uint32_t counterType = m_module.defIntType(32, 0);
+     const uint32_t blockType   = m_module.defStructTypeUnique(1, &counterType);
+
+     m_module.decorate(blockType, spv::DecorationBufferBlock);
+     m_module.memberDecorateOffset(blockType, 0, 0);
+
+     m_module.setDebugName(blockType, "uav_meta");
+     m_module.setDebugMemberName(blockType, 0, "ctr");
+
+     m_uavCtrStructType  = blockType;
+     m_uavCtrPointerType = m_module.defPointerType(
+       blockType, spv::StorageClassUniform);
+   }
+
+   // Declare the buffer variable and give it a debug name
+   const uint32_t counterVar = m_module.newVar(
+     m_uavCtrPointerType, spv::StorageClassUniform);
+
+   m_module.setDebugName(counterVar,
+     str::format("u", regId, "_meta").c_str());
+
+   // Attach the variable to its counter binding in set 0
+   const uint32_t bindingId = computeUavCounterBinding(
+     m_programInfo.type(), regId);
+
+   m_module.decorateDescriptorSet(counterVar, 0);
+   m_module.decorateBinding(counterVar, bindingId);
+
+   // Declare the storage buffer binding, which
+   // the shader both reads and writes
+   DxvkResourceSlot slotInfo;
+   slotInfo.slot   = bindingId;
+   slotInfo.type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+   slotInfo.view   = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
+   slotInfo.access = VK_ACCESS_SHADER_READ_BIT
+                   | VK_ACCESS_SHADER_WRITE_BIT;
+   m_resourceSlots.push_back(slotInfo);
+
+   return counterVar;
+ }
+
+
+ void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) {
+   // Declares the immediate constant buffer from the custom data
+   // of the instruction. Only one such buffer may be declared,
+   // and its size must be a multiple of four DWORDs.
+   if (m_immConstBuf != 0)
+     throw DxvkError("DxbcCompiler: Immediate constant buffer already declared");
+
+   if ((ins.customDataSize & 0x3) != 0)
+     throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs");
+
+   // Buffers larger than the baking limit are declared through
+   // the constant buffer path; smaller ones are baked into the
+   // shader as a constant array.
+   if (ins.customDataSize > Icb_MaxBakedDwords) {
+     this->emitDclImmediateConstantBufferUbo(
+       ins.customDataSize, ins.customData);
+     return;
+   }
+
+   this->emitDclImmediateConstantBufferBaked(
+     ins.customDataSize, ins.customData);
+ }
+
+
+ void DxbcCompiler::emitDclImmediateConstantBufferBaked(
+         uint32_t                dwordCount,
+   const uint32_t*               dwordArray) {
+   // Bakes the immediate constant buffer into the module as a
+   // private variable initialized with a constant array of
+   // four-component 32-bit unsigned integer vectors.
+   //   dwordCount: Number of DWORDs in dwordArray
+   //   dwordArray: The constant data
+   DxbcVectorType vecType;
+   vecType.ctype  = DxbcScalarType::Uint32;
+   vecType.ccount = 4;
+
+   const uint32_t vectorTypeId = getVectorTypeId(vecType);
+   const uint32_t vectorCount  = dwordCount / 4;
+
+   // One constant composite per group of four DWORDs. The
+   // bounds-checked access throws if more vectors are given
+   // than the array can hold.
+   std::array<uint32_t, 4096> vectorIds;
+
+   for (uint32_t vec = 0; vec < vectorCount; vec++) {
+     const uint32_t* dwords = dwordArray + 4 * vec;
+
+     std::array<uint32_t, 4> components = {
+       m_module.constu32(dwords[0]),
+       m_module.constu32(dwords[1]),
+       m_module.constu32(dwords[2]),
+       m_module.constu32(dwords[3]),
+     };
+
+     vectorIds.at(vec) = m_module.constComposite(
+       vectorTypeId, components.size(), components.data());
+   }
+
+   // Declare the constant array that contains all the vectors
+   DxbcArrayType arrInfo;
+   arrInfo.ctype   = DxbcScalarType::Uint32;
+   arrInfo.ccount  = 4;
+   arrInfo.alength = vectorCount;
+
+   const uint32_t arrayTypeId = getArrayTypeId(arrInfo);
+   const uint32_t arrayId = m_module.constComposite(
+     arrayTypeId, vectorCount, vectorIds.data());
+
+   // Declare the private variable that holds the constant
+   // data and initialize it with the constant array.
+   const uint32_t pointerTypeId = m_module.defPointerType(
+     arrayTypeId, spv::StorageClassPrivate);
+
+   m_immConstBuf = m_module.newVarInit(pointerTypeId,
+     spv::StorageClassPrivate, arrayId);
+   m_module.setDebugName(m_immConstBuf, "icb");
+ }
+
+
+ void DxbcCompiler::emitDclImmediateConstantBufferUbo(
+         uint32_t                dwordCount,
+   const uint32_t*               dwordArray) {
+   // Declares the immediate constant buffer as a constant buffer
+   // variable named "icb" at the dedicated ICB binding slot, and
+   // keeps the constant data in m_immConstData.
+   //   dwordCount: Number of DWORDs in dwordArray
+   //   dwordArray: The constant data
+   const uint32_t vectorCount = dwordCount / 4;
+
+   this->emitDclConstantBufferVar(
+     Icb_BindingSlotId, vectorCount, "icb",
+     m_moduleInfo.options.dynamicIndexedConstantBufferAsSsbo);
+
+   m_immConstData = DxvkShaderConstData(dwordCount, dwordArray);
+ }
+
+
+ void DxbcCompiler::emitCustomData(const DxbcShaderInstruction& ins) {
+   // Dispatches a custom data block by its class. Only immediate
+   // constant buffers are handled; any other class is reported
+   // with a warning and otherwise ignored.
+   if (ins.customDataType == DxbcCustomDataClass::ImmConstBuf) {
+     emitDclImmediateConstantBuffer(ins);
+     return;
+   }
+
+   Logger::warn(str::format(
+     "DxbcCompiler: Unsupported custom data block: ",
+     ins.customDataType));
+ }
+
+
+ void DxbcCompiler::emitVectorAlu(const DxbcShaderInstruction& ins) {
+   // Component-wise ALU instructions with a single destination.
+   // All source operands are loaded with the destination write
+   // mask, the operation is emitted, and the result is stored
+   // after applying the destination operand modifiers.
+   std::array<DxbcRegisterValue, DxbcMaxOperandCount> operands;
+
+   for (uint32_t i = 0; i < ins.srcCount; i++)
+     operands.at(i) = emitRegisterLoad(ins.src[i], ins.dst[0].mask);
+
+   // Looks up the value ID of a source operand on demand, so
+   // that only operands actually used by an opcode are read.
+   const auto arg = [&operands] (uint32_t index) {
+     return operands.at(index).id;
+   };
+
+   // The result has one component per enabled bit in the write
+   // mask, or half as many for double-precision data types.
+   DxbcRegisterValue result;
+   result.type.ctype  = ins.dst[0].dataType;
+   result.type.ccount = ins.dst[0].mask.popCount();
+
+   if (isDoubleType(ins.dst[0].dataType))
+     result.type.ccount /= 2;
+
+   const uint32_t typeId = getVectorTypeId(result.type);
+
+   switch (ins.op) {
+     // Move instructions simply forward the loaded value
+     case DxbcOpcode::Mov:
+     case DxbcOpcode::DMov:
+       result.id = arg(0);
+       break;
+
+     // Floating point arithmetic (32-bit and 64-bit)
+     case DxbcOpcode::Add:
+     case DxbcOpcode::DAdd:
+       result.id = m_module.opFAdd(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Div:
+     case DxbcOpcode::DDiv:
+       result.id = m_module.opFDiv(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Exp:
+       result.id = m_module.opExp2(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Frc:
+       result.id = m_module.opFract(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Log:
+       result.id = m_module.opLog2(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Mad:
+     case DxbcOpcode::DFma:
+       result.id = m_module.opFFma(typeId, arg(0), arg(1), arg(2));
+       break;
+
+     case DxbcOpcode::Max:
+     case DxbcOpcode::DMax:
+       result.id = m_module.opNMax(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Min:
+     case DxbcOpcode::DMin:
+       result.id = m_module.opNMin(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Mul:
+     case DxbcOpcode::DMul:
+       result.id = m_module.opFMul(typeId, arg(0), arg(1));
+       break;
+
+     // Reciprocals are emitted as a division of one by the operand
+     case DxbcOpcode::Rcp: {
+       const uint32_t one = emitBuildConstVecf32(
+         1.0f, 1.0f, 1.0f, 1.0f, ins.dst[0].mask).id;
+       result.id = m_module.opFDiv(typeId, one, arg(0));
+     } break;
+
+     case DxbcOpcode::DRcp: {
+       const uint32_t one = emitBuildConstVecf64(
+         1.0, 1.0, ins.dst[0].mask).id;
+       result.id = m_module.opFDiv(typeId, one, arg(0));
+     } break;
+
+     case DxbcOpcode::RoundNe:
+       result.id = m_module.opRoundEven(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::RoundNi:
+       result.id = m_module.opFloor(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::RoundPi:
+       result.id = m_module.opCeil(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::RoundZ:
+       result.id = m_module.opTrunc(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Rsq:
+       result.id = m_module.opInverseSqrt(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Sqrt:
+       result.id = m_module.opSqrt(typeId, arg(0));
+       break;
+
+     // Signed integer arithmetic
+     case DxbcOpcode::IAdd:
+       result.id = m_module.opIAdd(typeId, arg(0), arg(1));
+       break;
+
+     // Integer multiply-add is emitted as a multiply followed by an add
+     case DxbcOpcode::IMad:
+     case DxbcOpcode::UMad: {
+       const uint32_t product = m_module.opIMul(typeId, arg(0), arg(1));
+       result.id = m_module.opIAdd(typeId, product, arg(2));
+     } break;
+
+     case DxbcOpcode::IMax:
+       result.id = m_module.opSMax(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::IMin:
+       result.id = m_module.opSMin(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::INeg:
+       result.id = m_module.opSNegate(typeId, arg(0));
+       break;
+
+     // Unsigned integer arithmetic
+     case DxbcOpcode::UMax:
+       result.id = m_module.opUMax(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::UMin:
+       result.id = m_module.opUMin(typeId, arg(0), arg(1));
+       break;
+
+     // Bitwise operations
+     case DxbcOpcode::And:
+       result.id = m_module.opBitwiseAnd(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Not:
+       result.id = m_module.opNot(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::Or:
+       result.id = m_module.opBitwiseOr(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::Xor:
+       result.id = m_module.opBitwiseXor(typeId, arg(0), arg(1));
+       break;
+
+     case DxbcOpcode::CountBits:
+       result.id = m_module.opBitCount(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::BfRev:
+       result.id = m_module.opBitReverse(typeId, arg(0));
+       break;
+
+     // Conversions between integer and floating point types
+     case DxbcOpcode::ItoF:
+       result.id = m_module.opConvertStoF(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::UtoF:
+       result.id = m_module.opConvertUtoF(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::FtoI:
+       result.id = m_module.opConvertFtoS(typeId, arg(0));
+       break;
+
+     case DxbcOpcode::FtoU:
+       result.id = m_module.opConvertFtoU(typeId, arg(0));
+       break;
+
+     // Anything else is reported and produces no result
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       return;
+   }
+
+   // Precise instructions must not be contracted
+   const bool isPrecise = ins.controls.precise() || m_precise;
+
+   if (isPrecise)
+     m_module.decorate(result.id, spv::DecorationNoContraction);
+
+   // Apply destination modifiers and store the computed value
+   result = emitDstOperandModifiers(result, ins.modifiers);
+   emitRegisterStore(ins.dst[0], result);
+ }
+
+
+ void DxbcCompiler::emitVectorCmov(const DxbcShaderInstruction& ins) {
+   // movc and swapc have the following operands:
+   //   (dst0) The first destination register
+   //   (dst1) The second destination register (swapc only)
+   //   (src0) The condition vector
+   //   (src1) Vector to select from if the condition is not 0
+   //   (src2) Vector to select from if the condition is 0
+   DxbcRegMask condMask = ins.dst[0].mask;
+
+   // For 64-bit destinations, each pair of mask bits is
+   // collapsed into a single condition component.
+   if (ins.dst[0].dataType == DxbcScalarType::Float64) {
+     const bool lowPair  = condMask[0] && condMask[1];
+     const bool highPair = condMask[2] && condMask[3];
+     condMask = DxbcRegMask(lowPair, highPair, false, false);
+   }
+
+   const DxbcRegisterValue condition = emitRegisterLoad(ins.src[0], condMask);
+   const DxbcRegisterValue ifNonZero = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+   const DxbcRegisterValue ifZero    = emitRegisterLoad(ins.src[2], ins.dst[0].mask);
+
+   const uint32_t componentCount = condMask.popCount();
+
+   // The condition is compared against zero to produce a
+   // boolean (vector), which is then used by the select.
+   uint32_t zeroType = m_module.defIntType(32, 0);
+   uint32_t boolType = m_module.defBoolType();
+   uint32_t zero     = m_module.constu32(0);
+
+   if (componentCount > 1) {
+     zeroType = m_module.defVectorType(zeroType, componentCount);
+     boolType = m_module.defVectorType(boolType, componentCount);
+
+     const std::array<uint32_t, 4> zeroes = { zero, zero, zero, zero };
+     zero = m_module.constComposite(zeroType, componentCount, zeroes.data());
+   }
+
+   // For swapc, the second destination operand receives the
+   // output that a movc instruction would write to its first.
+   const uint32_t trueIndex = ins.op == DxbcOpcode::Swapc ? 1 : 0;
+
+   for (uint32_t i = 0; i < ins.dstCount; i++) {
+     const bool takesTrueValue = i == trueIndex;
+
+     DxbcRegisterValue selected;
+     selected.type.ctype  = ins.dst[i].dataType;
+     selected.type.ccount = componentCount;
+     selected.id = m_module.opSelect(
+       getVectorTypeId(selected.type),
+       m_module.opINotEqual(boolType, condition.id, zero),
+       takesTrueValue ? ifNonZero.id : ifZero.id,
+       takesTrueValue ? ifZero.id : ifNonZero.id);
+
+     selected = emitDstOperandModifiers(selected, ins.modifiers);
+     emitRegisterStore(ins.dst[i], selected);
+   }
+ }
+
+ void DxbcCompiler::emitVectorCmp(const DxbcShaderInstruction& ins) {
+   // Compare instructions have three operands:
+   //   (dst0) The destination register
+   //   (src0) The first vector to compare
+   //   (src1) The second vector to compare
+   // Each result component is ~0u or 0u, depending on
+   // the outcome of the comparison.
+   const uint32_t componentCount = ins.dst[0].mask.popCount();
+
+   // 64-bit comparisons return a 32-bit vector, so the read
+   // mask covers two source components per result component.
+   DxbcRegMask srcMask = ins.dst[0].mask;
+
+   if (isDoubleType(ins.src[0].dataType)) {
+     const bool first  = componentCount > 0;
+     const bool second = componentCount > 1;
+     srcMask = DxbcRegMask(first, first, second, second);
+   }
+
+   const DxbcRegisterValue lhs = emitRegisterLoad(ins.src[0], srcMask);
+   const DxbcRegisterValue rhs = emitRegisterLoad(ins.src[1], srcMask);
+
+   // Boolean (vector) type of the comparison result
+   uint32_t conditionType = m_module.defBoolType();
+
+   if (componentCount > 1)
+     conditionType = m_module.defVectorType(conditionType, componentCount);
+
+   // Floating point inequality reuses the ordered equality
+   // test and swaps the two selection constants afterwards.
+   const bool invert = ins.op == DxbcOpcode::Ne
+                    || ins.op == DxbcOpcode::DNe;
+
+   uint32_t condition = 0;
+
+   switch (ins.op) {
+     case DxbcOpcode::Ne:
+     case DxbcOpcode::DNe:
+     case DxbcOpcode::Eq:
+     case DxbcOpcode::DEq:
+       condition = m_module.opFOrdEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::Ge:
+     case DxbcOpcode::DGe:
+       condition = m_module.opFOrdGreaterThanEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::Lt:
+     case DxbcOpcode::DLt:
+       condition = m_module.opFOrdLessThan(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::IEq:
+       condition = m_module.opIEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::IGe:
+       condition = m_module.opSGreaterThanEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::ILt:
+       condition = m_module.opSLessThan(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::INe:
+       condition = m_module.opINotEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::UGe:
+       condition = m_module.opUGreaterThanEqual(conditionType, lhs.id, rhs.id);
+       break;
+
+     case DxbcOpcode::ULt:
+       condition = m_module.opULessThan(conditionType, lhs.id, rhs.id);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       return;
+   }
+
+   // Constants to select from: all bits clear and all bits set
+   uint32_t allClear = m_module.constu32( 0u);
+   uint32_t allSet   = m_module.constu32(~0u);
+
+   DxbcRegisterValue result;
+   result.type.ctype  = DxbcScalarType::Uint32;
+   result.type.ccount = componentCount;
+
+   const uint32_t typeId = getVectorTypeId(result.type);
+
+   if (componentCount > 1) {
+     const std::array<uint32_t, 4> clearVec = { allClear, allClear, allClear, allClear };
+     const std::array<uint32_t, 4> setVec   = { allSet,   allSet,   allSet,   allSet   };
+
+     allClear = m_module.constComposite(typeId, componentCount, clearVec.data());
+     allSet   = m_module.constComposite(typeId, componentCount, setVec  .data());
+   }
+
+   // Component-wise selection based on the condition evaluated
+   // above, with the constants exchanged for inverted tests.
+   const uint32_t whenTrue  = invert ? allClear : allSet;
+   const uint32_t whenFalse = invert ? allSet : allClear;
+
+   result.id = m_module.opSelect(
+     typeId, condition, whenTrue, whenFalse);
+
+   emitRegisterStore(ins.dst[0], result);
+ }
+
+
+ void DxbcCompiler::emitVectorDeriv(const DxbcShaderInstruction& ins) {
+   // Derivative instructions have two operands:
+   //   (dst0) Destination register for the derivative
+   //   (src0) The operand to compute the derivative of
+   DxbcRegisterValue deriv = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+
+   const uint32_t typeId  = getVectorTypeId(deriv.type);
+   const uint32_t inputId = deriv.id;
+
+   switch (ins.op) {
+     // Derivatives along X
+     case DxbcOpcode::DerivRtx:
+       deriv.id = m_module.opDpdx(typeId, inputId);
+       break;
+     case DxbcOpcode::DerivRtxCoarse:
+       deriv.id = m_module.opDpdxCoarse(typeId, inputId);
+       break;
+     case DxbcOpcode::DerivRtxFine:
+       deriv.id = m_module.opDpdxFine(typeId, inputId);
+       break;
+
+     // Derivatives along Y
+     case DxbcOpcode::DerivRty:
+       deriv.id = m_module.opDpdy(typeId, inputId);
+       break;
+     case DxbcOpcode::DerivRtyCoarse:
+       deriv.id = m_module.opDpdyCoarse(typeId, inputId);
+       break;
+     case DxbcOpcode::DerivRtyFine:
+       deriv.id = m_module.opDpdyFine(typeId, inputId);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       return;
+   }
+
+   deriv = emitDstOperandModifiers(deriv, ins.modifiers);
+   emitRegisterStore(ins.dst[0], deriv);
+ }
+
+
+ void DxbcCompiler::emitVectorDot(const DxbcShaderInstruction& ins) {
+   // Dot product of the two source operands, producing a scalar.
+   // The number of source components read is derived by comparing
+   // the opcode against Dp2, Dp3 and Dp4 (presumably these are
+   // consecutive, ascending opcode values).
+   const bool readY = ins.op >= DxbcOpcode::Dp2;
+   const bool readZ = ins.op >= DxbcOpcode::Dp3;
+   const bool readW = ins.op >= DxbcOpcode::Dp4;
+
+   const DxbcRegMask srcMask(true, readY, readZ, readW);
+
+   const DxbcRegisterValue lhs = emitRegisterLoad(ins.src[0], srcMask);
+   const DxbcRegisterValue rhs = emitRegisterLoad(ins.src[1], srcMask);
+
+   DxbcRegisterValue dot;
+   dot.type.ctype  = ins.dst[0].dataType;
+   dot.type.ccount = 1;
+
+   const uint32_t scalarTypeId = getVectorTypeId(dot.type);
+   dot.id = m_module.opDot(scalarTypeId, lhs.id, rhs.id);
+
+   // Precise instructions must not be contracted
+   const bool isPrecise = ins.controls.precise() || m_precise;
+
+   if (isPrecise)
+     m_module.decorate(dot.id, spv::DecorationNoContraction);
+
+   dot = emitDstOperandModifiers(dot, ins.modifiers);
+   emitRegisterStore(ins.dst[0], dot);
+ }
+
+
+ void DxbcCompiler::emitVectorIdiv(const DxbcShaderInstruction& ins) {
+   // udiv has four operands:
+   //   (dst0) Quotient destination register
+   //   (dst1) Remainder destination register
+   //   (src0) The dividend
+   //   (src1) The divisor
+   const bool hasQuotient  = ins.dst[0].type != DxbcOperandType::Null;
+   const bool hasRemainder = ins.dst[1].type != DxbcOperandType::Null;
+
+   // Nothing to do if neither result is written
+   if (!hasQuotient && !hasRemainder)
+     return;
+
+   // FIXME support this if applications require it
+   if (hasQuotient && hasRemainder
+    && ins.dst[0].mask != ins.dst[1].mask) {
+     Logger::warn("DxbcCompiler: Idiv with different destination masks not supported");
+     return;
+   }
+
+   // Load the source operands with the write mask
+   // of one of the non-NULL destination operands
+   const DxbcRegMask srcMask = hasQuotient
+     ? ins.dst[0].mask
+     : ins.dst[1].mask;
+
+   const DxbcRegisterValue dividend = emitRegisterLoad(ins.src[0], srcMask);
+   const DxbcRegisterValue divisor  = emitRegisterLoad(ins.src[1], srcMask);
+
+   // Division by zero will return 0xffffffff for both results,
+   // so the divisor is compared against zero and the comparison
+   // result selects between the computed value and all-ones.
+   auto bvecId = getVectorTypeId({ DxbcScalarType::Bool, srcMask.popCount() });
+
+   DxbcRegisterValue zeroes  = emitBuildConstVecu32( 0u,  0u,  0u,  0u, srcMask);
+   DxbcRegisterValue allOnes = emitBuildConstVecu32(~0u, ~0u, ~0u, ~0u, srcMask);
+
+   const uint32_t divisorNonZero = m_module.opINotEqual(
+     bvecId, divisor.id, zeroes.id);
+
+   // Results are only computed for non-NULL destinations
+   if (hasQuotient) {
+     DxbcRegisterValue quotient;
+     quotient.type.ctype  = ins.dst[0].dataType;
+     quotient.type.ccount = ins.dst[0].mask.popCount();
+
+     quotient.id = m_module.opUDiv(
+       getVectorTypeId(quotient.type),
+       dividend.id, divisor.id);
+
+     quotient.id = m_module.opSelect(
+       getVectorTypeId(quotient.type),
+       divisorNonZero, quotient.id, allOnes.id);
+
+     quotient = emitDstOperandModifiers(quotient, ins.modifiers);
+     emitRegisterStore(ins.dst[0], quotient);
+   }
+
+   if (hasRemainder) {
+     DxbcRegisterValue remainder;
+     remainder.type.ctype  = ins.dst[1].dataType;
+     remainder.type.ccount = ins.dst[1].mask.popCount();
+
+     remainder.id = m_module.opUMod(
+       getVectorTypeId(remainder.type),
+       dividend.id, divisor.id);
+
+     remainder.id = m_module.opSelect(
+       getVectorTypeId(remainder.type),
+       divisorNonZero, remainder.id, allOnes.id);
+
+     remainder = emitDstOperandModifiers(remainder, ins.modifiers);
+     emitRegisterStore(ins.dst[1], remainder);
+   }
+ }
+
+
+ // Emits an integer multiplication (imul / umul):
+ //    (dst0) High destination register
+ //    (dst1) Low destination register
+ //    (src0) First factor
+ //    (src1) Second factor
+ // Only the low part of the product is implemented so far.
+ void DxbcCompiler::emitVectorImul(const DxbcShaderInstruction& ins) {
+   const DxbcRegister& highDst = ins.dst[0];
+   const DxbcRegister& lowDst  = ins.dst[1];
+
+   if (highDst.type != DxbcOperandType::Null) {
+     // TODO implement this
+     Logger::warn("DxbcCompiler: Extended Imul not yet supported");
+     return;
+   }
+
+   // Neither result is consumed
+   if (lowDst.type == DxbcOperandType::Null)
+     return;
+
+   // With a NULL high destination the instruction is an
+   // ordinary multiplication of two source vectors
+   const DxbcRegisterValue factorA = emitRegisterLoad(ins.src[0], lowDst.mask);
+   const DxbcRegisterValue factorB = emitRegisterLoad(ins.src[1], lowDst.mask);
+
+   DxbcRegisterValue product;
+   product.type.ctype  = lowDst.dataType;
+   product.type.ccount = lowDst.mask.popCount();
+   product.id = m_module.opIMul(
+     getVectorTypeId(product.type),
+     factorA.id, factorB.id);
+
+   emitRegisterStore(lowDst,
+     emitDstOperandModifiers(product, ins.modifiers));
+ }
+
+
+ // Emits msad (masked sum of absolute differences):
+ //    (dst0) Destination
+ //    (src0) Reference (packed uint8)
+ //    (src1) Source (packed uint8)
+ //    (src2) Accumulator
+ // For each of the four bytes, the absolute difference between the
+ // reference byte and the source byte is added to the accumulator,
+ // but only where the reference byte is not zero.
+ void DxbcCompiler::emitVectorMsad(const DxbcShaderInstruction& ins) {
+   const DxbcRegMask writeMask = ins.dst[0].mask;
+
+   const DxbcRegisterValue reference = emitRegisterLoad(ins.src[0], writeMask);
+   const DxbcRegisterValue source    = emitRegisterLoad(ins.src[1], writeMask);
+   DxbcRegisterValue       accum     = emitRegisterLoad(ins.src[2], writeMask);
+
+   const uint32_t typeId     = getVectorTypeId(accum.type);
+   const uint32_t boolTypeId = getVectorTypeId({ DxbcScalarType::Bool, accum.type.ccount });
+
+   for (uint32_t byteIndex = 0; byteIndex < 4; ++byteIndex) {
+     const uint32_t bitOffset = m_module.constu32(8 * byteIndex);
+     const uint32_t bitCount  = m_module.constu32(8);
+
+     // Isolate the current byte of both packed operands
+     const uint32_t refByte = m_module.opBitFieldUExtract(typeId, reference.id, bitOffset, bitCount);
+     const uint32_t srcByte = m_module.opBitFieldUExtract(typeId, source.id,    bitOffset, bitCount);
+
+     // Bytes with a zero reference do not contribute
+     const DxbcRegisterValue zeroVec = emitBuildConstVecu32(0, 0, 0, 0, writeMask);
+     const uint32_t refNonZero = m_module.opINotEqual(boolTypeId, refByte, zeroVec.id);
+
+     const uint32_t delta    = m_module.opISub(typeId, refByte, srcByte);
+     const uint32_t absDelta = m_module.opSAbs(typeId, delta);
+     const uint32_t summed   = m_module.opIAdd(typeId, accum.id, absDelta);
+
+     accum.id = m_module.opSelect(typeId, refNonZero, summed, accum.id);
+   }
+
+   emitRegisterStore(ins.dst[0],
+     emitDstOperandModifiers(accum, ins.modifiers));
+ }
+
+
+ // Emits ishl / ishr / ushr:
+ //    (dst0) The destination register
+ //    (src0) The register to shift
+ //    (src1) The shift amount
+ // Shift amounts that are not immediates are masked to their lower
+ // five bits. A scalar shift amount is extended to the width of
+ // the value being shifted.
+ void DxbcCompiler::emitVectorShift(const DxbcShaderInstruction& ins) {
+   DxbcRegisterValue value  = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+   DxbcRegisterValue amount = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+
+   const bool amountIsImmediate = ins.src[1].type == DxbcOperandType::Imm32;
+
+   if (!amountIsImmediate)
+     amount = emitRegisterMaskBits(amount, 0x1F);
+
+   if (amount.type.ccount == 1)
+     amount = emitRegisterExtend(amount, value.type.ccount);
+
+   const bool isShiftLeft       = ins.op == DxbcOpcode::IShl;
+   const bool isShiftRightArith = ins.op == DxbcOpcode::IShr;
+   const bool isShiftRightLogic = ins.op == DxbcOpcode::UShr;
+
+   if (!isShiftLeft && !isShiftRightArith && !isShiftRightLogic) {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled instruction: ",
+       ins.op));
+     return;
+   }
+
+   DxbcRegisterValue shifted;
+   shifted.type.ctype  = ins.dst[0].dataType;
+   shifted.type.ccount = ins.dst[0].mask.popCount();
+
+   const uint32_t typeId = getVectorTypeId(shifted.type);
+
+   if (isShiftLeft)
+     shifted.id = m_module.opShiftLeftLogical(typeId, value.id, amount.id);
+   else if (isShiftRightArith)
+     shifted.id = m_module.opShiftRightArithmetic(typeId, value.id, amount.id);
+   else
+     shifted.id = m_module.opShiftRightLogical(typeId, value.id, amount.id);
+
+   emitRegisterStore(ins.dst[0],
+     emitDstOperandModifiers(shifted, ins.modifiers));
+ }
+
+
+ // Emits sincos:
+ //    (dst0) Destination register for sin(x)
+ //    (dst1) Destination register for cos(x)
+ //    (src0) Source operand x
+ // No code is generated for a destination that is NULL.
+ void DxbcCompiler::emitVectorSinCos(const DxbcShaderInstruction& ins) {
+   // Load all four source components once; each result then
+   // extracts the components selected by its own write mask
+   const DxbcRegisterValue angle = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, true, true, true));
+
+   const auto emitTrig = [&] (const DxbcRegister& dstReg, bool isSine) {
+     if (dstReg.type == DxbcOperandType::Null)
+       return;
+
+     const DxbcRegisterValue input = emitRegisterExtract(angle, dstReg.mask);
+
+     DxbcRegisterValue output;
+     output.type = input.type;
+
+     const uint32_t typeId = getVectorTypeId(output.type);
+
+     output.id = isSine
+       ? m_module.opSin(typeId, input.id)
+       : m_module.opCos(typeId, input.id);
+
+     emitRegisterStore(dstReg, output);
+   };
+
+   emitTrig(ins.dst[0], true);
+   emitTrig(ins.dst[1], false);
+ }
+
+
+ // Emits the geometry shader emit / cut instructions, including
+ // the combined emit-then-cut forms and the per-stream variants.
+ void DxbcCompiler::emitGeometryEmit(const DxbcShaderInstruction& ins) {
+   // Multiple streams can only occur in xfb mode. In that
+   // case the stream index is read from dst0 if it exists.
+   uint32_t streamId  = 0;
+   uint32_t streamVar = 0;
+
+   if (m_moduleInfo.xfb != nullptr) {
+     if (ins.dstCount > 0)
+       streamId = ins.dst[0].idx[0].offset;
+
+     streamVar = m_module.constu32(streamId);
+   }
+
+   // Any opcode that is not a pure cut emits a vertex, and any
+   // opcode that is not a pure emit ends the current primitive
+   const bool isPureCut  = ins.op == DxbcOpcode::Cut
+                        || ins.op == DxbcOpcode::CutStream;
+   const bool isPureEmit = ins.op == DxbcOpcode::Emit
+                        || ins.op == DxbcOpcode::EmitStream;
+
+   if (!isPureCut) {
+     if (m_perVertexOut)
+       emitOutputSetup();
+
+     emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances);
+     emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances);
+     emitXfbOutputSetup(streamId, false);
+     m_module.opEmitVertex(streamVar);
+   }
+
+   if (!isPureEmit)
+     m_module.opEndPrimitive(streamVar);
+ }
+
+
+ // Emits atomic_* and imm_atomic_* instructions.
+ // atomic_* operations have the following operands:
+ //    (dst0) Destination u# or g# register
+ //    (src0) Index into the texture or buffer
+ //    (src1) The source value for the operation
+ //    (src2) Second source operand (optional)
+ // imm_atomic_* operations have the following operands:
+ //    (dst0) Register that receives the result
+ //    (dst1) Destination u# or g# register
+ //    (srcX) As above
+ // For UAVs the operation is wrapped in a conditional block that is
+ // only entered if emitUavWriteTest passes. That block is always
+ // terminated again, even if the opcode is not handled here, so that
+ // the selection construct never lacks its merge block.
+ void DxbcCompiler::emitAtomic(const DxbcShaderInstruction& ins) {
+   // The resource is always the last destination operand
+   const DxbcRegister& resourceReg = ins.dst[ins.dstCount - 1];
+
+   const DxbcBufferInfo bufferInfo = getBufferInfo(resourceReg);
+
+   bool isImm = ins.dstCount == 2;
+   bool isUav = resourceReg.type == DxbcOperandType::UnorderedAccessView;
+
+   bool isSsbo = m_moduleInfo.options.minSsboAlignment <= bufferInfo.align
+              && bufferInfo.type != DxbcResourceType::Typed
+              && isUav;
+
+   // Perform atomic operations on UAVs only if the UAV
+   // is bound and if there is nothing else stopping us.
+   DxbcConditional cond;
+
+   if (isUav) {
+     uint32_t writeTest = emitUavWriteTest(bufferInfo);
+
+     cond.labelIf  = m_module.allocateId();
+     cond.labelEnd = m_module.allocateId();
+
+     m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+     m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd);
+
+     m_module.opLabel(cond.labelIf);
+   }
+
+   // Retrieve destination pointer for the atomic operation
+   const DxbcRegisterPointer pointer = emitGetAtomicPointer(
+     resourceReg, ins.src[0]);
+
+   // Load source values. src0 is the address, so the
+   // actual operands of the operation start at src1.
+   std::array<DxbcRegisterValue, 2> src;
+
+   for (uint32_t i = 1; i < ins.srcCount; i++) {
+     src[i - 1] = emitRegisterBitcast(
+       emitRegisterLoad(ins.src[i], DxbcRegMask(true, false, false, false)),
+       pointer.type.ctype);
+   }
+
+   // Define memory scope and semantics based on the operands
+   uint32_t scope     = 0;
+   uint32_t semantics = 0;
+
+   if (isUav) {
+     scope     = spv::ScopeDevice;
+     semantics = spv::MemorySemanticsAcquireReleaseMask;
+
+     semantics |= isSsbo
+       ? spv::MemorySemanticsUniformMemoryMask
+       : spv::MemorySemanticsImageMemoryMask;
+   } else {
+     scope     = spv::ScopeWorkgroup;
+     semantics = spv::MemorySemanticsWorkgroupMemoryMask
+               | spv::MemorySemanticsAcquireReleaseMask;
+   }
+
+   const uint32_t scopeId     = m_module.constu32(scope);
+   const uint32_t semanticsId = m_module.constu32(semantics);
+
+   // Perform the atomic operation on the given pointer
+   DxbcRegisterValue value;
+   value.type = pointer.type;
+   value.id   = 0;
+
+   // The result type, which is a scalar integer
+   const uint32_t typeId = getVectorTypeId(value.type);
+
+   // Cleared if the opcode is not one of the atomics handled below
+   bool isHandled = true;
+
+   switch (ins.op) {
+     case DxbcOpcode::AtomicCmpStore:
+     case DxbcOpcode::ImmAtomicCmpExch:
+       // src1 of the instruction is the comparator and src2 the new
+       // value, whereas SPIR-V expects the new value first
+       value.id = m_module.opAtomicCompareExchange(
+         typeId, pointer.id, scopeId, semanticsId,
+         m_module.constu32(spv::MemorySemanticsMaskNone),
+         src[1].id, src[0].id);
+       break;
+
+     case DxbcOpcode::ImmAtomicExch:
+       value.id = m_module.opAtomicExchange(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicIAdd:
+     case DxbcOpcode::ImmAtomicIAdd:
+       value.id = m_module.opAtomicIAdd(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicAnd:
+     case DxbcOpcode::ImmAtomicAnd:
+       value.id = m_module.opAtomicAnd(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicOr:
+     case DxbcOpcode::ImmAtomicOr:
+       value.id = m_module.opAtomicOr(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicXor:
+     case DxbcOpcode::ImmAtomicXor:
+       value.id = m_module.opAtomicXor(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicIMin:
+     case DxbcOpcode::ImmAtomicIMin:
+       value.id = m_module.opAtomicSMin(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicIMax:
+     case DxbcOpcode::ImmAtomicIMax:
+       value.id = m_module.opAtomicSMax(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicUMin:
+     case DxbcOpcode::ImmAtomicUMin:
+       value.id = m_module.opAtomicUMin(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     case DxbcOpcode::AtomicUMax:
+     case DxbcOpcode::ImmAtomicUMax:
+       value.id = m_module.opAtomicUMax(typeId,
+         pointer.id, scopeId, semanticsId,
+         src[0].id);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       // Do not return here: for UAVs a selection construct has
+       // already been opened and still needs its merge block.
+       isHandled = false;
+       break;
+   }
+
+   // Write back the result to the destination
+   // register if this is an imm_atomic_* opcode.
+   if (isHandled && isImm)
+     emitRegisterStore(ins.dst[0], value);
+
+   // End conditional block
+   if (isUav) {
+     m_module.opBranch(cond.labelEnd);
+     m_module.opLabel (cond.labelEnd);
+   }
+ }
+
+
+ // Emits imm_atomic_alloc and imm_atomic_consume:
+ //    (dst0) The register that will hold the counter value
+ //    (dst1) The UAV whose counter is going to be modified
+ // The operation is wrapped in a conditional block that is only
+ // entered if emitUavWriteTest passes. If subgroup operations are
+ // enabled for compute shaders, a single elected lane performs the
+ // atomic for all active lanes and the result is broadcast.
+ // Every block opened here is terminated again, even if the opcode
+ // is not handled, so that no selection construct lacks its merge block.
+ void DxbcCompiler::emitAtomicCounter(const DxbcShaderInstruction& ins) {
+   const DxbcBufferInfo bufferInfo = getBufferInfo(ins.dst[1]);
+
+   const uint32_t registerId = ins.dst[1].idx[0].offset;
+
+   // Declare the counter on first use
+   if (m_uavs.at(registerId).ctrId == 0)
+     m_uavs.at(registerId).ctrId = emitDclUavCounter(registerId);
+
+   // Only perform the operation if the UAV is bound
+   uint32_t writeTest = emitUavWriteTest(bufferInfo);
+
+   DxbcConditional cond;
+   cond.labelIf  = m_module.allocateId();
+   cond.labelEnd = m_module.allocateId();
+
+   m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+   m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd);
+
+   m_module.opLabel(cond.labelIf);
+
+   // Only use subgroup ops on compute to avoid having to
+   // deal with helper invocations or hardware limitations
+   bool useSubgroupOps = m_moduleInfo.options.useSubgroupOpsForAtomicCounters
+                      && m_programInfo.type() == DxbcProgramType::ComputeShader;
+
+   // In case we have subgroup ops enabled, we need to
+   // count the number of active lanes, the lane index,
+   // and we need to perform the atomic op conditionally
+   uint32_t laneCount = 0;
+   uint32_t laneIndex = 0;
+
+   DxbcConditional elect;
+
+   if (useSubgroupOps) {
+     m_module.enableCapability(spv::CapabilityGroupNonUniform);
+     m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
+
+     uint32_t ballot = m_module.opGroupNonUniformBallot(
+       getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
+       m_module.constu32(spv::ScopeSubgroup),
+       m_module.constBool(true));
+
+     laneCount = m_module.opGroupNonUniformBallotBitCount(
+       getScalarTypeId(DxbcScalarType::Uint32),
+       m_module.constu32(spv::ScopeSubgroup),
+       spv::GroupOperationReduce, ballot);
+
+     laneIndex = m_module.opGroupNonUniformBallotBitCount(
+       getScalarTypeId(DxbcScalarType::Uint32),
+       m_module.constu32(spv::ScopeSubgroup),
+       spv::GroupOperationExclusiveScan, ballot);
+
+     // Elect one lane to perform the atomic op
+     uint32_t election = m_module.opGroupNonUniformElect(
+       m_module.defBoolType(),
+       m_module.constu32(spv::ScopeSubgroup));
+
+     elect.labelIf  = m_module.allocateId();
+     elect.labelEnd = m_module.allocateId();
+
+     m_module.opSelectionMerge(elect.labelEnd, spv::SelectionControlMaskNone);
+     m_module.opBranchConditional(election, elect.labelIf, elect.labelEnd);
+
+     m_module.opLabel(elect.labelIf);
+   } else {
+     // We're going to use this for the increment
+     laneCount = m_module.constu32(1);
+   }
+
+   // Get a pointer to the atomic counter in question
+   DxbcRegisterInfo ptrType;
+   ptrType.type.ctype   = DxbcScalarType::Uint32;
+   ptrType.type.ccount  = 1;
+   ptrType.type.alength = 0;
+   ptrType.sclass = spv::StorageClassUniform;
+
+   uint32_t zeroId = m_module.consti32(0);
+   uint32_t ptrId  = m_module.opAccessChain(
+     getPointerTypeId(ptrType),
+     m_uavs.at(registerId).ctrId,
+     1, &zeroId);
+
+   // Define memory scope and semantics based on the operands
+   uint32_t scope     = spv::ScopeDevice;
+   uint32_t semantics = spv::MemorySemanticsUniformMemoryMask
+                      | spv::MemorySemanticsAcquireReleaseMask;
+
+   uint32_t scopeId     = m_module.constu32(scope);
+   uint32_t semanticsId = m_module.constu32(semantics);
+
+   // Compute the result value
+   DxbcRegisterValue value;
+   value.type.ctype  = DxbcScalarType::Uint32;
+   value.type.ccount = 1;
+   value.id          = 0;
+
+   uint32_t typeId = getVectorTypeId(value.type);
+
+   // Cleared if the opcode is not one of the two handled below
+   bool isHandled = true;
+
+   switch (ins.op) {
+     case DxbcOpcode::ImmAtomicAlloc:
+       value.id = m_module.opAtomicIAdd(typeId, ptrId,
+         scopeId, semanticsId, laneCount);
+       break;
+
+     case DxbcOpcode::ImmAtomicConsume:
+       // The atomic yields the previous counter value, so
+       // subtract once more to get the decremented value
+       value.id = m_module.opAtomicISub(typeId, ptrId,
+         scopeId, semanticsId, laneCount);
+       value.id = m_module.opISub(typeId, value.id, laneCount);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       // Do not return here: the blocks opened above
+       // still have to be terminated properly.
+       isHandled = false;
+       break;
+   }
+
+   // If we're using subgroup ops, we have to broadcast
+   // the result of the atomic op and compute the index
+   if (useSubgroupOps) {
+     m_module.opBranch(elect.labelEnd);
+     m_module.opLabel (elect.labelEnd);
+
+     if (isHandled) {
+       uint32_t undef = m_module.constUndef(typeId);
+
+       // Lanes that were not elected reach the merge block
+       // directly from the outer block, without a value
+       std::array<SpirvPhiLabel, 2> phiLabels = {{
+         { value.id, elect.labelIf },
+         { undef,    cond.labelIf  },
+       }};
+
+       value.id = m_module.opPhi(typeId,
+         phiLabels.size(), phiLabels.data());
+       value.id = m_module.opGroupNonUniformBroadcastFirst(typeId,
+         m_module.constu32(spv::ScopeSubgroup), value.id);
+       value.id = m_module.opIAdd(typeId, value.id, laneIndex);
+     }
+   }
+
+   // Store the result
+   if (isHandled)
+     emitRegisterStore(ins.dst[0], value);
+
+   // End conditional block
+   m_module.opBranch(cond.labelEnd);
+   m_module.opLabel (cond.labelEnd);
+ }
+
+
+ // Emits the sync instruction. It takes no operands; what gets
+ // synchronized is determined by the sync flags in the control bits.
+ void DxbcCompiler::emitBarrier(const DxbcShaderInstruction& ins) {
+   const DxbcSyncFlags syncFlags = ins.controls.syncFlags();
+
+   const bool syncThreads   = syncFlags.test(DxbcSyncFlag::ThreadsInGroup);
+   const bool syncTgsm      = syncFlags.test(DxbcSyncFlag::ThreadGroupSharedMemory);
+   const bool syncUavGroup  = syncFlags.test(DxbcSyncFlag::UavMemoryGroup);
+   const bool syncUavGlobal = syncFlags.test(DxbcSyncFlag::UavMemoryGlobal);
+
+   uint32_t execScope    = spv::ScopeInvocation;
+   uint32_t memScope     = spv::ScopeInvocation;
+   uint32_t memSemantics = 0;
+
+   if (syncThreads)
+     execScope = spv::ScopeWorkgroup;
+
+   // Global UAV synchronization has the widest scope and
+   // therefore takes precedence over the workgroup scope
+   if (syncTgsm || syncUavGroup)
+     memScope = spv::ScopeWorkgroup;
+
+   if (syncUavGlobal)
+     memScope = spv::ScopeDevice;
+
+   if (syncTgsm) {
+     memSemantics |= spv::MemorySemanticsWorkgroupMemoryMask
+                  |  spv::MemorySemanticsAcquireReleaseMask;
+   }
+
+   if (syncUavGroup || syncUavGlobal) {
+     memSemantics |= spv::MemorySemanticsImageMemoryMask
+                  |  spv::MemorySemanticsUniformMemoryMask
+                  |  spv::MemorySemanticsAcquireReleaseMask;
+   }
+
+   // A control barrier also covers the memory barrier
+   if (execScope != spv::ScopeInvocation) {
+     m_module.opControlBarrier(
+       m_module.constu32(execScope),
+       m_module.constu32(memScope),
+       m_module.constu32(memSemantics));
+     return;
+   }
+
+   if (memScope != spv::ScopeInvocation) {
+     m_module.opMemoryBarrier(
+       m_module.constu32(memScope),
+       m_module.constu32(memSemantics));
+     return;
+   }
+
+   Logger::warn("DxbcCompiler: sync instruction has no effect");
+ }
+
+
+ // Emits ibfe / ubfe (signed / unsigned bit field extract):
+ //    (dst0) The destination register
+ //    (src0) Number of bits to extract
+ //    (src1) Offset of the bits to extract
+ //    (src2) Register to extract bits from
+ // The extraction is emitted separately for each component. Bit
+ // counts and offsets that are not immediates are masked to their
+ // lower five bits.
+ void DxbcCompiler::emitBitExtract(const DxbcShaderInstruction& ins) {
+   DxbcRegisterValue width  = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+   DxbcRegisterValue offset = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+
+   if (ins.src[0].type != DxbcOperandType::Imm32)
+     width = emitRegisterMaskBits(width, 0x1F);
+
+   if (ins.src[1].type != DxbcOperandType::Imm32)
+     offset = emitRegisterMaskBits(offset, 0x1F);
+
+   const DxbcRegisterValue source = emitRegisterLoad(ins.src[2], ins.dst[0].mask);
+
+   const bool     signExtend  = ins.op == DxbcOpcode::IBfe;
+   const uint32_t scalarCount = source.type.ccount;
+
+   std::array<uint32_t, 4> scalarIds = {{ 0, 0, 0, 0 }};
+
+   for (uint32_t c = 0; c < scalarCount; ++c) {
+     const DxbcRegMask lane = DxbcRegMask::select(c);
+
+     const DxbcRegisterValue laneWidth  = emitRegisterExtract(width,  lane);
+     const DxbcRegisterValue laneOffset = emitRegisterExtract(offset, lane);
+     const DxbcRegisterValue laneSource = emitRegisterExtract(source, lane);
+
+     const uint32_t laneTypeId = getVectorTypeId(laneSource.type);
+
+     if (signExtend) {
+       scalarIds[c] = m_module.opBitFieldSExtract(
+         laneTypeId, laneSource.id, laneOffset.id, laneWidth.id);
+     } else {
+       scalarIds[c] = m_module.opBitFieldUExtract(
+         laneTypeId, laneSource.id, laneOffset.id, laneWidth.id);
+     }
+   }
+
+   // Reassemble the vector unless the result is a scalar
+   DxbcRegisterValue extracted;
+   extracted.type = source.type;
+
+   if (scalarCount > 1) {
+     extracted.id = m_module.opCompositeConstruct(
+       getVectorTypeId(extracted.type),
+       scalarCount, scalarIds.data());
+   } else {
+     extracted.id = scalarIds[0];
+   }
+
+   emitRegisterStore(ins.dst[0], extracted);
+ }
+
+
+ // Emits a bit field insert with the following operands:
+ //    (dst0) The destination register
+ //    (src0) Number of bits to insert
+ //    (src1) Offset at which the bits are inserted
+ //    (src2) Register to take bits from
+ //    (src3) Register to replace bits in
+ // The insertion is emitted separately for each component. Bit
+ // counts and offsets that are not immediates are masked to their
+ // lower five bits.
+ void DxbcCompiler::emitBitInsert(const DxbcShaderInstruction& ins) {
+   DxbcRegisterValue width  = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+   DxbcRegisterValue offset = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+
+   if (ins.src[0].type != DxbcOperandType::Imm32)
+     width = emitRegisterMaskBits(width, 0x1F);
+
+   if (ins.src[1].type != DxbcOperandType::Imm32)
+     offset = emitRegisterMaskBits(offset, 0x1F);
+
+   const DxbcRegisterValue newBits = emitRegisterLoad(ins.src[2], ins.dst[0].mask);
+   const DxbcRegisterValue target  = emitRegisterLoad(ins.src[3], ins.dst[0].mask);
+
+   const uint32_t scalarCount = target.type.ccount;
+
+   std::array<uint32_t, 4> scalarIds = {{ 0, 0, 0, 0 }};
+
+   for (uint32_t c = 0; c < scalarCount; ++c) {
+     const DxbcRegMask lane = DxbcRegMask::select(c);
+
+     const DxbcRegisterValue laneWidth   = emitRegisterExtract(width,   lane);
+     const DxbcRegisterValue laneOffset  = emitRegisterExtract(offset,  lane);
+     const DxbcRegisterValue laneNewBits = emitRegisterExtract(newBits, lane);
+     const DxbcRegisterValue laneTarget  = emitRegisterExtract(target,  lane);
+
+     const uint32_t laneTypeId = getVectorTypeId(laneTarget.type);
+
+     scalarIds[c] = m_module.opBitFieldInsert(laneTypeId,
+       laneTarget.id, laneNewBits.id,
+       laneOffset.id, laneWidth.id);
+   }
+
+   // Reassemble the vector unless the result is a scalar
+   DxbcRegisterValue inserted;
+   inserted.type = target.type;
+
+   if (scalarCount > 1) {
+     inserted.id = m_module.opCompositeConstruct(
+       getVectorTypeId(inserted.type),
+       scalarCount, scalarIds.data());
+   } else {
+     inserted.id = scalarIds[0];
+   }
+
+   emitRegisterStore(ins.dst[0], inserted);
+ }
+
+
+ // Emits firstbit_lo / firstbit_hi / firstbit_shi:
+ //    (dst0) The destination operand
+ //    (src0) Source operand to scan
+ // No destination modifiers are applied.
+ void DxbcCompiler::emitBitScan(const DxbcShaderInstruction& ins) {
+   const DxbcRegisterValue scanned = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+
+   DxbcRegisterValue bitIndex;
+   bitIndex.type.ctype  = ins.dst[0].dataType;
+   bitIndex.type.ccount = ins.dst[0].mask.popCount();
+
+   // Result type, should be an unsigned integer
+   const uint32_t typeId = getVectorTypeId(bitIndex.type);
+
+   if (ins.op == DxbcOpcode::FirstBitLo) {
+     bitIndex.id = m_module.opFindILsb(typeId, scanned.id);
+   } else if (ins.op == DxbcOpcode::FirstBitHi) {
+     bitIndex.id = m_module.opFindUMsb(typeId, scanned.id);
+   } else if (ins.op == DxbcOpcode::FirstBitShi) {
+     bitIndex.id = m_module.opFindSMsb(typeId, scanned.id);
+   } else {
+     Logger::warn(str::format("DxbcCompiler: Unhandled instruction: ", ins.op));
+     return;
+   }
+
+   // The 'Hi' variants are counted from the MSB in DXBC rather
+   // than the LSB, so the index is converted to 31 - index. An
+   // index of 0xffffffff is passed through unchanged.
+   const bool countsFromMsb = ins.op == DxbcOpcode::FirstBitHi
+                           || ins.op == DxbcOpcode::FirstBitShi;
+
+   if (countsFromMsb) {
+     uint32_t boolTypeId = m_module.defBoolType();
+
+     if (bitIndex.type.ccount > 1)
+       boolTypeId = m_module.defVectorType(boolTypeId, bitIndex.type.ccount);
+
+     const DxbcRegisterValue vec31      = emitBuildConstVecu32(31u, 31u, 31u, 31u, ins.dst[0].mask);
+     const DxbcRegisterValue vecAllOnes = emitBuildConstVecu32(~0u, ~0u, ~0u, ~0u, ins.dst[0].mask);
+
+     bitIndex.id = m_module.opSelect(typeId,
+       m_module.opINotEqual(boolTypeId, bitIndex.id, vecAllOnes.id),
+       m_module.opISub(typeId, vec31.id, bitIndex.id),
+       vecAllOnes.id);
+   }
+
+   emitRegisterStore(ins.dst[0], bitIndex);
+ }
+
+
+ // Emits bufinfo:
+ //    (dst0) The destination register
+ //    (src0) The buffer register to query
+ void DxbcCompiler::emitBufferQuery(const DxbcShaderInstruction& ins) {
+   const DxbcRegister& bufferReg = ins.src[0];
+
+   const DxbcBufferInfo bufferInfo = getBufferInfo(bufferReg);
+
+   const bool isTyped = bufferInfo.type == DxbcResourceType::Typed;
+   const bool isSsbo  = m_moduleInfo.options.minSsboAlignment <= bufferInfo.align
+                     && !isTyped;
+
+   // The raw size is queried as a scalar unsigned integer
+   DxbcRegisterValue size = isSsbo
+     ? emitQueryBufferSize(bufferReg)
+     : emitQueryTexelBufferSize(bufferReg);
+
+   const uint32_t typeId = getVectorTypeId(size.type);
+
+   // The queried size is an element count. Raw buffers scale
+   // it by four, structured buffers divide it by the number
+   // of dwords per structure. Typed buffers use it as-is.
+   switch (bufferInfo.type) {
+     case DxbcResourceType::Raw:
+       size.id = m_module.opIMul(typeId,
+         size.id, m_module.constu32(4));
+       break;
+
+     case DxbcResourceType::Structured:
+       size.id = m_module.opUDiv(typeId, size.id,
+         m_module.constu32(bufferInfo.stride / 4));
+       break;
+
+     default:
+       break;
+   }
+
+   // Store the result. The scalar will be extended to a
+   // vector if the write mask consists of more than one
+   // component, which is the desired behaviour.
+   emitRegisterStore(ins.dst[0], size);
+ }
+
+
+ // Emits ld_raw and ld_structured.
+ // ld_raw takes three arguments:
+ //    (dst0) Destination register
+ //    (src0) Byte offset
+ //    (src1) Source register
+ // ld_structured takes four arguments:
+ //    (dst0) Destination register
+ //    (src0) Structure index
+ //    (src1) Byte offset
+ //    (src2) Source register
+ void DxbcCompiler::emitBufferLoad(const DxbcShaderInstruction& ins) {
+   const bool isStructured = ins.op == DxbcOpcode::LdStructured;
+
+   // The buffer is always the last source operand
+   const DxbcRegister& dstReg    = ins.dst[0];
+   const DxbcRegister& bufferReg = ins.src[isStructured ? 2 : 1];
+
+   // Retrieve common info about the buffer
+   const DxbcBufferInfo bufferInfo = getBufferInfo(bufferReg);
+
+   // Offsets and indices are scalars, so only load the x component
+   const auto loadScalar = [&] (const DxbcRegister& reg) {
+     return emitRegisterLoad(reg, DxbcRegMask(true, false, false, false));
+   };
+
+   // Compute element index
+   const DxbcRegisterValue elementIndex = isStructured
+     ? emitCalcBufferIndexStructured(
+         loadScalar(ins.src[0]),
+         loadScalar(ins.src[1]),
+         bufferInfo.stride)
+     : emitCalcBufferIndexRaw(
+         loadScalar(ins.src[0]));
+
+   const DxbcRegisterValue loaded = emitRawBufferLoad(
+     bufferReg, elementIndex, dstReg.mask);
+
+   emitRegisterStore(dstReg, loaded);
+ }
+
+
+ // Emits store_raw and store_structured.
+ // store_raw takes three arguments:
+ //    (dst0) Destination register
+ //    (src0) Byte offset
+ //    (src1) Source register
+ // store_structured takes four arguments:
+ //    (dst0) Destination register
+ //    (src0) Structure index
+ //    (src1) Byte offset
+ //    (src2) Source register
+ void DxbcCompiler::emitBufferStore(const DxbcShaderInstruction& ins) {
+   const bool isStructured = ins.op == DxbcOpcode::StoreStructured;
+
+   // The buffer is the destination operand, and the value
+   // to store is always the last source operand
+   const DxbcRegister& bufferReg = ins.dst[0];
+   const DxbcRegister& valueReg  = ins.src[isStructured ? 2 : 1];
+
+   // Retrieve common info about the buffer
+   const DxbcBufferInfo bufferInfo = getBufferInfo(bufferReg);
+
+   // Offsets and indices are scalars, so only load the x component
+   const auto loadScalar = [&] (const DxbcRegister& reg) {
+     return emitRegisterLoad(reg, DxbcRegMask(true, false, false, false));
+   };
+
+   // Compute element index
+   const DxbcRegisterValue elementIndex = isStructured
+     ? emitCalcBufferIndexStructured(
+         loadScalar(ins.src[0]),
+         loadScalar(ins.src[1]),
+         bufferInfo.stride)
+     : emitCalcBufferIndexRaw(
+         loadScalar(ins.src[0]));
+
+   // The write mask of the buffer operand selects the
+   // components of the value that are written
+   const DxbcRegisterValue storedValue = emitRegisterLoad(valueReg, bufferReg.mask);
+
+   emitRawBufferStore(bufferReg, elementIndex, storedValue);
+ }
+
+
+ // Emits f32tof16 and f16tof32.
+ // f32tof16 takes two operands:
+ //    (dst0) Destination register as a uint32 vector
+ //    (src0) Source register as a float32 vector
+ // f16tof32 takes two operands:
+ //    (dst0) Destination register as a float32 vector
+ //    (src0) Source register as a uint32 vector
+ void DxbcCompiler::emitConvertFloat16(const DxbcShaderInstruction& ins) {
+   const DxbcRegisterValue input = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+
+   // Both directions are handled by this function
+   const bool packing = ins.op == DxbcOpcode::F32toF16;
+
+   const uint32_t scalarCount = input.type.ccount;
+
+   // Types shared by the pack and unpack paths
+   const uint32_t uintTypeId   = getVectorTypeId({ DxbcScalarType::Uint32,  1 });
+   const uint32_t floatTypeId  = getVectorTypeId({ DxbcScalarType::Float32, 1 });
+   const uint32_t float2TypeId = getVectorTypeId({ DxbcScalarType::Float32, 2 });
+
+   // Second half of each packed pair; only needed when packing
+   const uint32_t floatZeroId = packing ? m_module.constf32(0.0f) : 0;
+
+   // Index of the first component of an unpacked pair
+   const uint32_t firstIndex = 0;
+
+   // The SPIR-V pack instructions operate on two-component
+   // vectors, so every component is converted on its own.
+   std::array<uint32_t, 4> convertedIds = {{ 0, 0, 0, 0 }};
+
+   for (uint32_t c = 0; c < scalarCount; ++c) {
+     const DxbcRegisterValue scalar
+       = emitRegisterExtract(input, DxbcRegMask::select(c));
+
+     if (packing) {
+       // f32tof16: pack (value, 0) into one 32-bit integer
+       const std::array<uint32_t, 2> pairIds =
+         {{ scalar.id, floatZeroId }};
+
+       const uint32_t pairId = m_module.opCompositeConstruct(
+         float2TypeId, pairIds.size(), pairIds.data());
+
+       convertedIds[c] = m_module.opPackHalf2x16(uintTypeId, pairId);
+     } else {
+       // f16tof32: unpack and keep only the first component
+       const uint32_t pairId = m_module.opUnpackHalf2x16(
+         float2TypeId, scalar.id);
+
+       convertedIds[c] = m_module.opCompositeExtract(
+         floatTypeId, pairId, 1, &firstIndex);
+     }
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype  = ins.dst[0].dataType;
+   result.type.ccount = scalarCount;
+
+   uint32_t typeId = getVectorTypeId(result.type);
+
+   if (scalarCount > 1) {
+     result.id = m_module.opCompositeConstruct(typeId,
+       scalarCount, convertedIds.data());
+   } else {
+     result.id = convertedIds[0];
+   }
+
+   if (packing) {
+     // Some drivers return infinity if the input value is above a certain
+     // threshold, but D3D wants us to return infinity only if the input is
+     // actually infinite. Fix this up to return the maximum representable
+     // 16-bit floating point number instead, but preserve input infinity.
+     uint32_t boolVecTypeId = getVectorTypeId({ DxbcScalarType::Bool, scalarCount });
+     uint32_t halfInfBits   = m_module.constuReplicant(0x7C00, scalarCount);
+     uint32_t halfAbsMask   = m_module.constuReplicant(0x7FFF, scalarCount);
+
+     uint32_t inputIsInf  = m_module.opIsInf(boolVecTypeId, input.id);
+     uint32_t resultIsInf = m_module.opIEqual(boolVecTypeId, halfInfBits,
+       m_module.opBitwiseAnd(typeId, result.id, halfAbsMask));
+
+     // Subtracting one from the bit pattern of an infinity
+     // yields the largest finite value of the same sign
+     result.id = m_module.opSelect(getVectorTypeId(result.type),
+       m_module.opLogicalAnd(boolVecTypeId, resultIsInf, m_module.opLogicalNot(boolVecTypeId, inputIsInf)),
+       m_module.opISub(typeId, result.id, m_module.constuReplicant(1, scalarCount)),
+       result.id);
+   }
+
+   // Store result in the destination register
+   emitRegisterStore(ins.dst[0], result);
+ }
+
+
+ // Emits the conversions that involve 64-bit floats
+ // (dtof, ftod, dtoi, dtou, itod, utod):
+ //    (dst0) Destination operand
+ //    (src0) Number to convert
+ // Destination modifiers are not applied.
+ void DxbcCompiler::emitConvertFloat64(const DxbcShaderInstruction& ins) {
+   const uint32_t dstBits     = ins.dst[0].mask.popCount();
+   const bool     dstIsDouble = isDoubleType(ins.dst[0].dataType);
+
+   // For a double destination, every two enabled destination bits
+   // select one source component. Otherwise every enabled
+   // destination bit selects a pair of source components.
+   const DxbcRegMask srcMask = dstIsDouble
+     ? DxbcRegMask(dstBits >= 2, dstBits >= 4, false, false)
+     : DxbcRegMask(dstBits >= 1, dstBits >= 1, dstBits >= 2, dstBits >= 2);
+
+   const DxbcRegisterValue input = emitRegisterLoad(ins.src[0], srcMask);
+
+   DxbcRegisterValue converted;
+   converted.type.ctype  = ins.dst[0].dataType;
+   converted.type.ccount = input.type.ccount;
+
+   switch (ins.op) {
+     // Float to float, changing the bit width
+     case DxbcOpcode::DtoF:
+     case DxbcOpcode::FtoD: {
+       const uint32_t typeId = getVectorTypeId(converted.type);
+       converted.id = m_module.opFConvert(typeId, input.id);
+     } break;
+
+     case DxbcOpcode::DtoI: {
+       const uint32_t typeId = getVectorTypeId(converted.type);
+       converted.id = m_module.opConvertFtoS(typeId, input.id);
+     } break;
+
+     case DxbcOpcode::DtoU: {
+       const uint32_t typeId = getVectorTypeId(converted.type);
+       converted.id = m_module.opConvertFtoU(typeId, input.id);
+     } break;
+
+     case DxbcOpcode::ItoD: {
+       const uint32_t typeId = getVectorTypeId(converted.type);
+       converted.id = m_module.opConvertStoF(typeId, input.id);
+     } break;
+
+     case DxbcOpcode::UtoD: {
+       const uint32_t typeId = getVectorTypeId(converted.type);
+       converted.id = m_module.opConvertUtoF(typeId, input.id);
+     } break;
+
+     default:
+       Logger::warn(str::format("DxbcCompiler: Unhandled instruction: ", ins.op));
+       return;
+   }
+
+   emitRegisterStore(ins.dst[0], converted);
+ }
+
+
+ // Records the instance count given by the first immediate operand
+ // for the hull shader fork/join phase that is currently active.
+ void DxbcCompiler::emitHullShaderInstCnt(const DxbcShaderInstruction& ins) {
+   auto phase = this->getCurrentHsForkJoinPhase();
+
+   // NOTE(review): the getter is assumed to yield null while no
+   // fork/join phase is active - confirm. Guard against that so
+   // malformed bytecode cannot cause a null pointer dereference.
+   if (phase == nullptr) {
+     Logger::err("DXBC: Instance count declared outside of a fork or join phase");
+     return;
+   }
+
+   phase->instanceCount = ins.imm[0].u32;
+ }
+
+
+ // Handles the instructions that start a new hull shader phase
+ // (declarations, control point, fork, join) and updates the
+ // phase tracking state in m_hs accordingly.
+ void DxbcCompiler::emitHullShaderPhase(const DxbcShaderInstruction& ins) {
+   if (ins.op == DxbcOpcode::HsDecls) {
+     // The declaration phase is expected before any other phase
+     if (m_hs.currPhaseType != DxbcCompilerHsPhase::None)
+       Logger::err("DXBC: HsDecls not the first phase in hull shader");
+
+     m_hs.currPhaseType = DxbcCompilerHsPhase::Decl;
+     return;
+   }
+
+   if (ins.op == DxbcOpcode::HsControlPointPhase) {
+     m_hs.cpPhase = this->emitNewHullShaderControlPointPhase();
+
+     m_module.setDebugName(m_hs.cpPhase.functionId, "hs_control_point");
+
+     m_hs.currPhaseType = DxbcCompilerHsPhase::ControlPoint;
+     m_hs.currPhaseId   = 0;
+     return;
+   }
+
+   if (ins.op == DxbcOpcode::HsForkPhase) {
+     const auto forkPhase = this->emitNewHullShaderForkJoinPhase();
+     m_hs.forkPhases.push_back(forkPhase);
+
+     // The new phase is the last entry of the list
+     m_hs.currPhaseType = DxbcCompilerHsPhase::Fork;
+     m_hs.currPhaseId   = m_hs.forkPhases.size() - 1;
+
+     const auto debugName = str::format("hs_fork_", m_hs.currPhaseId);
+     m_module.setDebugName(forkPhase.functionId, debugName.c_str());
+     return;
+   }
+
+   if (ins.op == DxbcOpcode::HsJoinPhase) {
+     const auto joinPhase = this->emitNewHullShaderForkJoinPhase();
+     m_hs.joinPhases.push_back(joinPhase);
+
+     // The new phase is the last entry of the list
+     m_hs.currPhaseType = DxbcCompilerHsPhase::Join;
+     m_hs.currPhaseId   = m_hs.joinPhases.size() - 1;
+
+     const auto debugName = str::format("hs_join_", m_hs.currPhaseId);
+     m_module.setDebugName(joinPhase.functionId, debugName.c_str());
+     return;
+   }
+
+   Logger::warn(str::format(
+     "DxbcCompiler: Unhandled instruction: ",
+     ins.op));
+ }
+
+
+ // Emits one of the explicit interpolation instructions
+ // (EvalCentroid, EvalSampleIndex, EvalSnapped) for the input
+ // register named by src0 and stores the swizzled result in dst0.
+ // Any other opcode is reported with a warning and ignored.
+ void DxbcCompiler::emitInterpolate(const DxbcShaderInstruction& ins) {
+   m_module.enableCapability(spv::CapabilityInterpolationFunction);
+
+   // The SPIR-V interpolation instructions take the input
+   // variable itself rather than a value loaded from it.
+   const uint32_t inputIndex = ins.src[0].idx[0].offset;
+
+   DxbcRegisterValue interpolated;
+   interpolated.type = getInputRegType(inputIndex);
+
+   if (ins.op == DxbcOpcode::EvalCentroid) {
+     interpolated.id = m_module.opInterpolateAtCentroid(
+       getVectorTypeId(interpolated.type),
+       m_vRegs.at(inputIndex).id);
+   } else if (ins.op == DxbcOpcode::EvalSampleIndex) {
+     // src1.x holds the sample index
+     const DxbcRegisterValue sampleSelector = emitRegisterLoad(
+       ins.src[1], DxbcRegMask(true, false, false, false));
+
+     interpolated.id = m_module.opInterpolateAtSample(
+       getVectorTypeId(interpolated.type),
+       m_vRegs.at(inputIndex).id,
+       sampleSelector.id);
+   } else if (ins.op == DxbcOpcode::EvalSnapped) {
+     // src1.xy holds the offset
+     const DxbcRegisterValue snapOffset = emitRegisterLoad(
+       ins.src[1], DxbcRegMask(true, true, false, false));
+
+     interpolated.id = m_module.opInterpolateAtOffset(
+       getVectorTypeId(interpolated.type),
+       m_vRegs.at(inputIndex).id,
+       snapOffset.id);
+   } else {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled instruction: ",
+       ins.op));
+     return;
+   }
+
+   // Apply the source swizzle and the destination write mask
+   emitRegisterStore(ins.dst[0],
+     emitRegisterSwizzle(interpolated,
+       ins.src[0].swizzle, ins.dst[0].mask));
+ }
+
+
+ // Emits code for resinfo. Operands:
+ //    (dst0) The destination register
+ //    (src0) Resource LOD to query
+ //    (src1) Resource to query
+ // The stored vector is assembled from the queried size, zero
+ // padding up to three components, and the mip level count.
+ void DxbcCompiler::emitTextureQuery(const DxbcShaderInstruction& ins) {
+   const DxbcBufferInfo  resource = getBufferInfo(ins.src[1]);
+   const DxbcResinfoType infoType = ins.controls.resinfoType();
+
+   // Only the first component of src0 selects the mip level
+   const DxbcRegisterValue lod = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   // Every variant except the uint one produces float values
+   const bool isFloatResult = infoType != DxbcResinfoType::Uint;
+
+   DxbcScalarType resultScalarType = DxbcScalarType::Uint32;
+
+   if (isFloatResult)
+     resultScalarType = DxbcScalarType::Float32;
+
+   // Size of the selected mip level and number of mip levels
+   DxbcRegisterValue sizeValue  = emitQueryTextureSize(ins.src[1], lod);
+   DxbcRegisterValue levelValue = emitQueryTextureLods(ins.src[1]);
+
+   if (isFloatResult) {
+     sizeValue.type.ctype = DxbcScalarType::Float32;
+     sizeValue.id = m_module.opConvertUtoF(
+       getVectorTypeId(sizeValue.type),
+       sizeValue.id);
+
+     levelValue.type.ctype = DxbcScalarType::Float32;
+     levelValue.id = m_module.opConvertUtoF(
+       getVectorTypeId(levelValue.type),
+       levelValue.id);
+   }
+
+   // Component count of the size query, remembered before
+   // the layer count is possibly split off below
+   const uint32_t queriedDim = sizeValue.type.ccount;
+
+   DxbcRegisterValue layerValue;
+   layerValue.type = sizeValue.type;
+   layerValue.id   = 0;
+
+   if (infoType == DxbcResinfoType::RcpFloat) {
+     // The reciprocal applies to the image dimensions only, so
+     // for arrayed images the last component is separated first
+     if (resource.image.array) {
+       layerValue = emitRegisterExtract(sizeValue, DxbcRegMask::select(queriedDim - 1));
+       sizeValue  = emitRegisterExtract(sizeValue, DxbcRegMask::firstN(queriedDim - 1));
+     }
+
+     sizeValue.id = m_module.opFDiv(
+       getVectorTypeId(sizeValue.type),
+       emitBuildConstVecf32(1.0f, 1.0f, 1.0f, 1.0f,
+         DxbcRegMask::firstN(sizeValue.type.ccount)).id,
+       sizeValue.id);
+   }
+
+   // Gather the constituents of the four-component result
+   std::array<uint32_t, 4> components = { sizeValue.id, 0, 0, 0 };
+   uint32_t componentCount = 1;
+
+   if (layerValue.id != 0)
+     components[componentCount++] = layerValue.id;
+
+   if (queriedDim < 3) {
+     uint32_t zeroId = 0;
+
+     if (isFloatResult)
+       zeroId = m_module.constf32(0.0f);
+     else
+       zeroId = m_module.constu32(0);
+
+     for (uint32_t pad = 3 - queriedDim; pad != 0; pad--)
+       components[componentCount++] = zeroId;
+   }
+
+   components[componentCount++] = levelValue.id;
+
+   DxbcRegisterValue queryResult;
+   queryResult.type.ctype  = resultScalarType;
+   queryResult.type.ccount = 4;
+   queryResult.id = m_module.opCompositeConstruct(
+     getVectorTypeId(queryResult.type),
+     componentCount, components.data());
+
+   // Apply the resource swizzle and the destination write mask
+   emitRegisterStore(ins.dst[0],
+     emitRegisterSwizzle(queryResult,
+       ins.src[1].swizzle, ins.dst[0].mask));
+ }
+
+
+ // Emits an image LOD query. Operands:
+ //    (dst0) The destination register
+ //    (src0) Texture coordinates
+ //    (src1) The texture itself
+ //    (src2) The sampler object
+ void DxbcCompiler::emitTextureQueryLod(const DxbcShaderInstruction& ins) {
+   // Look up the declared texture and sampler
+   const auto& textureInfo = m_textures.at(ins.src[1].idx[0].offset);
+   const auto& samplerInfo = m_samplers.at(ins.src[2].idx[0].offset);
+
+   // Load as many coordinate components as the texture needs
+   const uint32_t coordDim = getTexLayerDim(textureInfo.imageInfo);
+
+   const DxbcRegisterValue texCoord = emitRegisterLoad(
+     ins.src[0], DxbcRegMask::firstN(coordDim));
+
+   // The query yields a two-component float32 vector
+   const uint32_t combinedImageId = emitLoadSampledImage(textureInfo, samplerInfo, false);
+   const uint32_t lodPairId = m_module.opImageQueryLod(
+     getVectorTypeId({ DxbcScalarType::Float32, 2 }),
+     combinedImageId, texCoord.id);
+
+   // Pad the result with two zeroes to obtain four components
+   const uint32_t zeroId = m_module.constf32(0.0f);
+   const std::array<uint32_t, 3> parts
+     = {{ lodPairId, zeroId, zeroId }};
+
+   DxbcRegisterValue lodVector;
+   lodVector.type = DxbcVectorType { DxbcScalarType::Float32, 4 };
+   lodVector.id = m_module.opCompositeConstruct(
+     getVectorTypeId(lodVector.type),
+     parts.size(), parts.data());
+
+   emitRegisterStore(ins.dst[0],
+     emitRegisterSwizzle(lodVector, ins.src[1].swizzle, ins.dst[0].mask));
+ }
+
+
+ // Emits code for sampleinfo. Operands:
+ //    (dst0) The destination register
+ //    (src0) Resource to query
+ void DxbcCompiler::emitTextureQueryMs(const DxbcShaderInstruction& ins) {
+   DxbcRegisterValue count = emitQueryTextureSamples(ins.src[0]);
+
+   // Unless a uint result is requested, the queried count
+   // goes through an unsigned-to-float conversion first.
+   const bool keepUint = ins.controls.returnType() == DxbcInstructionReturnType::Uint;
+
+   if (!keepUint) {
+     count.type = { DxbcScalarType::Float32, 1 };
+     count.id = m_module.opConvertUtoF(
+       getVectorTypeId(count.type),
+       count.id);
+   }
+
+   emitRegisterStore(ins.dst[0], count);
+ }
+
+
+ // Emits code for samplepos. Operands:
+ //    (dst0) The destination register
+ //    (src0) Resource to query
+ //    (src1) Sample index
+ // The position is read from the sample position array at
+ // index (sample count + sample index), or at index zero
+ // when the count or index fail the validity checks below.
+ void DxbcCompiler::emitTextureQueryMsPos(const DxbcShaderInstruction& ins) {
+   // Create the sample position array on first use
+   if (m_samplePositions == 0)
+     m_samplePositions = emitSamplePosArray();
+
+   const DxbcRegisterValue count = emitQueryTextureSamples(ins.src[0]);
+   const DxbcRegisterValue index = emitRegisterLoad(
+     ins.src[1], DxbcRegMask(true, false, false, false));
+
+   // The lookup index is equal to the sample count plus the sample index
+   uint32_t tableIndex = m_module.opIAdd(
+     getVectorTypeId(count.type),
+     count.id, index.id);
+
+   // The count must not exceed 16 ...
+   const uint32_t countInRange = m_module.opULessThanEqual(
+     m_module.defBoolType(),
+     count.id,
+     m_module.constu32(16));
+
+   // ... and the index must be lower than the count
+   const uint32_t indexInRange = m_module.opULessThan(
+     m_module.defBoolType(),
+     index.id,
+     count.id);
+
+   // Fall back to index zero if either check fails. The
+   // original note states that this yields a zero vector.
+   tableIndex = m_module.opSelect(
+     getVectorTypeId(count.type),
+     m_module.opLogicalAnd(
+       m_module.defBoolType(),
+       countInRange,
+       indexInRange),
+     tableIndex,
+     m_module.constu32(0));
+
+   // Address the selected two-component array entry
+   DxbcRegisterPointer entryPtr;
+   entryPtr.type.ctype  = DxbcScalarType::Float32;
+   entryPtr.type.ccount = 2;
+   entryPtr.id = m_module.opAccessChain(
+     m_module.defPointerType(
+       getVectorTypeId(entryPtr.type),
+       spv::StorageClassPrivate),
+     m_samplePositions, 1, &tableIndex);
+
+   const DxbcRegisterValue position = emitValueLoad(entryPtr);
+
+   // Append two zeroes to obtain a four-component vector
+   DxbcRegisterValue padding;
+   padding.type.ctype  = DxbcScalarType::Float32;
+   padding.type.ccount = 2;
+   padding.id = m_module.constvec2f32(0.0f, 0.0f);
+
+   const DxbcRegisterValue expanded = emitRegisterConcat(position, padding);
+
+   // Write the swizzled and masked components to dst0
+   emitRegisterStore(ins.dst[0],
+     emitRegisterSwizzle(expanded,
+       ins.src[0].swizzle,
+       ins.dst[0].mask));
+ }
+
+
+ // Emits a texel fetch.
+ // ld operands:
+ //    (dst0) The destination register
+ //    (src0) Source address
+ //    (src1) Source texture
+ // ld2dms has one additional operand:
+ //    (src2) Sample number
+ // Zeroes are stored instead of the fetched texel when the
+ // texture's 'bound' specialization value is false.
+ void DxbcCompiler::emitTextureFetch(const DxbcShaderInstruction& ins) {
+   const auto& tex = m_textures.at(ins.src[1].idx[0].offset);
+   const uint32_t offsetDim = getTexLayerDim(tex.imageInfo);
+   const bool isLdMs = ins.op == DxbcOpcode::LdMs;
+
+   // Load all four address components. The last one is
+   // used as the LOD where an LOD operand is required.
+   const DxbcRegisterValue address = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, true, true, true));
+
+   // Optional operands: constant offset, LOD, sample index
+   SpirvImageOperands fetchOperands;
+
+   const bool hasConstOffset = ins.sampleControls.u != 0
+                            || ins.sampleControls.v != 0
+                            || ins.sampleControls.w != 0;
+
+   if (hasConstOffset) {
+     // Components beyond the texture's dimension remain zero
+     std::array<uint32_t, 3> offsetIds = {{ 0u, 0u, 0u }};
+
+     if (offsetDim >= 1) offsetIds[0] = m_module.consti32(ins.sampleControls.u);
+     if (offsetDim >= 2) offsetIds[1] = m_module.consti32(ins.sampleControls.v);
+     if (offsetDim >= 3) offsetIds[2] = m_module.consti32(ins.sampleControls.w);
+
+     fetchOperands.flags |= spv::ImageOperandsConstOffsetMask;
+     fetchOperands.sConstOffset = m_module.constComposite(
+       getVectorTypeId({ DxbcScalarType::Sint32, offsetDim }),
+       offsetDim, offsetIds.data());
+   }
+
+   // Buffers and multisample images are fetched without an LOD
+   const bool takesLod = tex.imageInfo.dim != spv::DimBuffer
+                      && tex.imageInfo.ms == 0;
+
+   if (takesLod) {
+     uint32_t lodId = 0;
+
+     if (isLdMs) {
+       // ld2dms on a non-multisampled image: fetch from LOD 0
+       lodId = m_module.constu32(0);
+     } else {
+       lodId = emitRegisterExtract(
+         address, DxbcRegMask(false, false, false, true)).id;
+     }
+
+     fetchOperands.flags |= spv::ImageOperandsLodMask;
+     fetchOperands.sLod = lodId;
+   }
+
+   // The sample index of ld2dms may only be
+   // passed along for multisample images
+   if (isLdMs && tex.imageInfo.ms == 1) {
+     const DxbcRegisterValue sampleIndex = emitRegisterLoad(
+       ins.src[2], DxbcRegMask(true, false, false, false));
+
+     fetchOperands.flags |= spv::ImageOperandsSampleMask;
+     fetchOperands.sSampleId = sampleIndex.id;
+   }
+
+   // Derive the coordinate vector from the address
+   const DxbcRegisterValue coord = emitCalcTexCoord(address, tex.imageInfo);
+
+   // The fetch itself always yields four components
+   const uint32_t imageId = m_module.opLoad(tex.imageTypeId, tex.varId);
+
+   DxbcRegisterValue texel;
+   texel.type.ctype  = tex.sampledType;
+   texel.type.ccount = 4;
+   texel.id = m_module.opImageFetch(
+     getVectorTypeId(texel.type), imageId,
+     coord.id, fetchOperands);
+
+   // Apply the texture swizzle and the destination write mask
+   texel = emitRegisterSwizzle(texel,
+     ins.src[1].swizzle, ins.dst[0].mask);
+
+   // Replace the result with zeroes if the texture is not bound
+   DxbcRegisterValue isBound;
+   isBound.type = { DxbcScalarType::Bool, 1 };
+   isBound.id   = tex.specId;
+
+   texel.id = m_module.opSelect(getVectorTypeId(texel.type),
+     emitBuildVector(isBound, texel.type.ccount).id, texel.id,
+     emitBuildZeroVector(texel.type).id);
+
+   emitRegisterStore(ins.dst[0], texel);
+ }
+
+
+ // Emits a texture gather operation.
+ // Gather4 operands:
+ //    (dst0) The destination register
+ //    (src0) Texture coordinates
+ //    (src1) The texture itself
+ //    (src2) The sampler, with a component selector
+ // Gather4C additionally takes:
+ //    (src3) The depth reference value
+ // The Gather4Po variants carry an offset operand in src1,
+ // which shifts all of the operands after src0 up by one.
+ // TODO reduce code duplication by moving some common code
+ // in both sample() and gather() into separate methods
+ void DxbcCompiler::emitTextureGather(const DxbcShaderInstruction& ins) {
+   const bool hasOffsetOperand = ins.op == DxbcOpcode::Gather4Po
+                              || ins.op == DxbcOpcode::Gather4PoC;
+   const uint32_t operandShift = hasOffsetOperand ? 1u : 0u;
+
+   const DxbcRegister& coordOperand   = ins.src[0];
+   const DxbcRegister& textureOperand = ins.src[1 + operandShift];
+   const DxbcRegister& samplerOperand = ins.src[2 + operandShift];
+
+   // Look up the declared texture and sampler
+   const auto& tex = m_textures.at(textureOperand.idx[0].offset);
+   const auto& smp = m_samplers.at(samplerOperand.idx[0].offset);
+
+   // Number of components used for texel offsets
+   const uint32_t offsetDim = getTexLayerDim(tex.imageInfo);
+
+   // Load the texture coordinates
+   DxbcRegisterValue coord = emitLoadTexCoord(coordOperand, tex.imageInfo);
+
+   // The depth-compare variants read a scalar reference value
+   const bool comparesDepth = ins.op == DxbcOpcode::Gather4C
+                           || ins.op == DxbcOpcode::Gather4PoC;
+
+   DxbcRegisterValue depthRef = DxbcRegisterValue();
+
+   if (comparesDepth) {
+     depthRef = emitRegisterLoad(ins.src[3 + operandShift],
+       DxbcRegMask(true, false, false, false));
+   }
+
+   // Additional image operands: either an offset loaded from
+   // a register, or the constant offset from the opcode token
+   SpirvImageOperands gatherOperands;
+
+   const bool hasConstOffset = ins.sampleControls.u != 0
+                            || ins.sampleControls.v != 0
+                            || ins.sampleControls.w != 0;
+
+   if (hasOffsetOperand) {
+     m_module.enableCapability(spv::CapabilityImageGatherExtended);
+
+     const DxbcRegisterValue dynamicOffset = emitRegisterLoad(
+       ins.src[1], DxbcRegMask::firstN(offsetDim));
+
+     gatherOperands.flags |= spv::ImageOperandsOffsetMask;
+     gatherOperands.gOffset = dynamicOffset.id;
+   } else if (hasConstOffset) {
+     // Components beyond the texture's dimension remain zero
+     std::array<uint32_t, 3> offsetIds = {{ 0u, 0u, 0u }};
+
+     if (offsetDim >= 1) offsetIds[0] = m_module.consti32(ins.sampleControls.u);
+     if (offsetDim >= 2) offsetIds[1] = m_module.consti32(ins.sampleControls.v);
+     if (offsetDim >= 3) offsetIds[2] = m_module.consti32(ins.sampleControls.w);
+
+     gatherOperands.flags |= spv::ImageOperandsConstOffsetMask;
+     gatherOperands.sConstOffset = m_module.constComposite(
+       getVectorTypeId({ DxbcScalarType::Sint32, offsetDim }),
+       offsetDim, offsetIds.data());
+   }
+
+   // Combine the texture and the sampler into a sampled image
+   const uint32_t combinedImageId = emitLoadSampledImage(tex, smp, comparesDepth);
+
+   // All gather variants return four components
+   DxbcRegisterValue gathered;
+   gathered.type.ctype  = tex.sampledType;
+   gathered.type.ccount = 4;
+
+   if (comparesDepth) {
+     // Gather4C and Gather4PoC
+     gathered.id = m_module.opImageDrefGather(
+       getVectorTypeId(gathered.type), combinedImageId, coord.id,
+       depthRef.id, gatherOperands);
+   } else if (ins.op == DxbcOpcode::Gather4
+           || ins.op == DxbcOpcode::Gather4Po) {
+     // The first sampler swizzle entry selects the component
+     gathered.id = m_module.opImageGather(
+       getVectorTypeId(gathered.type), combinedImageId, coord.id,
+       m_module.consti32(samplerOperand.swizzle[0]),
+       gatherOperands);
+   } else {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled instruction: ",
+       ins.op));
+     return;
+   }
+
+   // Apply the texture swizzle and the destination write mask
+   gathered = emitRegisterSwizzle(gathered,
+     textureOperand.swizzle, ins.dst[0].mask);
+
+   // Replace the result with zeroes if the texture is not bound
+   DxbcRegisterValue isBound;
+   isBound.type = { DxbcScalarType::Bool, 1 };
+   isBound.id   = tex.specId;
+
+   gathered.id = m_module.opSelect(getVectorTypeId(gathered.type),
+     emitBuildVector(isBound, gathered.type.ccount).id, gathered.id,
+     emitBuildZeroVector(gathered.type).id);
+
+   emitRegisterStore(ins.dst[0], gathered);
+ }
+
+
+ // Emits a texture sample operation. Common operands:
+ //    (dst0) The destination register
+ //    (src0) Texture coordinates
+ //    (src1) The texture itself
+ //    (src2) The sampler object
+ // Depending on the opcode, src3 holds the depth reference
+ // (SampleC, SampleClz), the LOD or LOD bias (SampleL, SampleB)
+ // or the X gradient (SampleD, with the Y gradient in src4).
+ void DxbcCompiler::emitTextureSample(const DxbcShaderInstruction& ins) {
+   const DxbcRegister& coordOperand   = ins.src[0];
+   const DxbcRegister& textureOperand = ins.src[1];
+   const DxbcRegister& samplerOperand = ins.src[2];
+
+   // Look up the declared texture and sampler
+   const auto& tex = m_textures.at(textureOperand.idx[0].offset);
+   const auto& smp = m_samplers.at(samplerOperand.idx[0].offset);
+   const uint32_t offsetDim = getTexLayerDim(tex.imageInfo);
+
+   // Load the texture coordinates
+   DxbcRegisterValue coord = emitLoadTexCoord(coordOperand, tex.imageInfo);
+
+   // Scalar reference value for the depth-compare variants
+   const bool comparesDepth = ins.op == DxbcOpcode::SampleC
+                           || ins.op == DxbcOpcode::SampleClz;
+
+   DxbcRegisterValue depthRef = DxbcRegisterValue();
+
+   if (comparesDepth) {
+     depthRef = emitRegisterLoad(ins.src[3],
+       DxbcRegMask(true, false, false, false));
+   }
+
+   // Explicit gradients, X first and then Y
+   DxbcRegisterValue gradX = DxbcRegisterValue();
+   DxbcRegisterValue gradY = DxbcRegisterValue();
+
+   if (ins.op == DxbcOpcode::SampleD) {
+     gradX = emitRegisterLoad(ins.src[3], DxbcRegMask::firstN(offsetDim));
+     gradY = emitRegisterLoad(ins.src[4], DxbcRegMask::firstN(offsetDim));
+   }
+
+   // Scalar that serves as explicit LOD or as LOD bias
+   DxbcRegisterValue lodOrBias = DxbcRegisterValue();
+
+   if (ins.op == DxbcOpcode::SampleL
+    || ins.op == DxbcOpcode::SampleB) {
+     lodOrBias = emitRegisterLoad(ins.src[3],
+       DxbcRegMask(true, false, false, false));
+   }
+
+   // Additional image operands, starting with the
+   // constant texel offset stored in the opcode token
+   SpirvImageOperands sampleOperands;
+
+   const bool hasConstOffset = ins.sampleControls.u != 0
+                            || ins.sampleControls.v != 0
+                            || ins.sampleControls.w != 0;
+
+   if (hasConstOffset) {
+     // Components beyond the texture's dimension remain zero
+     std::array<uint32_t, 3> offsetIds = {{ 0u, 0u, 0u }};
+
+     if (offsetDim >= 1) offsetIds[0] = m_module.consti32(ins.sampleControls.u);
+     if (offsetDim >= 2) offsetIds[1] = m_module.consti32(ins.sampleControls.v);
+     if (offsetDim >= 3) offsetIds[2] = m_module.consti32(ins.sampleControls.w);
+
+     sampleOperands.flags |= spv::ImageOperandsConstOffsetMask;
+     sampleOperands.sConstOffset = m_module.constComposite(
+       getVectorTypeId({ DxbcScalarType::Sint32, offsetDim }),
+       offsetDim, offsetIds.data());
+   }
+
+   // Combine the texture and the sampler into a sampled image
+   const uint32_t combinedImageId = emitLoadSampledImage(tex, smp, comparesDepth);
+
+   // Depth-compare operations produce a scalar,
+   // all others a four-component vector
+   DxbcRegisterValue sampled;
+   sampled.type.ctype  = tex.sampledType;
+   sampled.type.ccount = comparesDepth ? 1 : 4;
+
+   if (ins.op == DxbcOpcode::Sample
+    || ins.op == DxbcOpcode::SampleB) {
+     // Implicit LOD, optionally with a bias
+     if (ins.op == DxbcOpcode::SampleB) {
+       sampleOperands.flags |= spv::ImageOperandsBiasMask;
+       sampleOperands.sLodBias = lodOrBias.id;
+     }
+
+     sampled.id = m_module.opImageSampleImplicitLod(
+       getVectorTypeId(sampled.type),
+       combinedImageId, coord.id,
+       sampleOperands);
+   } else if (ins.op == DxbcOpcode::SampleD
+           || ins.op == DxbcOpcode::SampleL) {
+     // Explicit LOD, given directly or through gradients
+     if (ins.op == DxbcOpcode::SampleD) {
+       sampleOperands.flags |= spv::ImageOperandsGradMask;
+       sampleOperands.sGradX = gradX.id;
+       sampleOperands.sGradY = gradY.id;
+     } else {
+       sampleOperands.flags |= spv::ImageOperandsLodMask;
+       sampleOperands.sLod = lodOrBias.id;
+     }
+
+     sampled.id = m_module.opImageSampleExplicitLod(
+       getVectorTypeId(sampled.type),
+       combinedImageId, coord.id,
+       sampleOperands);
+   } else if (ins.op == DxbcOpcode::SampleC) {
+     // Depth-compare with implicit LOD
+     sampled.id = m_module.opImageSampleDrefImplicitLod(
+       getVectorTypeId(sampled.type), combinedImageId, coord.id,
+       depthRef.id, sampleOperands);
+   } else if (ins.op == DxbcOpcode::SampleClz) {
+     // Depth-compare on mip level zero
+     sampleOperands.flags |= spv::ImageOperandsLodMask;
+     sampleOperands.sLod = m_module.constf32(0.0f);
+
+     sampled.id = m_module.opImageSampleDrefExplicitLod(
+       getVectorTypeId(sampled.type), combinedImageId, coord.id,
+       depthRef.id, sampleOperands);
+   } else {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled instruction: ",
+       ins.op));
+     return;
+   }
+
+   // Vector results get the texture swizzle and the write
+   // mask applied, scalar results are stored as they are
+   if (sampled.type.ccount != 1) {
+     sampled = emitRegisterSwizzle(sampled,
+       textureOperand.swizzle, ins.dst[0].mask);
+   }
+
+   // Replace the result with zeroes if the texture is not bound
+   DxbcRegisterValue isBound;
+   isBound.type = { DxbcScalarType::Bool, 1 };
+   isBound.id   = tex.specId;
+
+   sampled.id = m_module.opSelect(getVectorTypeId(sampled.type),
+     emitBuildVector(isBound, sampled.type.ccount).id, sampled.id,
+     emitBuildZeroVector(sampled.type).id);
+
+   emitRegisterStore(ins.dst[0], sampled);
+ }
+
+
+ // Emits code for load_uav_typed. Operands:
+ //    (dst0) The destination register
+ //    (src0) The texture or buffer coordinates
+ //    (src1) The UAV to load from
+ void DxbcCompiler::emitTypedUavLoad(const DxbcShaderInstruction& ins) {
+   const uint32_t uavIndex = ins.src[1].idx[0].offset;
+   const DxbcUav  uav      = m_uavs.at(uavIndex);
+
+   // Coordinates of the texel to read
+   DxbcRegisterValue coord = emitLoadTexCoord(
+     ins.src[0], uav.imageInfo);
+
+   // The image read yields a four-component vector
+   DxbcRegisterValue texel;
+   texel.type.ctype  = uav.sampledType;
+   texel.type.ccount = 4;
+   texel.id = m_module.opImageRead(
+     getVectorTypeId(texel.type),
+     m_module.opLoad(uav.imageTypeId, uav.varId),
+     coord.id, SpirvImageOperands());
+
+   // Apply the UAV swizzle and the destination
+   // write mask, then store the result
+   emitRegisterStore(ins.dst[0],
+     emitRegisterSwizzle(texel,
+       ins.src[1].swizzle, ins.dst[0].mask));
+ }
+
+
+ // Emits code for store_uav_typed. Operands:
+ //    (dst0) The destination UAV
+ //    (src0) The texture or buffer coordinates
+ //    (src1) The value to store
+ // The image write is wrapped in a conditional block that
+ // is only entered if the UAV write test succeeds.
+ void DxbcCompiler::emitTypedUavStore(const DxbcShaderInstruction& ins) {
+   const DxbcBufferInfo uav = getBufferInfo(ins.dst[0]);
+
+   // Condition guarding the write
+   const uint32_t canWrite = emitUavWriteTest(uav);
+
+   const uint32_t writeLabel = m_module.allocateId();
+   const uint32_t mergeLabel = m_module.allocateId();
+
+   m_module.opSelectionMerge   (mergeLabel, spv::SelectionControlMaskNone);
+   m_module.opBranchConditional(canWrite, writeLabel, mergeLabel);
+
+   m_module.opLabel(writeLabel);
+
+   // Coordinates of the texel to write
+   DxbcRegisterValue coord = emitLoadTexCoord(ins.src[0], uav.image);
+
+   // Load all four components of the source value and
+   // bitcast them to the scalar type stored for the UAV
+   const DxbcRegisterValue sourceValue = emitRegisterLoad(
+     ins.src[1], DxbcRegMask(true, true, true, true));
+   const DxbcRegisterValue storedValue = emitRegisterBitcast(
+     sourceValue, uav.stype);
+
+   m_module.opImageWrite(
+     m_module.opLoad(uav.typeId, uav.varId),
+     coord.id, storedValue.id, SpirvImageOperands());
+
+   // Close the conditional block
+   m_module.opBranch(mergeLabel);
+   m_module.opLabel (mergeLabel);
+ }
+
+
+ // Opens an 'if' block. Only the label of the 'if' branch is
+ // written here; the merge and branch instructions are inserted
+ // at the recorded header position when the block is closed,
+ // because it is not yet known whether an 'else' will follow.
+ void DxbcCompiler::emitControlFlowIf(const DxbcShaderInstruction& ins) {
+   // The condition is the first component of src0
+   const DxbcRegisterValue testedValue = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   DxbcCfgBlock ifBlock;
+   ifBlock.type = DxbcCfgBlockType::If;
+
+   // A zero 'else' label means that no 'else' was seen so far
+   auto& state = ifBlock.b_if;
+   state.ztestId   = emitRegisterZeroTest(testedValue, ins.controls.zeroTest()).id;
+   state.labelIf   = m_module.allocateId();
+   state.labelElse = 0;
+   state.labelEnd  = m_module.allocateId();
+   state.headerPtr = m_module.getInsertionPtr();
+
+   m_controlFlowBlocks.push_back(ifBlock);
+
+   // Start the 'if' branch
+   m_module.opLabel(state.labelIf);
+ }
+
+
+ // Switches from the 'if' branch of the innermost block to its
+ // 'else' branch. Throws a DxvkError if the innermost block is
+ // not an 'if' block or if it already has an 'else' branch.
+ void DxbcCompiler::emitControlFlowElse(const DxbcShaderInstruction& ins) {
+   const bool insideOpenIf = !m_controlFlowBlocks.empty()
+     && m_controlFlowBlocks.back().type == DxbcCfgBlockType::If
+     && m_controlFlowBlocks.back().b_if.labelElse == 0;
+
+   if (!insideOpenIf)
+     throw DxvkError("DxbcCompiler: 'Else' without 'If' found");
+
+   // A non-zero 'else' label tells 'EndIf' to
+   // branch there when the condition fails
+   auto& state = m_controlFlowBlocks.back().b_if;
+   state.labelElse = m_module.allocateId();
+
+   // Leave the 'if' branch through the merge
+   // label, then start the 'else' branch
+   m_module.opBranch(state.labelEnd);
+   m_module.opLabel (state.labelElse);
+ }
+
+
+ // Closes the innermost 'if' block: inserts the selection header
+ // at the position recorded when the block was opened and ends
+ // the active branch at the merge label. Throws a DxvkError if
+ // the innermost block is not an 'if' block.
+ void DxbcCompiler::emitControlFlowEndIf(const DxbcShaderInstruction& ins) {
+   const bool closesIf = !m_controlFlowBlocks.empty()
+     && m_controlFlowBlocks.back().type == DxbcCfgBlockType::If;
+
+   if (!closesIf)
+     throw DxvkError("DxbcCompiler: 'EndIf' without 'If' found");
+
+   // Take the block off the stack
+   const DxbcCfgBlock closed = m_controlFlowBlocks.back();
+   m_controlFlowBlocks.pop_back();
+
+   const auto& state = closed.b_if;
+
+   // Without an 'else' branch, a failed
+   // test jumps straight to the merge label
+   const uint32_t falseTarget = state.labelElse != 0
+     ? state.labelElse
+     : state.labelEnd;
+
+   // Insert the header in front of the 'if' branch
+   m_module.beginInsertion(state.headerPtr);
+   m_module.opSelectionMerge(state.labelEnd, spv::SelectionControlMaskNone);
+   m_module.opBranchConditional(state.ztestId, state.labelIf, falseTarget);
+   m_module.endInsertion();
+
+   // End the active 'if' or 'else' branch
+   m_module.opBranch(state.labelEnd);
+   m_module.opLabel (state.labelEnd);
+ }
+
+
+ // Opens a 'switch' block. The switch instruction is not written
+ // yet, since the set of case labels is unknown at this point;
+ // only the position where it has to be inserted is recorded.
+ void DxbcCompiler::emitControlFlowSwitch(const DxbcShaderInstruction& ins) {
+   // The selector is the first component of src0
+   const DxbcRegisterValue selectorValue = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   DxbcCfgBlock switchBlock;
+   switchBlock.type = DxbcCfgBlockType::Switch;
+
+   // No default label and no case labels have been seen yet
+   auto& state = switchBlock.b_switch;
+   state.insertPtr    = m_module.getInsertionPtr();
+   state.selectorId   = selectorValue.id;
+   state.labelBreak   = m_module.allocateId();
+   state.labelCase    = m_module.allocateId();
+   state.labelDefault = 0;
+   state.labelCases   = nullptr;
+
+   m_controlFlowBlocks.push_back(switchBlock);
+
+   // Start the block that the first 'case' will refer to
+   m_module.opLabel(state.labelCase);
+ }
+
+
+ // Registers a 'case' literal for the innermost 'switch' block.
+ // Throws a DxvkError if the innermost block is not a 'switch'
+ // or if the operand is not a 32-bit immediate.
+ void DxbcCompiler::emitControlFlowCase(const DxbcShaderInstruction& ins) {
+   const bool insideSwitch = !m_controlFlowBlocks.empty()
+     && m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch;
+
+   if (!insideSwitch)
+     throw DxvkError("DxbcCompiler: 'Case' without 'Switch' found");
+
+   if (ins.src[0].type != DxbcOperandType::Imm32)
+     throw DxvkError("DxbcCompiler: Invalid operand type for 'Case'");
+
+   DxbcCfgBlockSwitch& state = m_controlFlowBlocks.back().b_switch;
+
+   // Map the literal to the most recently allocated 'case' label.
+   // The block behind that label is empty unless a preceding
+   // 'case' block was not properly closed in the DXBC shader.
+   DxbcSwitchLabel entry;
+   entry.desc.literal = ins.src[0].imm.u32_1;
+   entry.desc.labelId = state.labelCase;
+
+   // Prepend a heap-allocated copy to the list of case labels
+   entry.next = state.labelCases;
+   state.labelCases = new DxbcSwitchLabel(entry);
+ }
+
+
+ // Marks the most recently allocated 'case' label of the innermost
+ // 'switch' block as its default label. Throws a DxvkError if the
+ // innermost block is not a 'switch' block.
+ void DxbcCompiler::emitControlFlowDefault(const DxbcShaderInstruction& ins) {
+   const bool insideSwitch = !m_controlFlowBlocks.empty()
+     && m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch;
+
+   if (!insideSwitch)
+     throw DxvkError("DxbcCompiler: 'Default' without 'Switch' found");
+
+   auto& state = m_controlFlowBlocks.back().b_switch;
+   state.labelDefault = state.labelCase;
+ }
+
+
+ // Closes the innermost 'switch' block: ends the current 'case'
+ // block at the break label and inserts the selection merge and
+ // the switch instruction at the position recorded when the block
+ // was opened. Throws a DxvkError if the innermost block is not a
+ // 'switch' block.
+ void DxbcCompiler::emitControlFlowEndSwitch(const DxbcShaderInstruction& ins) {
+   if (m_controlFlowBlocks.size() == 0
+    || m_controlFlowBlocks.back().type != DxbcCfgBlockType::Switch)
+     throw DxvkError("DxbcCompiler: 'EndSwitch' without 'Switch' found");
+
+   // Remove the block from the stack, it's closed
+   DxbcCfgBlock block = m_controlFlowBlocks.back();
+   m_controlFlowBlocks.pop_back();
+
+   // Collect the literal-label pairs and destroy the list of case
+   // labels in a single pass, before anything else is emitted, so
+   // that the list cannot leak if emitting the switch fails.
+   // FIXME the list still leaks if compilation fails between
+   // 'Switch' and 'EndSwitch'.
+   std::vector<SpirvSwitchCaseLabel> newestFirst;
+   DxbcSwitchLabel* caseLabel = block.b_switch.labelCases;
+
+   while (caseLabel != nullptr) {
+     newestFirst.push_back(caseLabel->desc);
+     delete std::exchange(caseLabel, caseLabel->next);
+   }
+
+   block.b_switch.labelCases = nullptr;
+
+   // New labels are prepended to the list, so reversing it restores
+   // the original order of the case labels. This takes linear time,
+   // unlike inserting each entry at the front of the vector.
+   std::vector<SpirvSwitchCaseLabel> jumpTargets(
+     newestFirst.rbegin(), newestFirst.rend());
+
+   // If no 'default' label was specified, use the last allocated
+   // 'case' label. This is guaranteed to be an empty block unless
+   // a previous 'case' block was not closed properly.
+   if (block.b_switch.labelDefault == 0)
+     block.b_switch.labelDefault = block.b_switch.labelCase;
+
+   // Close the current 'case' block
+   m_module.opBranch(block.b_switch.labelBreak);
+   m_module.opLabel (block.b_switch.labelBreak);
+
+   // Insert the 'switch' statement in front of the first case block
+   m_module.beginInsertion(block.b_switch.insertPtr);
+   m_module.opSelectionMerge(
+     block.b_switch.labelBreak,
+     spv::SelectionControlMaskNone);
+
+   m_module.opSwitch(
+     block.b_switch.selectorId,
+     block.b_switch.labelDefault,
+     jumpTargets.size(),
+     jumpTargets.data());
+   m_module.endInsertion();
+ }
+
+
+ // Opens a 'loop' block: allocates the header, body, continue
+ // and break labels, writes the loop header with its merge
+ // instruction and starts the loop body.
+ void DxbcCompiler::emitControlFlowLoop(const DxbcShaderInstruction& ins) {
+   DxbcCfgBlock loopBlock;
+   loopBlock.type = DxbcCfgBlockType::Loop;
+
+   auto& state = loopBlock.b_loop;
+   state.labelHeader   = m_module.allocateId();
+   state.labelBegin    = m_module.allocateId();
+   state.labelContinue = m_module.allocateId();
+   state.labelBreak    = m_module.allocateId();
+
+   m_controlFlowBlocks.push_back(loopBlock);
+
+   // Enter the loop header from the preceding block
+   m_module.opBranch(state.labelHeader);
+   m_module.opLabel (state.labelHeader);
+
+   m_module.opLoopMerge(
+     state.labelBreak,
+     state.labelContinue,
+     spv::LoopControlMaskNone);
+
+   // Start the loop body
+   m_module.opBranch(state.labelBegin);
+   m_module.opLabel (state.labelBegin);
+ }
+
+
+ // Closes the innermost 'loop' block: writes the continue block,
+ // which branches back to the loop header, and starts the merge
+ // block. Throws a DxvkError if the innermost block is not a
+ // 'loop' block.
+ void DxbcCompiler::emitControlFlowEndLoop(const DxbcShaderInstruction& ins) {
+   const bool closesLoop = !m_controlFlowBlocks.empty()
+     && m_controlFlowBlocks.back().type == DxbcCfgBlockType::Loop;
+
+   if (!closesLoop)
+     throw DxvkError("DxbcCompiler: 'EndLoop' without 'Loop' found");
+
+   // Take the block off the stack
+   const DxbcCfgBlock closed = m_controlFlowBlocks.back();
+   m_controlFlowBlocks.pop_back();
+
+   const auto& state = closed.b_loop;
+
+   // The loop body falls through into the continue block
+   m_module.opBranch(state.labelContinue);
+   m_module.opLabel (state.labelContinue);
+
+   // The continue block jumps back to the header,
+   // and code after the loop goes to the merge block
+   m_module.opBranch(state.labelHeader);
+   m_module.opLabel (state.labelBreak);
+ }
+
+
+ // Handles the unconditional 'break' and 'continue' instructions.
+ //
+ // 'break' targets the innermost enclosing Loop or Switch block, while
+ // 'continue' always targets the innermost enclosing Loop. Throws
+ // DxvkError if no suitable block is found. After the branch, a fresh
+ // label is opened because subsequent instructions expect an open block.
+ void DxbcCompiler::emitControlFlowBreak(const DxbcShaderInstruction& ins) {
+   const bool isBreak = ins.op == DxbcOpcode::Break;
+
+   DxbcCfgBlock* cfgBlock = isBreak
+     ? cfgFindBlock({ DxbcCfgBlockType::Loop, DxbcCfgBlockType::Switch })
+     : cfgFindBlock({ DxbcCfgBlockType::Loop });
+
+   if (cfgBlock == nullptr)
+     throw DxvkError("DxbcCompiler: 'Break' or 'Continue' outside 'Loop' or 'Switch' found");
+
+   if (cfgBlock->type == DxbcCfgBlockType::Loop) {
+     m_module.opBranch(isBreak
+       ? cfgBlock->b_loop.labelBreak
+       : cfgBlock->b_loop.labelContinue);
+   } else /* if (cfgBlock->type == DxbcCfgBlockType::Switch) */ {
+     m_module.opBranch(cfgBlock->b_switch.labelBreak);
+   }
+
+   // Subsequent instructions assume that there is an open block
+   const uint32_t labelId = m_module.allocateId();
+   m_module.opLabel(labelId);
+
+   // If this is on the same level as a switch-case construct,
+   // rather than being nested inside an 'if' statement, close
+   // the current 'case' block. The case label has to be stored
+   // in the innermost block itself: for 'continue', cfgBlock is
+   // the enclosing Loop entry rather than the Switch entry, so
+   // writing through cfgBlock would touch the wrong entry.
+   DxbcCfgBlock& innermostBlock = m_controlFlowBlocks.back();
+
+   if (innermostBlock.type == DxbcCfgBlockType::Switch)
+     innermostBlock.b_switch.labelCase = labelId;
+ }
+
+
+ // Handles the conditional 'breakc' and 'continuec' instructions.
+ //
+ // 'breakc' targets the innermost enclosing Loop or Switch, 'continuec'
+ // the innermost enclosing Loop; DxvkError is thrown if there is none.
+ // The jump is wrapped into a selection construct whose condition is the
+ // zero test of the first component of src[0].
+ void DxbcCompiler::emitControlFlowBreakc(const DxbcShaderInstruction& ins) {
+   const bool isBreak = ins.op == DxbcOpcode::Breakc;
+
+   DxbcCfgBlock* targetBlock = nullptr;
+
+   if (isBreak)
+     targetBlock = cfgFindBlock({ DxbcCfgBlockType::Loop, DxbcCfgBlockType::Switch });
+   else
+     targetBlock = cfgFindBlock({ DxbcCfgBlockType::Loop });
+
+   if (!targetBlock)
+     throw DxvkError("DxbcCompiler: 'Breakc' or 'Continuec' outside 'Loop' or 'Switch' found");
+
+   // Only the x component of the condition operand matters
+   const DxbcRegisterValue conditionValue = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   const DxbcRegisterValue shouldJump = emitRegisterZeroTest(
+     conditionValue, ins.controls.zeroTest());
+
+   // Equivalent of 'if (cond) { break/continue; }'
+   const uint32_t jumpLabel  = m_module.allocateId();
+   const uint32_t mergeLabel = m_module.allocateId();
+
+   m_module.opSelectionMerge(mergeLabel,
+     spv::SelectionControlMaskNone);
+
+   m_module.opBranchConditional(
+     shouldJump.id, jumpLabel, mergeLabel);
+
+   m_module.opLabel(jumpLabel);
+
+   // Pick the label to jump to depending on the block type
+   uint32_t destinationLabel = 0;
+
+   if (targetBlock->type == DxbcCfgBlockType::Loop) {
+     destinationLabel = isBreak
+       ? targetBlock->b_loop.labelBreak
+       : targetBlock->b_loop.labelContinue;
+   } else /* DxbcCfgBlockType::Switch */ {
+     destinationLabel = targetBlock->b_switch.labelBreak;
+   }
+
+   m_module.opBranch(destinationLabel);
+   m_module.opLabel(mergeLabel);
+ }
+
+
+ // Handles the 'ret' instruction. With no open control flow block this
+ // ends the current function. Otherwise a return is emitted and a fresh
+ // label is opened so that following instructions have a block to go to.
+ void DxbcCompiler::emitControlFlowRet(const DxbcShaderInstruction& ins) {
+   if (m_controlFlowBlocks.size() == 0) {
+     // Last instruction in the current function
+     this->emitFunctionEnd();
+     return;
+   }
+
+   const uint32_t followUpLabel = m_module.allocateId();
+
+   m_module.opReturn();
+   m_module.opLabel(followUpLabel);
+
+   // 'ret' may terminate a case block in place of 'break', in
+   // which case the new label becomes the current case label
+   DxbcCfgBlock& innermostBlock = m_controlFlowBlocks.back();
+
+   if (innermostBlock.type == DxbcCfgBlockType::Switch)
+     innermostBlock.b_switch.labelCase = followUpLabel;
+ }
+
+
+ // Handles the conditional 'retc' instruction by emitting the equivalent
+ // of 'if (cond) return;', where the condition is the zero test of the
+ // first component of src[0].
+ void DxbcCompiler::emitControlFlowRetc(const DxbcShaderInstruction& ins) {
+   // Only the x component of the condition operand matters
+   const DxbcRegisterValue conditionValue = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   const DxbcRegisterValue shouldReturn = emitRegisterZeroTest(
+     conditionValue, ins.controls.zeroTest());
+
+   // One label for the block that returns, one for the merge block
+   const uint32_t exitLabel  = m_module.allocateId();
+   const uint32_t mergeLabel = m_module.allocateId();
+
+   m_module.opSelectionMerge(mergeLabel,
+     spv::SelectionControlMaskNone);
+
+   m_module.opBranchConditional(
+     shouldReturn.id, exitLabel, mergeLabel);
+
+   // The return instruction terminates this block
+   m_module.opLabel(exitLabel);
+   m_module.opReturn();
+
+   m_module.opLabel(mergeLabel);
+ }
+
+
+ // Handles the 'discard' instruction. Two code paths exist:
+ // - m_ps.killState == 0: the discard is performed right away inside
+ //   a selection construct, either as a demote-to-helper or as OpKill.
+ // - otherwise: the condition is OR-ed into the boolean variable
+ //   m_ps.killState, and optionally an early OpKill is emitted when a
+ //   group of four neighbouring subgroup lanes all have the flag set.
+ // NOTE(review): a killState ID of 0 presumably means that no kill
+ // state variable was declared for this shader - confirm at the
+ // declaration site.
+ void DxbcCompiler::emitControlFlowDiscard(const DxbcShaderInstruction& ins) {
+ // Discard actually has an operand that determines
+ // whether or not the fragment should be discarded
+ const DxbcRegisterValue condition = emitRegisterLoad(
+ ins.src[0], DxbcRegMask(true, false, false, false));
+
+ const DxbcRegisterValue zeroTest = emitRegisterZeroTest(
+ condition, ins.controls.zeroTest());
+
+ if (m_ps.killState == 0) {
+ // Immediate discard: 'if (cond) { demote or kill }'
+ DxbcConditional cond;
+ cond.labelIf = m_module.allocateId();
+ cond.labelEnd = m_module.allocateId();
+
+ m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+ m_module.opBranchConditional(zeroTest.id, cond.labelIf, cond.labelEnd);
+
+ m_module.opLabel(cond.labelIf);
+
+ if (m_moduleInfo.options.useDemoteToHelperInvocation) {
+ // Demotion does not end the block, so branch to the merge label
+ m_module.opDemoteToHelperInvocation();
+ m_module.opBranch(cond.labelEnd);
+ } else {
+ // OpKill terminates the block
+ m_module.opKill();
+ }
+
+ m_module.opLabel(cond.labelEnd);
+ } else {
+ uint32_t typeId = m_module.defBoolType();
+
+ // Accumulate the condition: killState = killState || cond
+ uint32_t killState = m_module.opLoad (typeId, m_ps.killState);
+ killState = m_module.opLogicalOr(typeId, killState, zeroTest.id);
+ m_module.opStore(m_ps.killState, killState);
+
+ if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
+ // Ballot of the accumulated kill flag as a four-component
+ // 32-bit unsigned vector, one bit per subgroup lane
+ uint32_t ballot = m_module.opGroupNonUniformBallot(
+ getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
+ m_module.constu32(spv::ScopeSubgroup),
+ killState);
+
+ uint32_t laneId = m_module.opLoad(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ m_ps.builtinLaneId);
+
+ // laneId / 32 selects the ballot component holding this lane's bit
+ uint32_t laneIdPart = m_module.opShiftRightLogical(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ laneId, m_module.constu32(5));
+
+ uint32_t laneMask = m_module.opVectorExtractDynamic(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ ballot, laneIdPart);
+
+ // 0x1c keeps bits 2..4 of the lane ID, i.e. the bit position
+ // within the component of the first lane in this lane's group
+ // of four (the 'quad' in the variable name)
+ uint32_t laneIdQuad = m_module.opBitwiseAnd(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ laneId, m_module.constu32(0x1c));
+
+ // Move the four bits of that group down to bits 0..3
+ laneMask = m_module.opShiftRightLogical(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ laneMask, laneIdQuad);
+
+ laneMask = m_module.opBitwiseAnd(
+ getScalarTypeId(DxbcScalarType::Uint32),
+ laneMask, m_module.constu32(0xf));
+
+ // True only if all four lanes of the group have the flag set
+ uint32_t killSubgroup = m_module.opIEqual(
+ m_module.defBoolType(),
+ laneMask, m_module.constu32(0xf));
+
+ DxbcConditional cond;
+ cond.labelIf = m_module.allocateId();
+ cond.labelEnd = m_module.allocateId();
+
+ m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+ m_module.opBranchConditional(killSubgroup, cond.labelIf, cond.labelEnd);
+
+ // OpKill terminates the block
+ m_module.opLabel(cond.labelIf);
+ m_module.opKill();
+
+ m_module.opLabel(cond.labelEnd);
+ }
+ }
+ }
+
+
+ // Handles the 'label' instruction, which starts the body of a
+ // subroutine. The subroutine number is taken from dst[0]; the function
+ // takes no arguments and returns void. Sets m_insideFunction.
+ void DxbcCompiler::emitControlFlowLabel(const DxbcShaderInstruction& ins) {
+   const uint32_t labelNumber = ins.dst[0].idx[0].offset;
+   const uint32_t labelFunctionId = getFunctionId(labelNumber);
+
+   // void label#()
+   this->emitFunctionBegin(
+     labelFunctionId,
+     m_module.defVoidType(),
+     m_module.defFunctionType(
+       m_module.defVoidType(), 0, nullptr));
+
+   // Open the first block of the function body
+   const uint32_t entryLabel = m_module.allocateId();
+   m_module.opLabel(entryLabel);
+
+   // Name the function so it can be identified in debug output
+   m_module.setDebugName(labelFunctionId,
+     str::format("label", labelNumber).c_str());
+
+   m_insideFunction = true;
+ }
+
+
+ // Handles the 'call' instruction: unconditionally calls the
+ // argument-less void subroutine whose number is given in src[0].
+ void DxbcCompiler::emitControlFlowCall(const DxbcShaderInstruction& ins) {
+   const uint32_t calleeId = getFunctionId(ins.src[0].idx[0].offset);
+   const uint32_t voidTypeId = m_module.defVoidType();
+
+   m_module.opFunctionCall(voidTypeId, calleeId, 0, nullptr);
+ }
+
+
+ // Handles the conditional 'callc' instruction by emitting the
+ // equivalent of 'if (cond) label#();'. The condition is the zero test
+ // of the first component of src[0], the subroutine number is src[1].
+ void DxbcCompiler::emitControlFlowCallc(const DxbcShaderInstruction& ins) {
+   // Resolve the callee before emitting any code
+   const uint32_t calleeNumber = ins.src[1].idx[0].offset;
+   const uint32_t calleeId = getFunctionId(calleeNumber);
+
+   // Only the x component of the condition operand matters
+   const DxbcRegisterValue conditionValue = emitRegisterLoad(
+     ins.src[0], DxbcRegMask(true, false, false, false));
+
+   const DxbcRegisterValue shouldCall = emitRegisterZeroTest(
+     conditionValue, ins.controls.zeroTest());
+
+   // One label for the block with the call, one for the merge block
+   const uint32_t invokeLabel = m_module.allocateId();
+   const uint32_t mergeLabel  = m_module.allocateId();
+
+   m_module.opSelectionMerge(mergeLabel,
+     spv::SelectionControlMaskNone);
+
+   m_module.opBranchConditional(
+     shouldCall.id, invokeLabel, mergeLabel);
+
+   m_module.opLabel(invokeLabel);
+   m_module.opFunctionCall(
+     m_module.defVoidType(),
+     calleeId, 0, nullptr);
+
+   // Unlike a return, the call does not terminate the block
+   m_module.opBranch(mergeLabel);
+   m_module.opLabel(mergeLabel);
+ }
+
+
+ // Dispatches a control flow instruction to its dedicated handler.
+ // 'break'/'continue' share one handler, as do 'breakc'/'continuec'.
+ // Opcodes without a handler only produce a warning.
+ void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
+   switch (ins.op) {
+     // Conditional blocks
+     case DxbcOpcode::If:
+       emitControlFlowIf(ins);
+       break;
+
+     case DxbcOpcode::Else:
+       emitControlFlowElse(ins);
+       break;
+
+     case DxbcOpcode::EndIf:
+       emitControlFlowEndIf(ins);
+       break;
+
+     // Switch-case constructs
+     case DxbcOpcode::Switch:
+       emitControlFlowSwitch(ins);
+       break;
+
+     case DxbcOpcode::Case:
+       emitControlFlowCase(ins);
+       break;
+
+     case DxbcOpcode::Default:
+       emitControlFlowDefault(ins);
+       break;
+
+     case DxbcOpcode::EndSwitch:
+       emitControlFlowEndSwitch(ins);
+       break;
+
+     // Loops and jumps out of loops or switch blocks
+     case DxbcOpcode::Loop:
+       emitControlFlowLoop(ins);
+       break;
+
+     case DxbcOpcode::EndLoop:
+       emitControlFlowEndLoop(ins);
+       break;
+
+     case DxbcOpcode::Break:
+     case DxbcOpcode::Continue:
+       emitControlFlowBreak(ins);
+       break;
+
+     case DxbcOpcode::Breakc:
+     case DxbcOpcode::Continuec:
+       emitControlFlowBreakc(ins);
+       break;
+
+     // Returns and discards
+     case DxbcOpcode::Ret:
+       emitControlFlowRet(ins);
+       break;
+
+     case DxbcOpcode::Retc:
+       emitControlFlowRetc(ins);
+       break;
+
+     case DxbcOpcode::Discard:
+       emitControlFlowDiscard(ins);
+       break;
+
+     // Subroutines
+     case DxbcOpcode::Label:
+       emitControlFlowLabel(ins);
+       break;
+
+     case DxbcOpcode::Call:
+       emitControlFlowCall(ins);
+       break;
+
+     case DxbcOpcode::Callc:
+       emitControlFlowCallc(ins);
+       break;
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled instruction: ",
+         ins.op));
+       break;
+   }
+ }
+
+
+ // Builds a 32-bit float constant from the components enabled in
+ // writeMask. Enabled components are packed tightly in x/y/z/w order;
+ // the result is a scalar constant if at most one component is enabled
+ // and a composite constant otherwise. With an empty mask the returned
+ // ID is 0 and the component count is 0.
+ DxbcRegisterValue DxbcCompiler::emitBuildConstVecf32(
+     float x,
+     float y,
+     float z,
+     float w,
+     const DxbcRegMask& writeMask) {
+   // TODO refactor these functions into one single template
+   const std::array<float, 4> values = {{ x, y, z, w }};
+
+   std::array<uint32_t, 4> constantIds = {{ 0, 0, 0, 0 }};
+   uint32_t enabledCount = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     if (writeMask[i])
+       constantIds[enabledCount++] = m_module.constf32(values[i]);
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Float32;
+   result.type.ccount = enabledCount;
+
+   if (enabledCount > 1) {
+     result.id = m_module.constComposite(
+       getVectorTypeId(result.type),
+       enabledCount, constantIds.data());
+   } else {
+     result.id = constantIds[0];
+   }
+
+   return result;
+ }
+
+
+ // Builds a 32-bit unsigned integer constant from the components
+ // enabled in writeMask. Enabled components are packed tightly in
+ // x/y/z/w order; the result is a scalar constant if at most one
+ // component is enabled and a composite constant otherwise. With an
+ // empty mask the returned ID is 0 and the component count is 0.
+ DxbcRegisterValue DxbcCompiler::emitBuildConstVecu32(
+     uint32_t x,
+     uint32_t y,
+     uint32_t z,
+     uint32_t w,
+     const DxbcRegMask& writeMask) {
+   const std::array<uint32_t, 4> values = {{ x, y, z, w }};
+
+   std::array<uint32_t, 4> constantIds = {{ 0, 0, 0, 0 }};
+   uint32_t enabledCount = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     if (writeMask[i])
+       constantIds[enabledCount++] = m_module.constu32(values[i]);
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Uint32;
+   result.type.ccount = enabledCount;
+
+   if (enabledCount > 1) {
+     result.id = m_module.constComposite(
+       getVectorTypeId(result.type),
+       enabledCount, constantIds.data());
+   } else {
+     result.id = constantIds[0];
+   }
+
+   return result;
+ }
+
+
+ // Builds a 32-bit signed integer constant from the components
+ // enabled in writeMask. Enabled components are packed tightly in
+ // x/y/z/w order; the result is a scalar constant if at most one
+ // component is enabled and a composite constant otherwise. With an
+ // empty mask the returned ID is 0 and the component count is 0.
+ DxbcRegisterValue DxbcCompiler::emitBuildConstVeci32(
+     int32_t x,
+     int32_t y,
+     int32_t z,
+     int32_t w,
+     const DxbcRegMask& writeMask) {
+   const std::array<int32_t, 4> values = {{ x, y, z, w }};
+
+   std::array<uint32_t, 4> constantIds = {{ 0, 0, 0, 0 }};
+   uint32_t enabledCount = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     if (writeMask[i])
+       constantIds[enabledCount++] = m_module.consti32(values[i]);
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Sint32;
+   result.type.ccount = enabledCount;
+
+   if (enabledCount > 1) {
+     result.id = m_module.constComposite(
+       getVectorTypeId(result.type),
+       enabledCount, constantIds.data());
+   } else {
+     result.id = constantIds[0];
+   }
+
+   return result;
+ }
+
+
+ // Builds a 64-bit float constant. Each double occupies two mask
+ // components: 'xy' is emitted only if both x and y are enabled in
+ // writeMask, 'zw' only if both z and w are. The result is a scalar
+ // constant for at most one enabled double, a composite otherwise.
+ DxbcRegisterValue DxbcCompiler::emitBuildConstVecf64(
+     double xy,
+     double zw,
+     const DxbcRegMask& writeMask) {
+   std::array<uint32_t, 2> constantIds = {{ 0, 0 }};
+   uint32_t enabledCount = 0;
+
+   const bool hasLowerPair = writeMask[0] && writeMask[1];
+   const bool hasUpperPair = writeMask[2] && writeMask[3];
+
+   if (hasLowerPair)
+     constantIds[enabledCount++] = m_module.constf64(xy);
+
+   if (hasUpperPair)
+     constantIds[enabledCount++] = m_module.constf64(zw);
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Float64;
+   result.type.ccount = enabledCount;
+
+   if (enabledCount > 1) {
+     result.id = m_module.constComposite(
+       getVectorTypeId(result.type),
+       enabledCount, constantIds.data());
+   } else {
+     result.id = constantIds[0];
+   }
+
+   return result;
+ }
+
+
+ // Replicates a scalar into a vector with 'count' components by
+ // emitting a composite constant. A count of 1 returns the scalar as-is.
+ // NOTE(review): since a constant composite is used, the scalar is
+ // presumably expected to be a constant itself - confirm with callers.
+ DxbcRegisterValue DxbcCompiler::emitBuildVector(
+     DxbcRegisterValue scalar,
+     uint32_t count) {
+   if (count == 1)
+     return scalar;
+
+   // Every component refers to the same scalar ID
+   std::array<uint32_t, 4> componentIds;
+   componentIds.fill(scalar.id);
+
+   DxbcRegisterValue vector;
+   vector.type.ctype = scalar.type.ctype;
+   vector.type.ccount = count;
+
+   const uint32_t vectorTypeId = getVectorTypeId(vector.type);
+
+   vector.id = m_module.constComposite(
+     vectorTypeId, count, componentIds.data());
+   return vector;
+ }
+
+
+ // Builds an all-zero constant of the given vector type. Only Float32,
+ // Uint32 and Sint32 are supported; any other scalar type throws
+ // DxvkError.
+ DxbcRegisterValue DxbcCompiler::emitBuildZeroVector(
+     DxbcVectorType type) {
+   DxbcRegisterValue zeroScalar;
+   zeroScalar.type.ctype = type.ctype;
+   zeroScalar.type.ccount = 1;
+
+   // Create the scalar zero first, then widen it as needed
+   if (type.ctype == DxbcScalarType::Float32) {
+     zeroScalar.id = m_module.constf32(0.0f);
+   } else if (type.ctype == DxbcScalarType::Uint32) {
+     zeroScalar.id = m_module.constu32(0u);
+   } else if (type.ctype == DxbcScalarType::Sint32) {
+     zeroScalar.id = m_module.consti32(0);
+   } else {
+     throw DxvkError("DxbcCompiler: Invalid scalar type");
+   }
+
+   return emitBuildVector(zeroScalar, type.ccount);
+ }
+
+
+ // Reinterprets the bits of srcValue as dstType. Returns the input
+ // unchanged if the scalar types already match. The component count is
+ // doubled when casting away from a double type and halved when casting
+ // to one, so the overall bit width stays the same.
+ DxbcRegisterValue DxbcCompiler::emitRegisterBitcast(
+     DxbcRegisterValue srcValue,
+     DxbcScalarType dstType) {
+   const DxbcScalarType srcType = srcValue.type.ctype;
+
+   if (srcType == dstType)
+     return srcValue;
+
+   DxbcRegisterValue casted;
+   casted.type.ctype = dstType;
+   casted.type.ccount = srcValue.type.ccount;
+
+   // Adjust the component count for 64-bit <-> 32-bit casts
+   if (isDoubleType(srcType))
+     casted.type.ccount *= 2;
+
+   if (isDoubleType(dstType))
+     casted.type.ccount /= 2;
+
+   const uint32_t castedTypeId = getVectorTypeId(casted.type);
+
+   casted.id = m_module.opBitcast(castedTypeId, srcValue.id);
+   return casted;
+ }
+
+
+ // Applies a swizzle to a value, keeping only the components enabled
+ // in writeMask. Scalars are simply replicated to the number of enabled
+ // components. No instruction is emitted if swizzle and mask together
+ // select the input components in their original order.
+ DxbcRegisterValue DxbcCompiler::emitRegisterSwizzle(
+     DxbcRegisterValue value,
+     DxbcRegSwizzle swizzle,
+     DxbcRegMask writeMask) {
+   if (value.type.ccount == 1)
+     return emitRegisterExtend(value, writeMask.popCount());
+
+   // Collect the source component index for each enabled component
+   std::array<uint32_t, 4> sourceIndices;
+   uint32_t selectedCount = 0;
+
+   for (uint32_t component = 0; component < 4; component++) {
+     if (writeMask[component])
+       sourceIndices[selectedCount++] = swizzle[component];
+   }
+
+   // The swizzle is a no-op if it selects all components of
+   // the input vector, each one at its original position.
+   bool isNoOp = selectedCount == value.type.ccount;
+
+   for (uint32_t i = 0; isNoOp && i < selectedCount; i++)
+     isNoOp = sourceIndices[i] == i;
+
+   if (isNoOp)
+     return value;
+
+   DxbcRegisterValue swizzled;
+   swizzled.type.ctype = value.type.ctype;
+   swizzled.type.ccount = selectedCount;
+
+   const uint32_t swizzledTypeId = getVectorTypeId(swizzled.type);
+
+   // A single component is extracted, everything
+   // else is built by shuffling the input vector
+   if (selectedCount != 1) {
+     swizzled.id = m_module.opVectorShuffle(
+       swizzledTypeId, value.id, value.id,
+       selectedCount, sourceIndices.data());
+   } else {
+     swizzled.id = m_module.opCompositeExtract(
+       swizzledTypeId, value.id, 1, sourceIndices.data());
+   }
+
+   return swizzled;
+ }
+
+
+ // Extracts the components enabled in 'mask' from a value, preserving
+ // their order. Implemented as a swizzle with the identity pattern.
+ DxbcRegisterValue DxbcCompiler::emitRegisterExtract(
+     DxbcRegisterValue value,
+     DxbcRegMask mask) {
+   const DxbcRegSwizzle identity(0, 1, 2, 3);
+   return emitRegisterSwizzle(value, identity, mask);
+ }
+
+
+ // Writes srcValue into the components of dstValue that are enabled in
+ // srcMask and returns the combined value, which has the type of
+ // dstValue. For a vector source, its components are consumed in order,
+ // one for each enabled mask bit.
+ DxbcRegisterValue DxbcCompiler::emitRegisterInsert(
+     DxbcRegisterValue dstValue,
+     DxbcRegisterValue srcValue,
+     DxbcRegMask srcMask) {
+   DxbcRegisterValue merged;
+   merged.type = dstValue.type;
+
+   const uint32_t mergedTypeId = getVectorTypeId(merged.type);
+
+   // Empty mask: the destination is left untouched
+   if (srcMask.popCount() == 0) {
+     merged.id = dstValue.id;
+     return merged;
+   }
+
+   // Scalar destination: the first mask bit
+   // decides which of the two values to take
+   if (dstValue.type.ccount == 1) {
+     merged.id = srcMask[0] ? srcValue.id : dstValue.id;
+     return merged;
+   }
+
+   // Scalar source, vector destination: OpVectorShuffle needs two
+   // vector operands, so insert the scalar at the first enabled
+   // component using OpCompositeInsert instead
+   if (srcValue.type.ccount == 1) {
+     const uint32_t targetComponent = srcMask.firstSet();
+
+     merged.id = m_module.opCompositeInsert(mergedTypeId,
+       srcValue.id, dstValue.id, 1, &targetComponent);
+     return merged;
+   }
+
+   // Two vectors: build a shuffle pattern. Indices below the
+   // destination component count refer to the destination, the
+   // ones starting at that count refer to the source vector.
+   std::array<uint32_t, 4> shufflePattern;
+   uint32_t nextSrcIndex = dstValue.type.ccount;
+
+   for (uint32_t i = 0; i < dstValue.type.ccount; i++) {
+     if (srcMask[i])
+       shufflePattern.at(i) = nextSrcIndex++;
+     else
+       shufflePattern.at(i) = i;
+   }
+
+   merged.id = m_module.opVectorShuffle(
+     mergedTypeId, dstValue.id, srcValue.id,
+     dstValue.type.ccount, shufflePattern.data());
+   return merged;
+ }
+
+
+ // Concatenates two values into one vector. The result uses the scalar
+ // type of value1 and has as many components as both inputs combined.
+ DxbcRegisterValue DxbcCompiler::emitRegisterConcat(
+     DxbcRegisterValue value1,
+     DxbcRegisterValue value2) {
+   DxbcRegisterValue combined;
+   combined.type.ctype = value1.type.ctype;
+   combined.type.ccount = value1.type.ccount + value2.type.ccount;
+
+   // The constituents are passed in order, first value1, then value2
+   const std::array<uint32_t, 2> constituents = {{ value1.id, value2.id }};
+   const uint32_t combinedTypeId = getVectorTypeId(combined.type);
+
+   combined.id = m_module.opCompositeConstruct(
+     combinedTypeId, constituents.size(), constituents.data());
+   return combined;
+ }
+
+
+ // Replicates a value into a vector with 'size' components using
+ // OpCompositeConstruct. A size of 1 returns the value unchanged.
+ DxbcRegisterValue DxbcCompiler::emitRegisterExtend(
+     DxbcRegisterValue value,
+     uint32_t size) {
+   if (size == 1)
+     return value;
+
+   // Every constituent refers to the same value ID
+   std::array<uint32_t, 4> constituents;
+   constituents.fill(value.id);
+
+   DxbcRegisterValue extended;
+   extended.type.ctype = value.type.ctype;
+   extended.type.ccount = size;
+
+   const uint32_t extendedTypeId = getVectorTypeId(extended.type);
+
+   extended.id = m_module.opCompositeConstruct(
+     extendedTypeId, size, constituents.data());
+   return extended;
+ }
+
+
+ // Computes the absolute value of a float or signed integer value.
+ // For any other scalar type a warning is logged and the input is
+ // returned unmodified.
+ DxbcRegisterValue DxbcCompiler::emitRegisterAbsolute(
+     DxbcRegisterValue value) {
+   const uint32_t valueTypeId = getVectorTypeId(value.type);
+
+   switch (value.type.ctype) {
+     case DxbcScalarType::Float32:
+     case DxbcScalarType::Float64:
+       value.id = m_module.opFAbs(valueTypeId, value.id);
+       break;
+
+     case DxbcScalarType::Sint32:
+     case DxbcScalarType::Sint64:
+       value.id = m_module.opSAbs(valueTypeId, value.id);
+       break;
+
+     default:
+       Logger::warn("DxbcCompiler: Cannot get absolute value for given type");
+       break;
+   }
+
+   return value;
+ }
+
+
+ // Negates a float or signed integer value. For any other scalar
+ // type a warning is logged and the input is returned unmodified.
+ DxbcRegisterValue DxbcCompiler::emitRegisterNegate(
+     DxbcRegisterValue value) {
+   const uint32_t valueTypeId = getVectorTypeId(value.type);
+
+   switch (value.type.ctype) {
+     case DxbcScalarType::Float32:
+     case DxbcScalarType::Float64:
+       value.id = m_module.opFNegate(valueTypeId, value.id);
+       break;
+
+     case DxbcScalarType::Sint32:
+     case DxbcScalarType::Sint64:
+       value.id = m_module.opSNegate(valueTypeId, value.id);
+       break;
+
+     default:
+       Logger::warn("DxbcCompiler: Cannot negate given type");
+       break;
+   }
+
+   return value;
+ }
+
+
+ // Compares a value against the unsigned constant zero and returns a
+ // scalar boolean: 'value == 0' for DxbcZeroTest::TestZ, 'value != 0'
+ // for any other test mode.
+ DxbcRegisterValue DxbcCompiler::emitRegisterZeroTest(
+     DxbcRegisterValue value,
+     DxbcZeroTest test) {
+   DxbcRegisterValue outcome;
+   outcome.type.ctype = DxbcScalarType::Bool;
+   outcome.type.ccount = 1;
+
+   const uint32_t zeroId     = m_module.constu32(0u);
+   const uint32_t boolTypeId = getVectorTypeId(outcome.type);
+
+   if (test == DxbcZeroTest::TestZ)
+     outcome.id = m_module.opIEqual(boolTypeId, value.id, zeroId);
+   else
+     outcome.id = m_module.opINotEqual(boolTypeId, value.id, zeroId);
+
+   return outcome;
+ }
+
+
+ // Performs a bitwise AND of every component of 'value' with the
+ // same 32-bit mask and returns the result with the type of 'value'.
+ DxbcRegisterValue DxbcCompiler::emitRegisterMaskBits(
+     DxbcRegisterValue value,
+     uint32_t mask) {
+   // Constant with one copy of the mask for each component of the value
+   const DxbcRegMask componentMask = DxbcRegMask::firstN(value.type.ccount);
+
+   const DxbcRegisterValue maskConstant = emitBuildConstVecu32(
+     mask, mask, mask, mask, componentMask);
+
+   DxbcRegisterValue masked;
+   masked.type = value.type;
+
+   const uint32_t maskedTypeId = getVectorTypeId(masked.type);
+
+   masked.id = m_module.opBitwiseAnd(
+     maskedTypeId, value.id, maskConstant.id);
+   return masked;
+ }
+
+
+ // Applies source operand modifiers to a loaded value. If both are
+ // set, the absolute value is taken before the negation, so the
+ // combination yields -|value|.
+ DxbcRegisterValue DxbcCompiler::emitSrcOperandModifiers(
+     DxbcRegisterValue value,
+     DxbcRegModifiers modifiers) {
+   DxbcRegisterValue modified = value;
+
+   if (modifiers.test(DxbcRegModifier::Abs))
+     modified = emitRegisterAbsolute(modified);
+
+   if (modifiers.test(DxbcRegModifier::Neg))
+     modified = emitRegisterNegate(modified);
+
+   return modified;
+ }
+
+
+ // Applies destination modifiers to a computed value. Only the
+ // 'saturate' modifier is handled here: Float32 and Float64 values are
+ // clamped to the range [0, 1], values of any other scalar type are
+ // returned unchanged.
+ DxbcRegisterValue DxbcCompiler::emitDstOperandModifiers(
+     DxbcRegisterValue value,
+     DxbcOpModifiers modifiers) {
+   const uint32_t valueTypeId = getVectorTypeId(value.type);
+
+   if (!modifiers.saturate)
+     return value;
+
+   // The mask stays empty for types that cannot be saturated
+   DxbcRegMask componentMask;
+   DxbcRegisterValue lowerBound, upperBound;
+
+   switch (value.type.ctype) {
+     case DxbcScalarType::Float32:
+       componentMask = DxbcRegMask::firstN(value.type.ccount);
+       lowerBound = emitBuildConstVecf32(0.0f, 0.0f, 0.0f, 0.0f, componentMask);
+       upperBound = emitBuildConstVecf32(1.0f, 1.0f, 1.0f, 1.0f, componentMask);
+       break;
+
+     case DxbcScalarType::Float64:
+       // Each double takes up two components of the mask
+       componentMask = DxbcRegMask::firstN(value.type.ccount * 2);
+       lowerBound = emitBuildConstVecf64(0.0, 0.0, componentMask);
+       upperBound = emitBuildConstVecf64(1.0, 1.0, componentMask);
+       break;
+
+     default:
+       break;
+   }
+
+   if (componentMask) {
+     value.id = m_module.opNClamp(valueTypeId,
+       value.id, lowerBound.id, upperBound.id);
+   }
+
+   return value;
+ }
+
+
+ // Emits an access chain with a single index into the given pointer.
+ // 'index' is passed to the access chain as-is, i.e. it is a SPIR-V ID.
+ // The returned pointer reports the same vector type as the input and
+ // uses the given storage class for its pointer type.
+ DxbcRegisterPointer DxbcCompiler::emitArrayAccess(
+     DxbcRegisterPointer pointer,
+     spv::StorageClass sclass,
+     uint32_t index) {
+   const uint32_t elementTypeId = getVectorTypeId(pointer.type);
+   const uint32_t elementPtrTypeId = m_module.defPointerType(elementTypeId, sclass);
+
+   DxbcRegisterPointer element;
+   element.type = pointer.type;
+   element.id = m_module.opAccessChain(
+     elementPtrTypeId, pointer.id, 1, &index);
+   return element;
+ }
+
+
+ // Loads the given texture and sampler variables and combines them
+ // into a sampled image object, whose ID is returned. The sampled image
+ // type is derived from the texture's depth type if isDepthCompare is
+ // set, and from its color type otherwise.
+ uint32_t DxbcCompiler::emitLoadSampledImage(
+ const DxbcShaderResource& textureResource,
+ const DxbcSampler& samplerResource,
+ bool isDepthCompare) {
+ const uint32_t sampledImageType = isDepthCompare
+ ? m_module.defSampledImageType(textureResource.depthTypeId)
+ : m_module.defSampledImageType(textureResource.colorTypeId);
+
+ // Both loads are evaluated as function arguments, so C++ does not
+ // specify which of the two load instructions is emitted first.
+ return m_module.opSampledImage(sampledImageType,
+ m_module.opLoad(textureResource.imageTypeId, textureResource.varId),
+ m_module.opLoad(samplerResource.typeId, samplerResource.varId));
+ }
+
+
+ // Returns a pointer to the temporary register (r#) referenced by the
+ // operand. The backing variable, a private four-component float
+ // vector named "r<index>", is created on first use.
+ DxbcRegisterPointer DxbcCompiler::emitGetTempPtr(
+     const DxbcRegister& operand) {
+   // r# regs are indexed as follows:
+   //    (0) register index (immediate)
+   const uint32_t tempIndex = operand.idx[0].offset;
+
+   // Grow the register table if this index has not been seen yet
+   if (m_rRegs.size() <= tempIndex)
+     m_rRegs.resize(tempIndex + 1, 0u);
+
+   // A zero entry means that the variable does not exist yet
+   if (m_rRegs.at(tempIndex) == 0) {
+     DxbcRegisterInfo varInfo;
+     varInfo.type.ctype = DxbcScalarType::Float32;
+     varInfo.type.ccount = 4;
+     varInfo.type.alength = 0;
+     varInfo.sclass = spv::StorageClassPrivate;
+
+     const uint32_t newVarId = emitNewVariable(varInfo);
+     m_rRegs.at(tempIndex) = newVarId;
+
+     m_module.setDebugName(newVarId,
+       str::format("r", tempIndex).c_str());
+   }
+
+   DxbcRegisterPointer tempPtr;
+   tempPtr.type.ctype = DxbcScalarType::Float32;
+   tempPtr.type.ccount = 4;
+   tempPtr.id = m_rRegs.at(tempIndex);
+   return tempPtr;
+ }
+
+
+ // Returns a pointer into an indexable temporary array. The element
+ // index is loaded from the operand's second index dimension.
+ DxbcRegisterPointer DxbcCompiler::emitGetIndexableTempPtr(
+     const DxbcRegister& operand) {
+   const DxbcRegisterValue elementIndex = emitIndexLoad(operand.idx[1]);
+   return getIndexableTempPtr(operand, elementIndex);
+ }
+
+
+ // Returns a pointer to a four-component float input register. The
+ // array that gets indexed depends on the operand type and on whether
+ // the program is a hull shader; one access chain index is emitted for
+ // each index dimension of the operand.
+ DxbcRegisterPointer DxbcCompiler::emitGetInputPtr(
+     const DxbcRegister& operand) {
+   // In the vertex and pixel stages,
+   // v# regs are indexed as follows:
+   //    (0) register index (relative)
+   //
+   // In the tessellation and geometry
+   // stages, the index has two dimensions:
+   //    (0) vertex index (relative)
+   //    (1) register index (relative)
+   DxbcRegisterPointer inputPtr;
+   inputPtr.type.ctype = DxbcScalarType::Float32;
+   inputPtr.type.ccount = 4;
+
+   std::array<uint32_t, 2> chainIndices = {{ 0, 0 }};
+
+   for (uint32_t dim = 0; dim < operand.idxDim; dim++)
+     chainIndices.at(dim) = emitIndexLoad(operand.idx[dim]).id;
+
+   // Select the array to index and its storage class. The
+   // defaults apply to regular inputs and to control point
+   // inputs of hull shaders.
+   uint32_t arrayId = m_vArray;
+   spv::StorageClass arrayClass = spv::StorageClassPrivate;
+
+   switch (operand.type) {
+     case DxbcOperandType::InputControlPoint:
+       if (m_programInfo.type() != DxbcProgramType::HullShader) {
+         arrayId = m_ds.inputPerVertex;
+         arrayClass = spv::StorageClassInput;
+       }
+       break;
+
+     case DxbcOperandType::InputPatchConstant:
+       if (m_programInfo.type() == DxbcProgramType::HullShader) {
+         arrayId = m_hs.outputPerPatch;
+         arrayClass = spv::StorageClassPrivate;
+       } else {
+         arrayId = m_ds.inputPerPatch;
+         arrayClass = spv::StorageClassInput;
+       }
+       break;
+
+     case DxbcOperandType::OutputControlPoint:
+       arrayId = m_hs.outputPerVertex;
+       arrayClass = spv::StorageClassOutput;
+       break;
+
+     default:
+       break;
+   }
+
+   DxbcRegisterInfo ptrInfo;
+   ptrInfo.type.ctype = inputPtr.type.ctype;
+   ptrInfo.type.ccount = inputPtr.type.ccount;
+   ptrInfo.type.alength = 0;
+   ptrInfo.sclass = arrayClass;
+
+   const uint32_t ptrTypeId = getPointerTypeId(ptrInfo);
+
+   inputPtr.id = m_module.opAccessChain(
+     ptrTypeId, arrayId,
+     operand.idxDim, chainIndices.data());
+   return inputPtr;
+ }
+
+
+ // Returns a pointer to the output register (o#) referenced by the
+ // operand. Outside of hull shaders this is a lookup in m_oRegs. Hull
+ // shaders index either the per-vertex output array (control point
+ // phase, using the invocation ID as the first index) or the per-patch
+ // output array (all other phases).
+ DxbcRegisterPointer DxbcCompiler::emitGetOutputPtr(
+     const DxbcRegister& operand) {
+   // Regular shaders have their output
+   // registers set up at declaration time
+   if (m_programInfo.type() != DxbcProgramType::HullShader)
+     return m_oRegs.at(operand.idx[0].offset);
+
+   // Hull shaders are special in that they have two sets of
+   // output registers, one for per-patch values and one for
+   // per-vertex values.
+   DxbcRegisterPointer outputPtr;
+   outputPtr.type.ctype = DxbcScalarType::Float32;
+   outputPtr.type.ccount = 4;
+
+   uint32_t registerIndexId = emitIndexLoad(operand.idx[0]).id;
+
+   if (m_hs.currPhaseType != DxbcCompilerHsPhase::ControlPoint) {
+     // Per-patch outputs live in a private array
+     const uint32_t patchPtrTypeId = m_module.defPointerType(
+       getVectorTypeId(outputPtr.type),
+       spv::StorageClassPrivate);
+
+     outputPtr.id = m_module.opAccessChain(
+       patchPtrTypeId, m_hs.outputPerPatch,
+       1, &registerIndexId);
+     return outputPtr;
+   }
+
+   // Per-vertex outputs: [invocation ID][register index]
+   std::array<uint32_t, 2> chainIndices = {{
+     m_module.opLoad(m_module.defIntType(32, 0), m_hs.builtinInvocationId),
+     registerIndexId,
+   }};
+
+   const uint32_t vertexPtrTypeId = m_module.defPointerType(
+     getVectorTypeId(outputPtr.type),
+     spv::StorageClassOutput);
+
+   outputPtr.id = m_module.opAccessChain(
+     vertexPtrTypeId, m_hs.outputPerVertex,
+     chainIndices.size(), chainIndices.data());
+   return outputPtr;
+ }
+
+
+ // Returns a pointer to an element of the immediate constant buffer.
+ // If m_immConstBuf is set, that private array of four-component
+ // unsigned vectors is indexed. Otherwise the uniform buffer in the
+ // constant buffer slot Icb_BindingSlotId is used, with float vector
+ // elements. Throws DxvkError if neither of the two is available.
+ DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
+     const DxbcRegister& operand) {
+   const DxbcRegisterValue elementIndex
+     = emitIndexLoad(operand.idx[0]);
+
+   // Preferred path: directly index the private array
+   if (m_immConstBuf != 0) {
+     DxbcRegisterInfo arrayPtrInfo;
+     arrayPtrInfo.type.ctype = DxbcScalarType::Uint32;
+     arrayPtrInfo.type.ccount = 4;
+     arrayPtrInfo.type.alength = 0;
+     arrayPtrInfo.sclass = spv::StorageClassPrivate;
+
+     DxbcRegisterPointer elementPtr;
+     elementPtr.type.ctype = arrayPtrInfo.type.ctype;
+     elementPtr.type.ccount = arrayPtrInfo.type.ccount;
+     elementPtr.id = m_module.opAccessChain(
+       getPointerTypeId(arrayPtrInfo),
+       m_immConstBuf, 1, &elementIndex.id);
+     return elementPtr;
+   }
+
+   // Alternative path: the data is stored in a uniform buffer
+   if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
+     // Member 0 of the buffer, then the element within it
+     const std::array<uint32_t, 2> chainIndices =
+       {{ m_module.consti32(0), elementIndex.id }};
+
+     DxbcRegisterInfo uniformPtrInfo;
+     uniformPtrInfo.type.ctype = DxbcScalarType::Float32;
+     uniformPtrInfo.type.ccount = 4;
+     uniformPtrInfo.type.alength = 0;
+     uniformPtrInfo.sclass = spv::StorageClassUniform;
+
+     DxbcRegisterPointer elementPtr;
+     elementPtr.type.ctype = uniformPtrInfo.type.ctype;
+     elementPtr.type.ccount = uniformPtrInfo.type.ccount;
+     elementPtr.id = m_module.opAccessChain(
+       getPointerTypeId(uniformPtrInfo),
+       m_constantBuffers.at(Icb_BindingSlotId).varId,
+       chainIndices.size(), chainIndices.data());
+     return elementPtr;
+   }
+
+   throw DxvkError("DxbcCompiler: Immediate constant buffer not defined");
+ }
+
+
+ // Returns a pointer to the variable backing the given operand, along
+ // with the vector type of the value stored there. Register-like
+ // operands are forwarded to their dedicated helpers, built-in values
+ // map directly to the corresponding variable. Throws DxvkError for
+ // operand types that are not handled.
+ DxbcRegisterPointer DxbcCompiler::emitGetOperandPtr(
+     const DxbcRegister& operand) {
+   // Pairs an existing variable ID with its value type
+   auto makePointer = [] (
+           DxbcScalarType ctype,
+           uint32_t       ccount,
+           uint32_t       varId) {
+     DxbcRegisterPointer ptr;
+     ptr.type.ctype = ctype;
+     ptr.type.ccount = ccount;
+     ptr.id = varId;
+     return ptr;
+   };
+
+   // The sample mask built-ins are arrays, of
+   // which only element 0 is made accessible
+   auto makeSampleMaskPointer = [this] (
+           spv::StorageClass sclass,
+           uint32_t          maskVarId) {
+     const uint32_t elementIndex = m_module.constu32(0);
+
+     DxbcRegisterPointer ptr;
+     ptr.type.ctype = DxbcScalarType::Uint32;
+     ptr.type.ccount = 1;
+     ptr.id = m_module.opAccessChain(
+       m_module.defPointerType(
+         getVectorTypeId(ptr.type), sclass),
+       maskVarId, 1, &elementIndex);
+     return ptr;
+   };
+
+   switch (operand.type) {
+     // Registers
+     case DxbcOperandType::Temp:
+       return emitGetTempPtr(operand);
+
+     case DxbcOperandType::IndexableTemp:
+       return emitGetIndexableTempPtr(operand);
+
+     case DxbcOperandType::Input:
+     case DxbcOperandType::InputControlPoint:
+     case DxbcOperandType::InputPatchConstant:
+     case DxbcOperandType::OutputControlPoint:
+       return emitGetInputPtr(operand);
+
+     case DxbcOperandType::Output:
+       return emitGetOutputPtr(operand);
+
+     case DxbcOperandType::ImmediateConstantBuffer:
+       return emitGetImmConstBufPtr(operand);
+
+     // Compute shader built-ins
+     case DxbcOperandType::InputThreadId:
+       return makePointer(DxbcScalarType::Uint32, 3,
+         m_cs.builtinGlobalInvocationId);
+
+     case DxbcOperandType::InputThreadGroupId:
+       return makePointer(DxbcScalarType::Uint32, 3,
+         m_cs.builtinWorkgroupId);
+
+     case DxbcOperandType::InputThreadIdInGroup:
+       return makePointer(DxbcScalarType::Uint32, 3,
+         m_cs.builtinLocalInvocationId);
+
+     case DxbcOperandType::InputThreadIndexInGroup:
+       return makePointer(DxbcScalarType::Uint32, 1,
+         m_cs.builtinLocalInvocationIndex);
+
+     // Pixel shader built-ins
+     case DxbcOperandType::InputCoverageMask:
+       return makeSampleMaskPointer(
+         spv::StorageClassInput,
+         m_ps.builtinSampleMaskIn);
+
+     case DxbcOperandType::OutputCoverageMask:
+       return makeSampleMaskPointer(
+         spv::StorageClassOutput,
+         m_ps.builtinSampleMaskOut);
+
+     case DxbcOperandType::OutputDepth:
+     case DxbcOperandType::OutputDepthGe:
+     case DxbcOperandType::OutputDepthLe:
+       return makePointer(DxbcScalarType::Float32, 1,
+         m_ps.builtinDepth);
+
+     case DxbcOperandType::OutputStencilRef:
+       return makePointer(DxbcScalarType::Sint32, 1,
+         m_ps.builtinStencilRef);
+
+     // Built-ins of the remaining stages
+     case DxbcOperandType::InputPrimitiveId:
+       return makePointer(DxbcScalarType::Uint32, 1,
+         m_primitiveIdIn);
+
+     case DxbcOperandType::InputDomainPoint:
+       return makePointer(DxbcScalarType::Float32, 3,
+         m_ds.builtinTessCoord);
+
+     case DxbcOperandType::OutputControlPointId:
+       return makePointer(DxbcScalarType::Uint32, 1,
+         m_hs.builtinInvocationId);
+
+     case DxbcOperandType::InputForkInstanceId:
+     case DxbcOperandType::InputJoinInstanceId:
+       return makePointer(DxbcScalarType::Uint32, 1,
+         getCurrentHsForkJoinPhase()->instanceIdPtr);
+
+     case DxbcOperandType::InputGsInstanceId:
+       return makePointer(DxbcScalarType::Uint32, 1,
+         m_gs.builtinInvocationId);
+
+     default:
+       throw DxvkError(str::format(
+         "DxbcCompiler: Unhandled operand type: ",
+         operand.type));
+   }
+ }
+
+
+ // Computes a pointer to a single scalar element of a UAV or of
+ // thread group shared memory, for use with atomic operations.
+ //
+ // 'operand' identifies the resource, 'address' is the register that
+ // holds the element address. How the address is interpreted depends
+ // on the resource type (raw, structured or typed). Throws DxvkError
+ // for unhandled resource types, and for typed shared memory.
+ DxbcRegisterPointer DxbcCompiler::emitGetAtomicPointer(
+ const DxbcRegister& operand,
+ const DxbcRegister& address) {
+ // Query information about the resource itself
+ const uint32_t registerId = operand.idx[0].offset;
+ const DxbcBufferInfo resourceInfo = getBufferInfo(operand);
+
+ // For UAVs and shared memory, different methods
+ // of obtaining the final pointer are used.
+ // The SSBO path is only taken for non-typed, non-TGSM resources
+ // whose alignment is at least the configured minSsboAlignment.
+ bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory;
+ bool isSsbo = m_moduleInfo.options.minSsboAlignment <= resourceInfo.align
+ && resourceInfo.type != DxbcResourceType::Typed
+ && !isTgsm;
+
+ // Compute the actual address into the resource
+ const DxbcRegisterValue addressValue = [&] {
+ switch (resourceInfo.type) {
+ case DxbcResourceType::Raw:
+ // Raw buffers: only the first address component is used
+ return emitCalcBufferIndexRaw(emitRegisterLoad(
+ address, DxbcRegMask(true, false, false, false)));
+
+ case DxbcResourceType::Structured: {
+ // Structured buffers: the first two address components are
+ // passed on separately, together with the structure stride
+ const DxbcRegisterValue addressComponents = emitRegisterLoad(
+ address, DxbcRegMask(true, true, false, false));
+
+ return emitCalcBufferIndexStructured(
+ emitRegisterExtract(addressComponents, DxbcRegMask(true, false, false, false)),
+ emitRegisterExtract(addressComponents, DxbcRegMask(false, true, false, false)),
+ resourceInfo.stride);
+ };
+
+ case DxbcResourceType::Typed: {
+ if (isTgsm)
+ throw DxvkError("DxbcCompiler: TGSM cannot be typed");
+
+ // Typed UAVs: the address is a texel coordinate
+ return emitLoadTexCoord(address,
+ m_uavs.at(registerId).imageInfo);
+ }
+
+ default:
+ throw DxvkError("DxbcCompiler: Unhandled resource type");
+ }
+ }();
+
+ // Compute the actual pointer
+ DxbcRegisterPointer result;
+ result.type.ctype = resourceInfo.stype;
+ result.type.ccount = 1;
+
+ if (isTgsm) {
+ // Shared memory: index the variable directly
+ result.id = m_module.opAccessChain(resourceInfo.typeId,
+ resourceInfo.varId, 1, &addressValue.id);
+ } else if (isSsbo) {
+ // Storage buffer: member 0 first, then the element within it
+ uint32_t indices[2] = { m_module.constu32(0), addressValue.id };
+ result.id = m_module.opAccessChain(resourceInfo.typeId,
+ resourceInfo.varId, 2, indices);
+ } else {
+ // Image-based resource: texel pointer, with a constant 0 as sample
+ result.id = m_module.opImageTexelPointer(
+ m_module.defPointerType(getVectorTypeId(result.type), spv::StorageClassImage),
+ resourceInfo.varId, addressValue.id, m_module.constu32(0));
+ }
+
+ return result;
+ }
+
+
+ // Loads up to four 32-bit components from a raw or structured buffer.
+ //
+ // operand      - resource, UAV or TGSM register; its swizzle selects which
+ //                element relative to elementIndex feeds each component
+ // elementIndex - index of the first element to read
+ // writeMask    - destination components that actually need a value
+ //
+ // Returns a Uint32 scalar or vector with one component per bit set in
+ // writeMask. Throws DxvkError if the operand is neither TGSM, a storage
+ // buffer, a resource nor a UAV.
+ DxbcRegisterValue DxbcCompiler::emitRawBufferLoad(
+     const DxbcRegister& operand,
+     DxbcRegisterValue elementIndex,
+     DxbcRegMask writeMask) {
+   const DxbcBufferInfo bufferInfo = getBufferInfo(operand);
+
+   // TGSM and storage buffers are read through access chains;
+   // all other buffers are read through an image object.
+   bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory;
+   bool isSsbo = m_moduleInfo.options.minSsboAlignment <= bufferInfo.align
+              && !isTgsm;
+
+   // The image object only needs to be loaded for the image paths
+   uint32_t bufferId = isTgsm || isSsbo ? 0 : m_module.opLoad(bufferInfo.typeId, bufferInfo.varId);
+
+   uint32_t vectorTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 4 });
+   uint32_t scalarTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 1 });
+
+   // Since all data is represented as a sequence of 32-bit
+   // integers, we have to load each component individually.
+   // ccomps caches the loaded ID per source component (0 = not
+   // loaded yet), scomps is the swizzled result in output order.
+   std::array<uint32_t, 4> ccomps = { 0, 0, 0, 0 };
+   std::array<uint32_t, 4> scomps = { 0, 0, 0, 0 };
+   uint32_t scount = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     uint32_t sindex = operand.swizzle[i];
+
+     if (!writeMask[i])
+       continue;
+
+     // Each distinct source component is loaded at most once
+     if (ccomps[sindex] == 0) {
+       uint32_t elementIndexAdjusted = m_module.opIAdd(
+         getVectorTypeId(elementIndex.type), elementIndex.id,
+         m_module.consti32(sindex));
+
+       // Image fetches return a vector; component 0 holds the data
+       uint32_t zero = 0;
+
+       if (isTgsm) {
+         ccomps[sindex] = m_module.opLoad(scalarTypeId,
+           m_module.opAccessChain(bufferInfo.typeId,
+             bufferInfo.varId, 1, &elementIndexAdjusted));
+       } else if (isSsbo) {
+         uint32_t indices[2] = { m_module.constu32(0), elementIndexAdjusted };
+         ccomps[sindex] = m_module.opLoad(scalarTypeId,
+           m_module.opAccessChain(bufferInfo.typeId,
+             bufferInfo.varId, 2, indices));
+       } else if (operand.type == DxbcOperandType::Resource) {
+         ccomps[sindex] = m_module.opCompositeExtract(scalarTypeId,
+           m_module.opImageFetch(vectorTypeId,
+             bufferId, elementIndexAdjusted,
+             SpirvImageOperands()), 1, &zero);
+       } else if (operand.type == DxbcOperandType::UnorderedAccessView) {
+         ccomps[sindex] = m_module.opCompositeExtract(scalarTypeId,
+           m_module.opImageRead(vectorTypeId,
+             bufferId, elementIndexAdjusted,
+             SpirvImageOperands()), 1, &zero);
+       } else {
+         throw DxvkError("DxbcCompiler: Invalid operand type for structured/raw load");
+       }
+     }
+   }
+
+   // Gather the loaded components in destination order
+   for (uint32_t i = 0; i < 4; i++) {
+     uint32_t sindex = operand.swizzle[i];
+
+     if (writeMask[i])
+       scomps[scount++] = ccomps[sindex];
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Uint32;
+   result.type.ccount = scount;
+   result.id = scomps[0];
+
+   // A single component is returned as-is, several become a vector
+   if (scount > 1) {
+     result.id = m_module.opCompositeConstruct(
+       getVectorTypeId(result.type),
+       scount, scomps.data());
+   }
+
+   return result;
+ }
+
+
+ // Stores up to four 32-bit components to a raw or structured buffer.
+ //
+ // operand      - UAV or TGSM register; its write mask selects the element
+ //                offsets (relative to elementIndex) that are written
+ // elementIndex - index of the first element of the destination
+ // value        - source data; consecutive components are consumed for
+ //                each enabled mask bit, a scalar is reused for all of them
+ //
+ // Writes to anything but TGSM are wrapped in a conditional block guarded
+ // by emitUavWriteTest. Throws DxvkError if the operand is neither TGSM,
+ // a storage buffer nor a UAV.
+ void DxbcCompiler::emitRawBufferStore(
+     const DxbcRegister& operand,
+     DxbcRegisterValue elementIndex,
+     DxbcRegisterValue value) {
+   const DxbcBufferInfo bufferInfo = getBufferInfo(operand);
+
+   // Cast source value to the expected data type
+   value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
+
+   // TGSM and storage buffers are written through access chains;
+   // all other buffers are written through an image object.
+   bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory;
+   bool isSsbo = m_moduleInfo.options.minSsboAlignment <= bufferInfo.align
+              && !isTgsm;
+
+   // Perform UAV writes only if the UAV is bound and if there
+   // is nothing else preventing us from writing to it.
+   DxbcConditional cond;
+
+   if (!isTgsm) {
+     uint32_t writeTest = emitUavWriteTest(bufferInfo);
+
+     cond.labelIf = m_module.allocateId();
+     cond.labelEnd = m_module.allocateId();
+
+     m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+     m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd);
+
+     m_module.opLabel(cond.labelIf);
+   }
+
+   // The image object only needs to be loaded for the image path
+   uint32_t bufferId = isTgsm || isSsbo ? 0 : m_module.opLoad(bufferInfo.typeId, bufferInfo.varId);
+
+   uint32_t scalarTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 1 });
+   uint32_t vectorTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 4 });
+
+   // Index of the next source component to consume
+   uint32_t srcComponentIndex = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     if (operand.mask[i]) {
+       uint32_t srcComponentId = value.type.ccount > 1
+         ? m_module.opCompositeExtract(scalarTypeId,
+             value.id, 1, &srcComponentIndex)
+         : value.id;
+
+       // Add the component offset to the element index
+       uint32_t elementIndexAdjusted = i != 0
+         ? m_module.opIAdd(getVectorTypeId(elementIndex.type),
+             elementIndex.id, m_module.consti32(i))
+         : elementIndex.id;
+
+       if (isTgsm) {
+         m_module.opStore(
+           m_module.opAccessChain(bufferInfo.typeId,
+             bufferInfo.varId, 1, &elementIndexAdjusted),
+           srcComponentId);
+       } else if (isSsbo) {
+         uint32_t indices[2] = { m_module.constu32(0), elementIndexAdjusted };
+         m_module.opStore(
+           m_module.opAccessChain(bufferInfo.typeId,
+             bufferInfo.varId, 2, indices),
+           srcComponentId);
+       } else if (operand.type == DxbcOperandType::UnorderedAccessView) {
+         // Image writes take a four-component texel, so the
+         // scalar is replicated into all four components.
+         const std::array<uint32_t, 4> srcVectorIds = {
+           srcComponentId, srcComponentId,
+           srcComponentId, srcComponentId,
+         };
+
+         m_module.opImageWrite(
+           bufferId, elementIndexAdjusted,
+           m_module.opCompositeConstruct(vectorTypeId,
+             4, srcVectorIds.data()),
+           SpirvImageOperands());
+       } else {
+         throw DxvkError("DxbcCompiler: Invalid operand type for structured/raw store");
+       }
+
+       // Write next component
+       srcComponentIndex += 1;
+     }
+   }
+
+   // Make sure that shared memory stores are made visible in
+   // case the game does not synchronize invocations properly
+   if (isTgsm && m_moduleInfo.options.forceTgsmBarriers) {
+     m_module.opMemoryBarrier(
+       m_module.constu32(spv::ScopeWorkgroup),
+       m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
+                       | spv::MemorySemanticsAcquireReleaseMask));
+   }
+
+   // End conditional block
+   if (!isTgsm) {
+     m_module.opBranch(cond.labelEnd);
+     m_module.opLabel (cond.labelEnd);
+   }
+ }
+
+
+ // Queries the runtime array length of a buffer variable and
+ // returns it as a Uint32 scalar. The bufferInfo.specId condition
+ // selects 0 instead, which covers the unbound-resource case.
+ DxbcRegisterValue DxbcCompiler::emitQueryBufferSize(
+     const DxbcRegister& resource) {
+   const DxbcBufferInfo info = getBufferInfo(resource);
+
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Uint32, 1 };
+
+   const uint32_t sizeTypeId = getVectorTypeId(result.type);
+
+   // Length of the runtime array stored in member 0
+   const uint32_t lengthId = m_module.opArrayLength(
+     sizeTypeId, info.varId, 0);
+
+   // Fall back to zero when the binding condition is false
+   result.id = m_module.opSelect(sizeTypeId,
+     info.specId, lengthId, m_module.constu32(0));
+   return result;
+ }
+
+
+ // Queries the number of texels in a texel buffer and returns it
+ // as a Uint32 scalar, or 0 if the bufferInfo.specId condition is
+ // false (unbound resource). Only valid for image-backed buffers.
+ DxbcRegisterValue DxbcCompiler::emitQueryTexelBufferSize(
+     const DxbcRegister& resource) {
+   const DxbcBufferInfo info = getBufferInfo(resource);
+
+   // The size query operates on the loaded image object
+   const uint32_t imageId = m_module.opLoad(info.typeId, info.varId);
+
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Uint32, 1 };
+
+   const uint32_t sizeTypeId = getVectorTypeId(result.type);
+   const uint32_t texelCountId = m_module.opImageQuerySize(sizeTypeId, imageId);
+
+   // Fall back to zero when the binding condition is false
+   result.id = m_module.opSelect(sizeTypeId,
+     info.specId, texelCountId, m_module.constu32(0));
+   return result;
+ }
+
+
+ // Returns the mip level count of an image as a Uint32 scalar.
+ // Images that are not sampled (UAVs) always report one level,
+ // and unbound images report zero.
+ DxbcRegisterValue DxbcCompiler::emitQueryTextureLods(
+     const DxbcRegister& resource) {
+   const DxbcBufferInfo info = getBufferInfo(resource);
+
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Uint32, 1 };
+
+   const uint32_t countTypeId = getVectorTypeId(result.type);
+
+   // Only sampled images can be queried for their level count
+   const uint32_t levelCountId = info.image.sampled == 1
+     ? m_module.opImageQueryLevels(countTypeId,
+         m_module.opLoad(info.typeId, info.varId))
+     : m_module.constu32(1);
+
+   // Fall back to zero when the binding condition is false
+   result.id = m_module.opSelect(countTypeId,
+     info.specId, levelCountId, m_module.constu32(0));
+   return result;
+ }
+
+
+ // Returns the sample count of the rasterizer or of an image as a
+ // Uint32 scalar. Non-multisampled images report one sample and
+ // unbound images report zero.
+ DxbcRegisterValue DxbcCompiler::emitQueryTextureSamples(
+     const DxbcRegister& resource) {
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Uint32, 1 };
+
+   if (resource.type == DxbcOperandType::Rasterizer) {
+     // There is no built-in for the rasterizer sample count, so
+     // it is provided through a lazily created spec constant.
+     if (!m_ps.specRsSampleCount) {
+       m_ps.specRsSampleCount = emitNewSpecConstant(
+         DxvkSpecConstantId::RasterizerSampleCount,
+         DxbcScalarType::Uint32, 1,
+         "RasterizerSampleCount");
+     }
+
+     result.id = m_ps.specRsSampleCount;
+     return result;
+   }
+
+   const DxbcBufferInfo info = getBufferInfo(resource);
+   const uint32_t countTypeId = getVectorTypeId(result.type);
+
+   // The sample query is only legal for multisampled images
+   const uint32_t sampleCountId = info.image.ms
+     ? m_module.opImageQuerySamples(countTypeId,
+         m_module.opLoad(info.typeId, info.varId))
+     : m_module.constu32(1);
+
+   // Fall back to zero when the binding condition is false
+   result.id = m_module.opSelect(countTypeId,
+     info.specId, sampleCountId, m_module.constu32(0));
+   return result;
+ }
+
+
+ // Returns the dimensions of an image as a Uint32 scalar or vector
+ // whose component count comes from getTexSizeDim. The lod argument
+ // is only used for non-multisampled sampled images. Unbound images
+ // report zero in every component.
+ DxbcRegisterValue DxbcCompiler::emitQueryTextureSize(
+     const DxbcRegister& resource,
+     DxbcRegisterValue lod) {
+   const DxbcBufferInfo info = getBufferInfo(resource);
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Uint32;
+   result.type.ccount = getTexSizeDim(info.image);
+
+   const uint32_t dimCount = result.type.ccount;
+   const uint32_t sizeTypeId = getVectorTypeId(result.type);
+   const uint32_t imageId = m_module.opLoad(info.typeId, info.varId);
+
+   // Only this kind of image takes an explicit LOD for the query
+   const bool queryWithLod = info.image.ms == 0 && info.image.sampled == 1;
+
+   result.id = queryWithLod
+     ? m_module.opImageQuerySizeLod(sizeTypeId, imageId, lod.id)
+     : m_module.opImageQuerySize(sizeTypeId, imageId);
+
+   // Build the fallback value and the select condition. For vector
+   // results both have to be widened to the same component count.
+   uint32_t fallbackId = m_module.constu32(0);
+   uint32_t boundId = info.specId;
+
+   if (dimCount > 1) {
+     std::array<uint32_t, 4> fallbackIds;
+     std::array<uint32_t, 4> boundIds;
+     fallbackIds.fill(fallbackId);
+     boundIds.fill(boundId);
+
+     fallbackId = m_module.opCompositeConstruct(
+       sizeTypeId, dimCount, fallbackIds.data());
+
+     boundId = m_module.opCompositeConstruct(
+       m_module.defVectorType(m_module.defBoolType(), dimCount),
+       dimCount, boundIds.data());
+   }
+
+   result.id = m_module.opSelect(sizeTypeId,
+     boundId, result.id, fallbackId);
+   return result;
+ }
+
+
+ // Computes the element index for a structured buffer access as
+ //   structId * (structStride / 4) + structOffset / 4
+ // and returns it as a Sint32 scalar. The division of the offset is
+ // emitted either as a signed division or as a logical right shift,
+ // depending on the useSdivForBufferIndex option.
+ DxbcRegisterValue DxbcCompiler::emitCalcBufferIndexStructured(
+     DxbcRegisterValue structId,
+     DxbcRegisterValue structOffset,
+     uint32_t structStride) {
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Sint32, 1 };
+
+   const uint32_t indexTypeId = getVectorTypeId(result.type);
+
+   // Offset inside the structure, divided by four
+   uint32_t innerOffsetId = 0;
+
+   if (m_moduleInfo.options.useSdivForBufferIndex) {
+     innerOffsetId = m_module.opSDiv(indexTypeId,
+       structOffset.id, m_module.consti32(4));
+   } else {
+     innerOffsetId = m_module.opShiftRightLogical(indexTypeId,
+       structOffset.id, m_module.consti32(2));
+   }
+
+   // Index of the first element of the addressed structure
+   const uint32_t structBaseId = m_module.opIMul(indexTypeId,
+     structId.id, m_module.consti32(structStride / 4));
+
+   result.id = m_module.opIAdd(indexTypeId, structBaseId, innerOffsetId);
+   return result;
+ }
+
+
+ // Converts a byte offset into an element index by dividing it by
+ // four. The result is a Sint32 scalar. Depending on the
+ // useSdivForBufferIndex option the division is emitted as a
+ // signed division or as a logical right shift by two.
+ DxbcRegisterValue DxbcCompiler::emitCalcBufferIndexRaw(
+     DxbcRegisterValue byteOffset) {
+   DxbcRegisterValue result;
+   result.type = { DxbcScalarType::Sint32, 1 };
+
+   const uint32_t indexTypeId = getVectorTypeId(result.type);
+
+   if (m_moduleInfo.options.useSdivForBufferIndex) {
+     result.id = m_module.opSDiv(indexTypeId,
+       byteOffset.id, m_module.consti32(4));
+   } else {
+     result.id = m_module.opShiftRightLogical(indexTypeId,
+       byteOffset.id, m_module.consti32(2));
+   }
+
+   return result;
+ }
+
+
+ // Trims a coordinate vector to the number of components that
+ // getTexCoordDim reports for the given image. Vectors that
+ // already have that size are returned unchanged.
+ DxbcRegisterValue DxbcCompiler::emitCalcTexCoord(
+     DxbcRegisterValue coordVector,
+     const DxbcImageInfo& imageInfo) {
+   const uint32_t coordDim = getTexCoordDim(imageInfo);
+
+   if (coordVector.type.ccount == coordDim)
+     return coordVector;
+
+   return emitRegisterExtract(coordVector,
+     DxbcRegMask::firstN(coordDim));
+ }
+
+
+ // Loads all four components of a coordinate register and trims
+ // the result to the coordinate size of the given image.
+ DxbcRegisterValue DxbcCompiler::emitLoadTexCoord(
+     const DxbcRegister& coordReg,
+     const DxbcImageInfo& imageInfo) {
+   const DxbcRegisterValue fullCoord = emitRegisterLoad(
+     coordReg, DxbcRegMask(true, true, true, true));
+
+   return emitCalcTexCoord(fullCoord, imageInfo);
+ }
+
+
+ // Evaluates a register index. Without a relative register the
+ // result is the immediate offset as a Sint32 constant. Otherwise
+ // the first component of the relative register is loaded and a
+ // non-zero immediate offset is added to it.
+ DxbcRegisterValue DxbcCompiler::emitIndexLoad(
+     DxbcRegIndex index) {
+   if (index.relReg == nullptr) {
+     // Purely immediate index
+     DxbcRegisterValue immediate;
+     immediate.type = { DxbcScalarType::Sint32, 1 };
+     immediate.id = m_module.consti32(index.offset);
+     return immediate;
+   }
+
+   DxbcRegisterValue relative = emitRegisterLoad(
+     *index.relReg, DxbcRegMask(true, false, false, false));
+
+   // Adding zero would be a no-op, so skip the instruction
+   if (index.offset == 0)
+     return relative;
+
+   relative.id = m_module.opIAdd(
+     getVectorTypeId(relative.type), relative.id,
+     m_module.consti32(index.offset));
+   return relative;
+ }
+
+
+ // Loads the value behind a register pointer. The returned
+ // value has the same vector type as the pointer.
+ DxbcRegisterValue DxbcCompiler::emitValueLoad(
+     DxbcRegisterPointer ptr) {
+   DxbcRegisterValue loaded;
+   loaded.type = ptr.type;
+
+   const uint32_t valueTypeId = getVectorTypeId(loaded.type);
+   loaded.id = m_module.opLoad(valueTypeId, ptr.id);
+   return loaded;
+ }
+
+
+ // Stores a value through a register pointer.
+ //
+ // ptr       - destination pointer and its vector type
+ // value     - data to store; bit-cast if its component type differs
+ //             from the destination, replicated if it is a scalar
+ // writeMask - destination components to overwrite
+ //
+ // A partial write is emitted as load, insert and store.
+ void DxbcCompiler::emitValueStore(
+     DxbcRegisterPointer ptr,
+     DxbcRegisterValue value,
+     DxbcRegMask writeMask) {
+   // Reinterpret the source if the scalar types do not match
+   if (ptr.type.ctype != value.type.ctype)
+     value = emitRegisterBitcast(value, ptr.type.ctype);
+
+   // A scalar source is broadcast to every written component
+   if (value.type.ccount == 1)
+     value = emitRegisterExtend(value, writeMask.popCount());
+
+   const bool writesWholeRegister =
+     ptr.type.ccount == writeMask.popCount();
+
+   if (writesWholeRegister) {
+     m_module.opStore(ptr.id, value.id);
+     return;
+   }
+
+   // Merge the new components into the current register contents
+   DxbcRegisterValue merged = emitValueLoad(ptr);
+   merged = emitRegisterInsert(merged, value, writeMask);
+
+   m_module.opStore(ptr.id, merged.id);
+ }
+
+
+ // Loads the full contents of a register without applying
+ // swizzles, type casts or operand modifiers.
+ DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw(
+     const DxbcRegister& reg) {
+   const DxbcRegisterPointer operandPtr = emitGetOperandPtr(reg);
+   return emitValueLoad(operandPtr);
+ }
+
+
+ // Loads the requested components of one constant buffer entry.
+ //
+ // reg       - constant buffer operand; idx[0] is the immediate buffer
+ //             register and idx[1] the (possibly relative) constant index
+ // writeMask - destination components that need a value
+ //
+ // Each distinct source component is loaded as a scalar exactly once.
+ // The result is cast to the operand's data type and run through the
+ // operand's source modifiers.
+ DxbcRegisterValue DxbcCompiler::emitConstantBufferLoad(
+     const DxbcRegister& reg,
+     DxbcRegMask writeMask) {
+   // Type description of a single four-component constant
+   DxbcRegisterInfo constantInfo;
+   constantInfo.type.ctype = DxbcScalarType::Float32;
+   constantInfo.type.ccount = 4;
+   constantInfo.type.alength = 0;
+   constantInfo.sclass = spv::StorageClassUniform;
+
+   const uint32_t bufferIndex = reg.idx[0].offset;
+   const DxbcRegisterValue constantIndex = emitIndexLoad(reg.idx[1]);
+
+   const uint32_t constantPtrTypeId = getPointerTypeId(constantInfo);
+
+   // Member 0 of the block, then the constant inside the array
+   const std::array<uint32_t, 2> chain =
+     {{ m_module.consti32(0), constantIndex.id }};
+
+   const uint32_t constantPtrId = m_module.opAccessChain(constantPtrTypeId,
+     m_constantBuffers.at(bufferIndex).varId,
+     chain.size(), chain.data());
+
+   // loadedIds caches one ID per source component (0 = not loaded),
+   // pickedIds collects the swizzled components in output order.
+   std::array<uint32_t, 4> loadedIds = { 0, 0, 0, 0 };
+   std::array<uint32_t, 4> pickedIds = { 0, 0, 0, 0 };
+   uint32_t pickedCount = 0;
+
+   for (uint32_t i = 0; i < 4; i++) {
+     if (!writeMask[i])
+       continue;
+
+     const uint32_t srcComponent = reg.swizzle[i];
+
+     if (loadedIds[srcComponent] == 0) {
+       uint32_t componentIndexId = m_module.constu32(srcComponent);
+       uint32_t componentPtrId = m_module.opAccessChain(
+         m_module.defPointerType(
+           getScalarTypeId(DxbcScalarType::Float32),
+           spv::StorageClassUniform),
+         constantPtrId, 1, &componentIndexId);
+
+       loadedIds[srcComponent] = m_module.opLoad(
+         getScalarTypeId(DxbcScalarType::Float32),
+         componentPtrId);
+     }
+
+     pickedIds[pickedCount++] = loadedIds[srcComponent];
+   }
+
+   DxbcRegisterValue result;
+   result.type.ctype = DxbcScalarType::Float32;
+   result.type.ccount = pickedCount;
+   result.id = pickedIds[0];
+
+   // A single component is used directly, several form a vector
+   if (pickedCount > 1) {
+     result.id = m_module.opCompositeConstruct(
+       getVectorTypeId(result.type),
+       pickedCount, pickedIds.data());
+   }
+
+   // Cast to the requested type, then apply source modifiers
+   result = emitRegisterBitcast(result, reg.dataType);
+   return emitSrcOperandModifiers(result, reg.modifiers);
+ }
+
+
+ // Loads a source operand with the given write mask applied.
+ //
+ // Immediates become Uint32 constants that are cast to the operand's
+ // data type. Constant buffers are handled by emitConstantBufferLoad.
+ // Everything else is loaded from its pointer, swizzled, cast and then
+ // run through the operand's source modifiers. Throws DxvkError for
+ // immediates with an unsupported component count.
+ DxbcRegisterValue DxbcCompiler::emitRegisterLoad(
+     const DxbcRegister& reg,
+     DxbcRegMask writeMask) {
+   const bool isImmediate = reg.type == DxbcOperandType::Imm32
+                         || reg.type == DxbcOperandType::Imm64;
+
+   if (isImmediate) {
+     DxbcRegisterValue immediate;
+
+     switch (reg.componentCount) {
+       case DxbcComponentCount::Component1: {
+         // One scalar constant, replicated as often as needed
+         immediate.type.ctype = DxbcScalarType::Uint32;
+         immediate.type.ccount = 1;
+         immediate.id = m_module.constu32(reg.imm.u32_1);
+
+         immediate = emitRegisterExtend(immediate, writeMask.popCount());
+       } break;
+
+       case DxbcComponentCount::Component4: {
+         // Keep only the constants of the enabled components
+         std::array<uint32_t, 4> constIds = { };
+         uint32_t constCount = 0;
+
+         for (uint32_t i = 0; i < constIds.size(); i++) {
+           if (!writeMask[i])
+             continue;
+
+           constIds.at(constCount++) =
+             m_module.constu32(reg.imm.u32_4[i]);
+         }
+
+         immediate.type.ctype = DxbcScalarType::Uint32;
+         immediate.type.ccount = writeMask.popCount();
+         immediate.id = constIds.at(0);
+
+         if (constCount > 1) {
+           immediate.id = m_module.constComposite(
+             getVectorTypeId(immediate.type),
+             immediate.type.ccount, constIds.data());
+         }
+       } break;
+
+       default:
+         // Something went horribly wrong in the decoder or the shader is broken
+         throw DxvkError("DxbcCompiler: Invalid component count for immediate operand");
+     }
+
+     // Constants are built as Uint32, cast them to the requested type
+     return emitRegisterBitcast(immediate, reg.dataType);
+   }
+
+   if (reg.type == DxbcOperandType::ConstantBuffer)
+     return emitConstantBufferLoad(reg, writeMask);
+
+   // Regular register: load everything, then narrow it down. The cast
+   // has to come after the swizzle for 64-bit types.
+   DxbcRegisterValue loaded = emitRegisterLoadRaw(reg);
+   loaded = emitRegisterSwizzle(loaded, reg.swizzle, writeMask);
+   loaded = emitRegisterBitcast(loaded, reg.dataType);
+   return emitSrcOperandModifiers(loaded, reg.modifiers);
+ }
+
+
+ // Stores a value to a destination operand using the operand's
+ // write mask. Stores to indexable temps are wrapped in a bounds
+ // check against the declared array length, so that out-of-range
+ // indices skip the store.
+ void DxbcCompiler::emitRegisterStore(
+     const DxbcRegister& reg,
+     DxbcRegisterValue value) {
+   if (reg.type != DxbcOperandType::IndexableTemp) {
+     emitValueStore(emitGetOperandPtr(reg), value, reg.mask);
+     return;
+   }
+
+   // idx[0] selects the array, idx[1] the vector inside of it
+   DxbcRegisterValue vectorIndex = emitIndexLoad(reg.idx[1]);
+
+   const uint32_t inBoundsId = m_module.opULessThan(
+     m_module.defBoolType(), vectorIndex.id,
+     m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength));
+
+   const uint32_t storeLabel = m_module.allocateId();
+   const uint32_t mergeLabel = m_module.allocateId();
+
+   m_module.opSelectionMerge(mergeLabel, spv::SelectionControlMaskNone);
+   m_module.opBranchConditional(inBoundsId, storeLabel, mergeLabel);
+
+   // Only executed when the index is inside the array
+   m_module.opLabel(storeLabel);
+   emitValueStore(getIndexableTempPtr(reg, vectorIndex), value, reg.mask);
+
+   m_module.opBranch(mergeLabel);
+   m_module.opLabel (mergeLabel);
+ }
+
+
+ // Declares a 32-bit specialization constant.
+ //
+ // specId - specialization constant ID used for the SpecId decoration
+ // type   - scalar type of the constant
+ // value  - default value
+ // name   - debug name attached to the constant
+ //
+ // Returns the SPIR-V ID of the new constant.
+ uint32_t DxbcCompiler::emitNewSpecConstant(
+     DxvkSpecConstantId specId,
+     DxbcScalarType type,
+     uint32_t value,
+     const char* name) {
+   const uint32_t typeId = getScalarTypeId(type);
+   const uint32_t constId = m_module.specConst32(typeId, value);
+
+   m_module.decorateSpecId(constId, uint32_t(specId));
+   m_module.setDebugName(constId, name);
+   return constId;
+ }
+
+
+ // Fills the private input array (m_vArray) for shader stages with
+ // a single set of inputs. All declared v# registers are copied
+ // first, then system values are written on top of them using
+ // their register masks. Only vertex and pixel shaders are
+ // supported for system values; other stages throw DxvkError.
+ void DxbcCompiler::emitInputSetup() {
+   m_module.setLateConst(m_vArrayLengthId, &m_vArrayLength);
+
+   const uint32_t f32x4TypeId = m_module.defVectorType(m_module.defFloatType(32), 4);
+   const uint32_t arrayPtrTypeId = m_module.defPointerType(f32x4TypeId, spv::StorageClassPrivate);
+
+   // Pass 1: plain input registers
+   for (uint32_t reg = 0; reg < m_vRegs.size(); reg++) {
+     if (m_vRegs.at(reg).id == 0)
+       continue;
+
+     const uint32_t regIndexId = m_module.consti32(reg);
+
+     DxbcRegisterPointer inputPtr = m_vRegs.at(reg);
+     DxbcRegisterValue inputValue = emitRegisterBitcast(
+       emitValueLoad(inputPtr), DxbcScalarType::Float32);
+
+     DxbcRegisterPointer arrayPtr;
+     arrayPtr.type = { DxbcScalarType::Float32, 4 };
+     arrayPtr.id = m_module.opAccessChain(
+       arrayPtrTypeId, m_vArray, 1, &regIndexId);
+
+     // Inputs may have fewer than four components
+     emitValueStore(arrayPtr, inputValue,
+       DxbcRegMask::firstN(inputValue.type.ccount));
+   }
+
+   // Pass 2: system values, keeping whatever the masks do not cover
+   for (const DxbcSvMapping& mapping : m_vMappings) {
+     const uint32_t regIndexId = m_module.consti32(mapping.regId);
+
+     DxbcRegisterValue svValue;
+
+     switch (m_programInfo.type()) {
+       case DxbcProgramType::VertexShader:
+         svValue = emitVsSystemValueLoad(mapping.sv, mapping.regMask);
+         break;
+
+       case DxbcProgramType::PixelShader:
+         svValue = emitPsSystemValueLoad(mapping.sv, mapping.regMask);
+         break;
+
+       default:
+         throw DxvkError(str::format("DxbcCompiler: Unexpected stage: ", m_programInfo.type()));
+     }
+
+     DxbcRegisterPointer arrayPtr;
+     arrayPtr.type.ctype = DxbcScalarType::Float32;
+     arrayPtr.type.ccount = 4;
+     arrayPtr.id = m_module.opAccessChain(
+       arrayPtrTypeId, m_vArray, 1, &regIndexId);
+
+     emitValueStore(arrayPtr, svValue, mapping.regMask);
+   }
+ }
+
+
+ // Fills the private input array (m_vArray) for shader stages that
+ // receive one set of inputs per vertex. The array is indexed by
+ // vertex first and by register second. All declared v# registers
+ // are copied for each of the vertexCount vertices, then system
+ // values are written on top using their register masks. System
+ // values are only supported for geometry shaders; other stages
+ // throw DxvkError.
+ void DxbcCompiler::emitInputSetup(uint32_t vertexCount) {
+   m_module.setLateConst(m_vArrayLengthId, &m_vArrayLength);
+
+   const uint32_t f32x4TypeId = m_module.defVectorType(m_module.defFloatType(32), 4);
+   const uint32_t arrayPtrTypeId = m_module.defPointerType(f32x4TypeId, spv::StorageClassPrivate);
+
+   // Pass 1: plain input registers
+   for (uint32_t reg = 0; reg < m_vRegs.size(); reg++) {
+     if (m_vRegs.at(reg).id == 0)
+       continue;
+
+     const uint32_t regIndexId = m_module.consti32(reg);
+
+     for (uint32_t vtx = 0; vtx < vertexCount; vtx++) {
+       std::array<uint32_t, 2> chain
+         = {{ m_module.consti32(vtx), regIndexId }};
+
+       // The input variable itself is only indexed by vertex
+       DxbcRegisterPointer inputPtr;
+       inputPtr.type = m_vRegs.at(reg).type;
+       inputPtr.id = m_module.opAccessChain(
+         m_module.defPointerType(getVectorTypeId(inputPtr.type), spv::StorageClassInput),
+         m_vRegs.at(reg).id, 1, chain.data());
+
+       DxbcRegisterValue inputValue = emitRegisterBitcast(
+         emitValueLoad(inputPtr), DxbcScalarType::Float32);
+
+       // The private array takes both indices
+       DxbcRegisterPointer arrayPtr;
+       arrayPtr.type = { DxbcScalarType::Float32, 4 };
+       arrayPtr.id = m_module.opAccessChain(
+         arrayPtrTypeId, m_vArray, 2, chain.data());
+
+       emitValueStore(arrayPtr, inputValue,
+         DxbcRegMask::firstN(inputValue.type.ccount));
+     }
+   }
+
+   // Pass 2: system values, keeping whatever the masks do not cover
+   for (const DxbcSvMapping& mapping : m_vMappings) {
+     const uint32_t regIndexId = m_module.consti32(mapping.regId);
+
+     for (uint32_t vtx = 0; vtx < vertexCount; vtx++) {
+       if (m_programInfo.type() != DxbcProgramType::GeometryShader)
+         throw DxvkError(str::format("DxbcCompiler: Unexpected stage: ", m_programInfo.type()));
+
+       const DxbcRegisterValue svValue =
+         emitGsSystemValueLoad(mapping.sv, mapping.regMask, vtx);
+
+       std::array<uint32_t, 2> chain = {
+         m_module.consti32(vtx), regIndexId,
+       };
+
+       DxbcRegisterPointer arrayPtr;
+       arrayPtr.type.ctype = DxbcScalarType::Float32;
+       arrayPtr.type.ccount = 4;
+       arrayPtr.id = m_module.opAccessChain(arrayPtrTypeId,
+         m_vArray, chain.size(), chain.data());
+
+       emitValueStore(arrayPtr, svValue, mapping.regMask);
+     }
+   }
+ }
+
+
+ // Forwards every output register that is mapped to a system value
+ // to the system value store routine of the current shader stage.
+ // Hull shaders read the value from their private per-patch output
+ // array instead of the o# register. Compute shaders do nothing.
+ void DxbcCompiler::emitOutputSetup() {
+   for (const DxbcSvMapping& mapping : m_oMappings) {
+     DxbcRegisterPointer sourcePtr = m_oRegs.at(mapping.regId);
+
+     if (m_programInfo.type() == DxbcProgramType::HullShader) {
+       // Redirect the pointer into the per-patch output array
+       uint32_t regIndexId = m_module.constu32(mapping.regId);
+
+       sourcePtr.type = { DxbcScalarType::Float32, 4 };
+
+       const uint32_t patchPtrTypeId = m_module.defPointerType(
+         getVectorTypeId(sourcePtr.type),
+         spv::StorageClassPrivate);
+
+       sourcePtr.id = m_module.opAccessChain(patchPtrTypeId,
+         m_hs.outputPerPatch, 1, &regIndexId);
+     }
+
+     auto sv = mapping.sv;
+     auto mask = mapping.regMask;
+     auto value = emitValueLoad(sourcePtr);
+
+     switch (m_programInfo.type()) {
+       case DxbcProgramType::VertexShader:
+         emitVsSystemValueStore(sv, mask, value);
+         break;
+
+       case DxbcProgramType::GeometryShader:
+         emitGsSystemValueStore(sv, mask, value);
+         break;
+
+       case DxbcProgramType::HullShader:
+         emitHsSystemValueStore(sv, mask, value);
+         break;
+
+       case DxbcProgramType::DomainShader:
+         emitDsSystemValueStore(sv, mask, value);
+         break;
+
+       case DxbcProgramType::PixelShader:
+         emitPsSystemValueStore(sv, mask, value);
+         break;
+
+       case DxbcProgramType::ComputeShader:
+         break;
+     }
+   }
+ }
+
+
+ // Rewrites every declared output vector with its components
+ // rearranged according to a per-output specialization constant,
+ // and optionally replaces NaN components with zero.
+ void DxbcCompiler::emitOutputMapping() {
+ // For pixel shaders, we need to swizzle the
+ // output vectors using some spec constants.
+ for (uint32_t i = 0; i < m_oRegs.size(); i++) {
+ // Undeclared outputs and scalar outputs are left untouched
+ if (m_oRegs[i].id == 0 || m_oRegs[i].type.ccount < 2)
+ continue;
+
+ DxbcRegisterValue vector = emitValueLoad(m_oRegs[i]);
+
+ uint32_t specTypeId = getScalarTypeId(DxbcScalarType::Uint32);
+ uint32_t compTypeId = getScalarTypeId(vector.type.ctype);
+
+ // One spec constant per output, holding a 4-bit source component
+ // index per destination component. The default 0x3210 maps every
+ // component to itself.
+ uint32_t specId = m_module.specConst32(specTypeId, 0x3210);
+ m_module.decorateSpecId(specId, uint32_t(DxvkSpecConstantId::ColorComponentMappings) + i);
+ m_module.setDebugName(specId, str::format("omap", i).c_str());
+
+ // Only the first vector.type.ccount entries are written and used
+ std::array<uint32_t, 4> scalars;
+ for (uint32_t c = 0; c < vector.type.ccount; c++) {
+ // Bits [4c, 4c+4) of the constant select the source component
+ scalars[c] = m_module.opVectorExtractDynamic(compTypeId, vector.id,
+ m_module.opBitFieldUExtract(specTypeId, specId,
+ m_module.constu32(4 * c), m_module.constu32(4)));
+ }
+
+ uint32_t typeId = getVectorTypeId(vector.type);
+ vector.id = m_module.opCompositeConstruct(typeId, vector.type.ccount, scalars.data());
+
+ // Replace NaN by zero if requested
+ if (m_moduleInfo.options.enableRtOutputNanFixup && vector.type.ctype == DxbcScalarType::Float32) {
+ uint32_t boolType = m_module.defBoolType();
+
+ // The NaN test is done per component, so the
+ // condition needs a matching boolean vector type
+ if (vector.type.ccount > 1)
+ boolType = m_module.defVectorType(boolType, vector.type.ccount);
+
+ uint32_t zero = emitBuildConstVecf32(0.0f, 0.0f, 0.0f, 0.0f,
+ DxbcRegMask((1u << vector.type.ccount) - 1)).id;
+ uint32_t isNan = m_module.opIsNan(boolType, vector.id);
+ vector.id = m_module.opSelect(typeId, isNan, zero, vector.id);
+ }
+
+ // Write the remapped vector back to the output register
+ emitValueStore(m_oRegs[i], vector,
+ DxbcRegMask::firstN(vector.type.ccount));
+ }
+ }
+
+
+ // Clamps the fragment depth output to [0..1] if the shader
+ // declares one.
+ void DxbcCompiler::emitOutputDepthClamp() {
+   // HACK: Some drivers do not clamp FragDepth to [minDepth..maxDepth]
+   // before writing to the depth attachment, but we do not have access
+   // to those values here. Clamp to [0..1] instead.
+   if (!m_ps.builtinDepth)
+     return;
+
+   DxbcRegisterPointer depthPtr;
+   depthPtr.type = { DxbcScalarType::Float32, 1 };
+   depthPtr.id = m_ps.builtinDepth;
+
+   DxbcRegisterValue depth = emitValueLoad(depthPtr);
+
+   depth.id = m_module.opFClamp(
+     getVectorTypeId(depthPtr.type),
+     depth.id,
+     m_module.constf32(0.0f),
+     m_module.constf32(1.0f));
+
+   emitValueStore(depthPtr, depth, DxbcRegMask::firstN(1));
+ }
+
+
+ // Emits code that zero-initializes every declared thread group
+ // shared memory (g#) register.
+ //
+ // The elements of each register are split evenly across the
+ // invocations of the workgroup: invocation t clears the contiguous
+ // range starting at t * numElementsPerThread. Elements left over by
+ // the division are cleared by the first numElementsRemaining
+ // invocations, one element each. A workgroup-wide barrier follows,
+ // so that no invocation can observe uninitialized shared memory.
+ void DxbcCompiler::emitInitWorkgroupMemory() {
+   bool hasTgsm = false;
+
+   for (uint32_t i = 0; i < m_gRegs.size(); i++) {
+     // Skip registers that were never declared
+     if (!m_gRegs[i].varId)
+       continue;
+
+     // The local invocation index is declared on first use
+     if (!m_cs.builtinLocalInvocationIndex) {
+       m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
+         { DxbcScalarType::Uint32, 1, 0 },
+         spv::StorageClassInput },
+         spv::BuiltInLocalInvocationIndex,
+         "vThreadIndexInGroup");
+     }
+
+     uint32_t intTypeId = getScalarTypeId(DxbcScalarType::Uint32);
+     uint32_t ptrTypeId = m_module.defPointerType(
+       intTypeId, spv::StorageClassWorkgroup);
+
+     // Number of 32-bit elements in the register. NOTE(review): the
+     // division by four presumably converts a byte size - confirm.
+     uint32_t numElements = m_gRegs[i].type == DxbcResourceType::Structured
+       ? m_gRegs[i].elementCount * m_gRegs[i].elementStride / 4
+       : m_gRegs[i].elementCount / 4;
+
+     uint32_t numThreads = m_cs.workgroupSizeX *
+       m_cs.workgroupSizeY * m_cs.workgroupSizeZ;
+
+     uint32_t numElementsPerThread = numElements / numThreads;
+     uint32_t numElementsRemaining = numElements % numThreads;
+
+     uint32_t threadId = m_module.opLoad(
+       intTypeId, m_cs.builtinLocalInvocationIndex);
+
+     uint32_t strideId = m_module.constu32(numElementsPerThread);
+     uint32_t zeroId = m_module.constu32(0);
+
+     // Evenly distributed part: unrolled at compile time
+     for (uint32_t e = 0; e < numElementsPerThread; e++) {
+       uint32_t ofsId = m_module.opIAdd(intTypeId,
+         m_module.opIMul(intTypeId, strideId, threadId),
+         m_module.constu32(e));
+
+       uint32_t ptrId = m_module.opAccessChain(
+         ptrTypeId, m_gRegs[i].varId, 1, &ofsId);
+
+       m_module.opStore(ptrId, zeroId);
+     }
+
+     // Remainder: one element for each of the first few invocations
+     if (numElementsRemaining) {
+       uint32_t condition = m_module.opULessThan(
+         m_module.defBoolType(), threadId,
+         m_module.constu32(numElementsRemaining));
+
+       DxbcConditional cond;
+       cond.labelIf = m_module.allocateId();
+       cond.labelEnd = m_module.allocateId();
+
+       m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
+       m_module.opBranchConditional(condition, cond.labelIf, cond.labelEnd);
+
+       m_module.opLabel(cond.labelIf);
+
+       uint32_t ofsId = m_module.opIAdd(intTypeId,
+         m_module.constu32(numThreads * numElementsPerThread),
+         threadId);
+
+       uint32_t ptrId = m_module.opAccessChain(
+         ptrTypeId, m_gRegs[i].varId, 1, &ofsId);
+
+       m_module.opStore(ptrId, zeroId);
+
+       m_module.opBranch(cond.labelEnd);
+       m_module.opLabel (cond.labelEnd);
+     }
+
+     hasTgsm = true;
+   }
+
+   if (hasTgsm) {
+     // All invocations of the workgroup have to finish clearing their
+     // part before any of them may continue, so the execution scope
+     // must be the workgroup. An invocation-scope barrier would not
+     // make invocations wait for each other.
+     m_module.opControlBarrier(
+       m_module.constu32(spv::ScopeWorkgroup),
+       m_module.constu32(spv::ScopeWorkgroup),
+       m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
+                       | spv::MemorySemanticsAcquireReleaseMask));
+   }
+ }
+
+
+ // Loads a vertex shader input system value as a Uint32 scalar.
+ //
+ // VertexId   - vertex index minus base vertex
+ // InstanceId - instance index minus base instance
+ //
+ // The required built-in variables are declared on first use. The
+ // mask argument is not used here. Any other system value throws
+ // DxvkError.
+ DxbcRegisterValue DxbcCompiler::emitVsSystemValueLoad(
+     DxbcSystemValue sv,
+     DxbcRegMask mask) {
+   switch (sv) {
+     case DxbcSystemValue::VertexId: {
+       const uint32_t u32TypeId = getScalarTypeId(DxbcScalarType::Uint32);
+
+       if (!m_vs.builtinVertexId) {
+         m_vs.builtinVertexId = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInVertexIndex,
+           "vs_vertex_index");
+       }
+
+       if (!m_vs.builtinBaseVertex) {
+         m_vs.builtinBaseVertex = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInBaseVertex,
+           "vs_base_vertex");
+       }
+
+       // Subtract the base vertex from the built-in vertex index
+       DxbcRegisterValue vertexId;
+       vertexId.type = { DxbcScalarType::Uint32, 1 };
+       vertexId.id = m_module.opISub(u32TypeId,
+         m_module.opLoad(u32TypeId, m_vs.builtinVertexId),
+         m_module.opLoad(u32TypeId, m_vs.builtinBaseVertex));
+       return vertexId;
+     }
+
+     case DxbcSystemValue::InstanceId: {
+       const uint32_t u32TypeId = getScalarTypeId(DxbcScalarType::Uint32);
+
+       if (!m_vs.builtinInstanceId) {
+         m_vs.builtinInstanceId = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInInstanceIndex,
+           "vs_instance_index");
+       }
+
+       if (!m_vs.builtinBaseInstance) {
+         m_vs.builtinBaseInstance = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInBaseInstance,
+           "vs_base_instance");
+       }
+
+       // Subtract the base instance from the built-in instance index
+       DxbcRegisterValue instanceId;
+       instanceId.type = { DxbcScalarType::Uint32, 1 };
+       instanceId.id = m_module.opISub(u32TypeId,
+         m_module.opLoad(u32TypeId, m_vs.builtinInstanceId),
+         m_module.opLoad(u32TypeId, m_vs.builtinBaseInstance));
+       return instanceId;
+     }
+
+     default:
+       throw DxvkError(str::format(
+         "DxbcCompiler: Unhandled VS SV input: ", sv));
+   }
+ }
+
+
+ // Loads a geometry shader input system value for one input vertex.
+ // Only the position is supported: it is read from the per-vertex
+ // input block and reduced to the components selected by mask. Any
+ // other system value throws DxvkError.
+ DxbcRegisterValue DxbcCompiler::emitGsSystemValueLoad(
+     DxbcSystemValue sv,
+     DxbcRegMask mask,
+     uint32_t vertexId) {
+   if (sv != DxbcSystemValue::Position) {
+     throw DxvkError(str::format(
+       "DxbcCompiler: Unhandled GS SV input: ", sv));
+   }
+
+   // Vertex first, then the position member of the block
+   const std::array<uint32_t, 2> chain = {
+     m_module.consti32(vertexId),
+     m_module.consti32(PerVertex_Position),
+   };
+
+   DxbcRegisterPointer positionPtr;
+   positionPtr.type.ctype = DxbcScalarType::Float32;
+   positionPtr.type.ccount = 4;
+
+   const uint32_t positionPtrTypeId = m_module.defPointerType(
+     getVectorTypeId(positionPtr.type),
+     spv::StorageClassInput);
+
+   positionPtr.id = m_module.opAccessChain(positionPtrTypeId,
+     m_perVertexIn, chain.size(), chain.data());
+
+   return emitRegisterExtract(
+     emitValueLoad(positionPtr), mask);
+ }
+
+
+ // Loads a pixel shader system value. The backing builtin input
+ // variable is declared on first use, together with the capability
+ // enabled for it in the respective branch. Unsupported system
+ // values raise a DxvkError.
+ DxbcRegisterValue DxbcCompiler::emitPsSystemValueLoad(
+         DxbcSystemValue sv,
+         DxbcRegMask mask) {
+   switch (sv) {
+     case DxbcSystemValue::Position: {
+       if (!m_ps.builtinFragCoord) {
+         m_ps.builtinFragCoord = emitNewBuiltinVariable({
+           { DxbcScalarType::Float32, 4, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInFragCoord,
+           "ps_frag_coord");
+       }
+
+       DxbcRegisterPointer fragCoordPtr;
+       fragCoordPtr.type.ctype = DxbcScalarType::Float32;
+       fragCoordPtr.type.ccount = 4;
+       fragCoordPtr.id = m_ps.builtinFragCoord;
+
+       // X, Y and Z are taken from FragCoord unchanged, whereas
+       // the W component gets replaced with its reciprocal.
+       DxbcRegisterValue position = emitValueLoad(fragCoordPtr);
+
+       uint32_t wIndex = 3;
+       const uint32_t floatTypeId = m_module.defFloatType(32);
+       const uint32_t fragW = m_module.opCompositeExtract(
+         floatTypeId, position.id, 1, &wIndex);
+       const uint32_t oneId = m_module.constf32(1.0f);
+       const uint32_t rcpW = m_module.opFDiv(floatTypeId, oneId, fragW);
+
+       const uint32_t vectorTypeId = getVectorTypeId(position.type);
+       position.id = m_module.opCompositeInsert(
+         vectorTypeId, rcpW, position.id, 1, &wIndex);
+
+       return emitRegisterExtract(position, mask);
+     }
+
+     case DxbcSystemValue::IsFrontFace: {
+       if (!m_ps.builtinIsFrontFace) {
+         m_ps.builtinIsFrontFace = emitNewBuiltinVariable({
+           { DxbcScalarType::Bool, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInFrontFacing,
+           "ps_is_front_face");
+       }
+
+       // Turn the boolean builtin into an all-ones / all-zeroes mask
+       DxbcRegisterValue frontFace;
+       frontFace.type = { DxbcScalarType::Uint32, 1 };
+
+       const uint32_t maskTypeId = getVectorTypeId(frontFace.type);
+       const uint32_t facingId = m_module.opLoad(
+         m_module.defBoolType(), m_ps.builtinIsFrontFace);
+       const uint32_t allOnesId = m_module.constu32(0xFFFFFFFF);
+       const uint32_t zeroId = m_module.constu32(0x00000000);
+
+       frontFace.id = m_module.opSelect(
+         maskTypeId, facingId, allOnesId, zeroId);
+       return frontFace;
+     }
+
+     case DxbcSystemValue::PrimitiveId: {
+       if (!m_primitiveIdIn) {
+         m_module.enableCapability(spv::CapabilityGeometry);
+
+         m_primitiveIdIn = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInPrimitiveId,
+           "ps_primitive_id");
+       }
+
+       DxbcRegisterPointer primitiveIdPtr;
+       primitiveIdPtr.type.ctype = DxbcScalarType::Uint32;
+       primitiveIdPtr.type.ccount = 1;
+       primitiveIdPtr.id = m_primitiveIdIn;
+       return emitValueLoad(primitiveIdPtr);
+     }
+
+     case DxbcSystemValue::SampleIndex: {
+       if (!m_ps.builtinSampleId) {
+         m_module.enableCapability(spv::CapabilitySampleRateShading);
+
+         m_ps.builtinSampleId = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInSampleId,
+           "ps_sample_id");
+       }
+
+       DxbcRegisterPointer sampleIdPtr;
+       sampleIdPtr.type = { DxbcScalarType::Uint32, 1 };
+       sampleIdPtr.id = m_ps.builtinSampleId;
+       return emitValueLoad(sampleIdPtr);
+     }
+
+     case DxbcSystemValue::RenderTargetId: {
+       if (!m_ps.builtinLayer) {
+         m_module.enableCapability(spv::CapabilityGeometry);
+
+         m_ps.builtinLayer = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInLayer,
+           "v_layer");
+       }
+
+       DxbcRegisterPointer layerPtr;
+       layerPtr.type = { DxbcScalarType::Uint32, 1 };
+       layerPtr.id = m_ps.builtinLayer;
+       return emitValueLoad(layerPtr);
+     }
+
+     case DxbcSystemValue::ViewportId: {
+       if (!m_ps.builtinViewportId) {
+         m_module.enableCapability(spv::CapabilityMultiViewport);
+
+         m_ps.builtinViewportId = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassInput },
+           spv::BuiltInViewportIndex,
+           "v_viewport");
+       }
+
+       DxbcRegisterPointer viewportPtr;
+       viewportPtr.type = { DxbcScalarType::Uint32, 1 };
+       viewportPtr.id = m_ps.builtinViewportId;
+       return emitValueLoad(viewportPtr);
+     }
+
+     default:
+       throw DxvkError(str::format(
+         "DxbcCompiler: Unhandled PS SV input: ", sv));
+   }
+ }
+
+
+ // Stores a system value written by the vertex stage. Position goes
+ // to the per-vertex output block, while render target and viewport
+ // indices are written to lazily declared output builtins. Other
+ // system values are not handled and only produce a warning.
+ void DxbcCompiler::emitVsSystemValueStore(
+         DxbcSystemValue sv,
+         DxbcRegMask mask,
+   const DxbcRegisterValue& value) {
+   switch (sv) {
+     case DxbcSystemValue::Position: {
+       const uint32_t positionMember = m_module.consti32(PerVertex_Position);
+
+       DxbcRegisterPointer positionPtr;
+       positionPtr.type = { DxbcScalarType::Float32, 4 };
+
+       const uint32_t ptrTypeId = m_module.defPointerType(
+         getVectorTypeId(positionPtr.type),
+         spv::StorageClassOutput);
+
+       positionPtr.id = m_module.opAccessChain(
+         ptrTypeId, m_perVertexOut, 1, &positionMember);
+
+       emitValueStore(positionPtr, value, mask);
+       break;
+     }
+
+     case DxbcSystemValue::RenderTargetId: {
+       // Stages other than the geometry shader go through
+       // enableShaderViewportIndexLayer before writing the layer
+       if (m_programInfo.type() != DxbcProgramType::GeometryShader)
+         enableShaderViewportIndexLayer();
+
+       if (!m_gs.builtinLayer) {
+         m_module.enableCapability(spv::CapabilityGeometry);
+
+         m_gs.builtinLayer = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassOutput },
+           spv::BuiltInLayer,
+           "o_layer");
+       }
+
+       DxbcRegisterPointer layerPtr;
+       layerPtr.type.ctype = DxbcScalarType::Uint32;
+       layerPtr.type.ccount = 1;
+       layerPtr.id = m_gs.builtinLayer;
+
+       const DxbcRegisterValue layer = emitRegisterExtract(value, mask);
+       emitValueStore(layerPtr, layer,
+         DxbcRegMask(true, false, false, false));
+       break;
+     }
+
+     case DxbcSystemValue::ViewportId: {
+       // Same handling as for the layer index above
+       if (m_programInfo.type() != DxbcProgramType::GeometryShader)
+         enableShaderViewportIndexLayer();
+
+       if (!m_gs.builtinViewportId) {
+         m_module.enableCapability(spv::CapabilityMultiViewport);
+
+         m_gs.builtinViewportId = emitNewBuiltinVariable({
+           { DxbcScalarType::Uint32, 1, 0 },
+           spv::StorageClassOutput },
+           spv::BuiltInViewportIndex,
+           "o_viewport");
+       }
+
+       DxbcRegisterPointer viewportPtr;
+       viewportPtr.type.ctype = DxbcScalarType::Uint32;
+       viewportPtr.type.ccount = 1;
+       viewportPtr.id = m_gs.builtinViewportId;
+
+       const DxbcRegisterValue viewport = emitRegisterExtract(value, mask);
+       emitValueStore(viewportPtr, viewport,
+         DxbcRegMask(true, false, false, false));
+       break;
+     }
+
+     default:
+       Logger::warn(str::format(
+         "DxbcCompiler: Unhandled VS SV output: ", sv));
+   }
+ }
+
+
+ // Stores a hull shader system value. Only the final tessellation
+ // factors are handled: the value is clamped to the range from zero
+ // to the effective maximum tessellation factor and then written to
+ // one element of the outer or inner tess level builtin array. Any
+ // other system value only produces a warning.
+ void DxbcCompiler::emitHsSystemValueStore(
+         DxbcSystemValue sv,
+         DxbcRegMask mask,
+   const DxbcRegisterValue& value) {
+   if (sv >= DxbcSystemValue::FinalQuadUeq0EdgeTessFactor
+    && sv <= DxbcSystemValue::FinalLineDensityTessFactor) {
+     struct TessFactor {
+       uint32_t array = 0;
+       uint32_t index = 0;
+     };
+
+     // This table must not be static: it captures the builtin array
+     // IDs of this compiler instance, and a function-local static
+     // would be initialized only once, from whichever instance
+     // happens to get here first, and then reused by all others.
+     // Rows are indexed by the system value's offset from
+     // FinalQuadUeq0EdgeTessFactor.
+     // NOTE(review): the labels of the last two rows should be
+     // checked against the order of the DxbcSystemValue enum.
+     const std::array<TessFactor, 12> tessFactors = {{
+       { m_hs.builtinTessLevelOuter, 0 },  // FinalQuadUeq0EdgeTessFactor
+       { m_hs.builtinTessLevelOuter, 1 },  // FinalQuadVeq0EdgeTessFactor
+       { m_hs.builtinTessLevelOuter, 2 },  // FinalQuadUeq1EdgeTessFactor
+       { m_hs.builtinTessLevelOuter, 3 },  // FinalQuadVeq1EdgeTessFactor
+       { m_hs.builtinTessLevelInner, 0 },  // FinalQuadUInsideTessFactor
+       { m_hs.builtinTessLevelInner, 1 },  // FinalQuadVInsideTessFactor
+       { m_hs.builtinTessLevelOuter, 0 },  // FinalTriUeq0EdgeTessFactor
+       { m_hs.builtinTessLevelOuter, 1 },  // FinalTriVeq0EdgeTessFactor
+       { m_hs.builtinTessLevelOuter, 2 },  // FinalTriWeq0EdgeTessFactor
+       { m_hs.builtinTessLevelInner, 0 },  // FinalTriInsideTessFactor
+       { m_hs.builtinTessLevelOuter, 0 },  // FinalLineDensityTessFactor
+       { m_hs.builtinTessLevelOuter, 1 },  // FinalLineDetailTessFactor
+     }};
+
+     const TessFactor tessFactor = tessFactors.at(uint32_t(sv)
+       - uint32_t(DxbcSystemValue::FinalQuadUeq0EdgeTessFactor));
+
+     const uint32_t tessFactorArrayIndex
+       = m_module.constu32(tessFactor.index);
+
+     // Apply global tess factor limit: use the smaller one of
+     // the shader's own limit and the module-level limit, if any
+     float maxTessFactor = m_hs.maxTessFactor;
+
+     if (m_moduleInfo.tess != nullptr) {
+       if (m_moduleInfo.tess->maxTessFactor < maxTessFactor)
+         maxTessFactor = m_moduleInfo.tess->maxTessFactor;
+     }
+
+     DxbcRegisterValue tessValue = emitRegisterExtract(value, mask);
+     tessValue.id = m_module.opFClamp(getVectorTypeId(tessValue.type),
+       tessValue.id, m_module.constf32(0.0f),
+       m_module.constf32(maxTessFactor));
+
+     // Pointer to the selected element of the tess level array
+     DxbcRegisterPointer ptr;
+     ptr.type.ctype = DxbcScalarType::Float32;
+     ptr.type.ccount = 1;
+     ptr.id = m_module.opAccessChain(
+       m_module.defPointerType(
+         getVectorTypeId(ptr.type),
+         spv::StorageClassOutput),
+       tessFactor.array, 1,
+       &tessFactorArrayIndex);
+
+     emitValueStore(ptr, tessValue,
+       DxbcRegMask(true, false, false, false));
+   } else {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled HS SV output: ", sv));
+   }
+ }
+
+
+ // Stores a system value written by the geometry shader. Position,
+ // clip/cull distance, render target and viewport indices are
+ // forwarded to the vertex shader store path; the primitive ID is
+ // written to an output builtin that is declared on first use.
+ // Everything else only produces a warning.
+ void DxbcCompiler::emitGsSystemValueStore(
+         DxbcSystemValue sv,
+         DxbcRegMask mask,
+   const DxbcRegisterValue& value) {
+   const bool forwardToVs
+      = sv == DxbcSystemValue::Position
+     || sv == DxbcSystemValue::CullDistance
+     || sv == DxbcSystemValue::ClipDistance
+     || sv == DxbcSystemValue::RenderTargetId
+     || sv == DxbcSystemValue::ViewportId;
+
+   if (forwardToVs) {
+     emitVsSystemValueStore(sv, mask, value);
+     return;
+   }
+
+   if (sv != DxbcSystemValue::PrimitiveId) {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled GS SV output: ", sv));
+     return;
+   }
+
+   if (!m_primitiveIdOut) {
+     m_primitiveIdOut = emitNewBuiltinVariable({
+       { DxbcScalarType::Uint32, 1, 0 },
+       spv::StorageClassOutput },
+       spv::BuiltInPrimitiveId,
+       "gs_primitive_id");
+   }
+
+   DxbcRegisterPointer primitiveIdPtr;
+   primitiveIdPtr.type.ctype = DxbcScalarType::Uint32;
+   primitiveIdPtr.type.ccount = 1;
+   primitiveIdPtr.id = m_primitiveIdOut;
+
+   const DxbcRegisterValue primitiveId = emitRegisterExtract(value, mask);
+   emitValueStore(primitiveIdPtr, primitiveId,
+     DxbcRegMask(true, false, false, false));
+ }
+
+
+ // No pixel shader system value outputs are handled here. The
+ // mask and value are ignored and a warning naming the system
+ // value is logged.
+ void DxbcCompiler::emitPsSystemValueStore(
+         DxbcSystemValue sv,
+         DxbcRegMask mask,
+   const DxbcRegisterValue& value) {
+   const auto message = str::format(
+     "DxbcCompiler: Unhandled PS SV output: ", sv);
+   Logger::warn(message);
+ }
+
+
+ // Stores a system value written by the domain shader. The handled
+ // values are forwarded to the vertex shader store path, anything
+ // else only produces a warning.
+ void DxbcCompiler::emitDsSystemValueStore(
+         DxbcSystemValue sv,
+         DxbcRegMask mask,
+   const DxbcRegisterValue& value) {
+   const bool forwardToVs
+      = sv == DxbcSystemValue::Position
+     || sv == DxbcSystemValue::CullDistance
+     || sv == DxbcSystemValue::ClipDistance
+     || sv == DxbcSystemValue::RenderTargetId
+     || sv == DxbcSystemValue::ViewportId;
+
+   if (!forwardToVs) {
+     Logger::warn(str::format(
+       "DxbcCompiler: Unhandled DS SV output: ", sv));
+     return;
+   }
+
+   emitVsSystemValueStore(sv, mask, value);
+ }
+
+
+ // Copies clip or cull distances from the output registers into the
+ // given builtin distance array. Every enabled component of every
+ // output signature entry carrying the system value is written to
+ // the next free array element. Does nothing if no array has been
+ // declared (dstArray is zero).
+ void DxbcCompiler::emitClipCullStore(
+         DxbcSystemValue sv,
+         uint32_t dstArray) {
+   if (dstArray == 0)
+     return;
+
+   // Index of the next array element to write
+   uint32_t nextElement = 0;
+
+   for (auto entry = m_osgn->begin(); entry != m_osgn->end(); ++entry) {
+     if (entry->systemValue != sv)
+       continue;
+
+     DxbcRegisterPointer regPtr = m_oRegs.at(entry->registerId);
+     DxbcRegisterValue regValue = emitValueLoad(regPtr);
+
+     for (uint32_t c = 0; c < 4; c++) {
+       if (!entry->componentMask[c])
+         continue;
+
+       const uint32_t elementId = m_module.consti32(nextElement++);
+
+       DxbcRegisterValue scalar = emitRegisterExtract(
+         regValue, DxbcRegMask::select(c));
+
+       DxbcRegisterPointer elementPtr;
+       elementPtr.type.ctype = DxbcScalarType::Float32;
+       elementPtr.type.ccount = 1;
+
+       const uint32_t ptrTypeId = m_module.defPointerType(
+         getVectorTypeId(elementPtr.type),
+         spv::StorageClassOutput);
+
+       elementPtr.id = m_module.opAccessChain(
+         ptrTypeId, dstArray, 1, &elementId);
+
+       emitValueStore(elementPtr, scalar,
+         DxbcRegMask(true, false, false, false));
+     }
+   }
+ }
+
+
+ // Copies clip or cull distances from the given builtin distance
+ // array into the private input array. For every input signature
+ // entry carrying the system value, one array element is read per
+ // enabled component, the scalars are packed into a vector, and the
+ // vector is stored to the entry's register using its component
+ // mask. Does nothing if no array has been declared.
+ void DxbcCompiler::emitClipCullLoad(
+         DxbcSystemValue sv,
+         uint32_t srcArray) {
+   if (srcArray == 0)
+     return;
+
+   // Index of the next array element to read
+   uint32_t nextElement = 0;
+
+   for (auto entry = m_isgn->begin(); entry != m_isgn->end(); ++entry) {
+     if (entry->systemValue != sv)
+       continue;
+
+     // Gather one scalar per enabled component
+     uint32_t scalarCount = 0;
+     std::array<uint32_t, 4> scalarIds = {{ 0, 0, 0, 0 }};
+
+     for (uint32_t c = 0; c < 4; c++) {
+       if (!entry->componentMask[c])
+         continue;
+
+       const uint32_t elementId = m_module.consti32(nextElement++);
+
+       DxbcRegisterPointer elementPtr;
+       elementPtr.type.ctype = DxbcScalarType::Float32;
+       elementPtr.type.ccount = 1;
+
+       const uint32_t elementPtrTypeId = m_module.defPointerType(
+         getVectorTypeId(elementPtr.type),
+         spv::StorageClassInput);
+
+       elementPtr.id = m_module.opAccessChain(
+         elementPtrTypeId, srcArray, 1, &elementId);
+
+       const uint32_t scalarId = emitValueLoad(elementPtr).id;
+       scalarIds[scalarCount++] = scalarId;
+     }
+
+     // A single scalar is used as it is, several
+     // scalars are combined into one vector
+     DxbcRegisterValue packed;
+     packed.type = { DxbcScalarType::Float32, scalarCount };
+     packed.id = scalarIds[0];
+
+     if (scalarCount > 1) {
+       packed.id = m_module.opCompositeConstruct(
+         getVectorTypeId(packed.type),
+         scalarCount, scalarIds.data());
+     }
+
+     // Write the result to the entry's slot of the input array
+     const uint32_t slotId = m_module.consti32(entry->registerId);
+
+     DxbcRegisterPointer slotPtr;
+     slotPtr.type.ctype = DxbcScalarType::Float32;
+     slotPtr.type.ccount = 4;
+
+     const uint32_t slotPtrTypeId = m_module.defPointerType(
+       getVectorTypeId(slotPtr.type),
+       spv::StorageClassPrivate);
+
+     slotPtr.id = m_module.opAccessChain(
+       slotPtrTypeId, m_vArray, 1, &slotId);
+
+     emitValueStore(slotPtr, packed, entry->componentMask);
+   }
+ }
+
+
+ // Returns the ID of a boolean condition for writes to the given
+ // UAV. Without a kill state variable this is the UAV's specId;
+ // otherwise specId is combined with the negated kill state.
+ uint32_t DxbcCompiler::emitUavWriteTest(const DxbcBufferInfo& uav) {
+   const uint32_t boolTypeId = m_module.defBoolType();
+
+   if (m_ps.killState == 0)
+     return uav.specId;
+
+   const uint32_t isKilled = m_module.opLoad(boolTypeId, m_ps.killState);
+   const uint32_t notKilled = m_module.opLogicalNot(boolTypeId, isKilled);
+
+   return m_module.opLogicalAnd(boolTypeId, uav.specId, notKilled);
+ }
+
+
+ // Enables the capabilities shared by all shader types and
+ // then runs the initialization routine of the program type
+ // that is being compiled.
+ void DxbcCompiler::emitInit() {
+   m_module.enableCapability(spv::CapabilityShader);
+   m_module.enableCapability(spv::CapabilityImageQuery);
+
+   // Stage-specific capabilities, variables and functions
+   switch (m_programInfo.type()) {
+     case DxbcProgramType::VertexShader:
+       emitVsInit();
+       break;
+
+     case DxbcProgramType::HullShader:
+       emitHsInit();
+       break;
+
+     case DxbcProgramType::DomainShader:
+       emitDsInit();
+       break;
+
+     case DxbcProgramType::GeometryShader:
+       emitGsInit();
+       break;
+
+     case DxbcProgramType::PixelShader:
+       emitPsInit();
+       break;
+
+     case DxbcProgramType::ComputeShader:
+       emitCsInit();
+       break;
+   }
+ }
+
+
+ // Begins a new function with the given ID, return type and
+ // function type. A function that is still open gets closed
+ // first.
+ void DxbcCompiler::emitFunctionBegin(
+         uint32_t entryPoint,
+         uint32_t returnType,
+         uint32_t funcType) {
+   emitFunctionEnd();
+
+   m_module.functionBegin(returnType,
+     entryPoint, funcType,
+     spv::FunctionControlMaskNone);
+   m_insideFunction = true;
+ }
+
+
+ // Closes the function that is currently being recorded by
+ // emitting a return and the function end. Does nothing if
+ // no function is open.
+ void DxbcCompiler::emitFunctionEnd() {
+   if (!m_insideFunction)
+     return;
+
+   m_module.opReturn();
+   m_module.functionEnd();
+   m_insideFunction = false;
+ }
+
+
+ // Emits a label with a newly allocated ID.
+ void DxbcCompiler::emitFunctionLabel() {
+   const uint32_t labelId = m_module.allocateId();
+   m_module.opLabel(labelId);
+ }
+
+
+ // Begins the entry point function, which takes no arguments
+ // and returns nothing, and emits its first label.
+ void DxbcCompiler::emitMainFunctionBegin() {
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_entryPointId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+ }
+
+
+ // Vertex shader setup: enables the required capabilities, declares
+ // the per-vertex output block, the input array and the clip/cull
+ // distance outputs, and begins the function "vs_main" that will
+ // receive the translated shader code.
+ void DxbcCompiler::emitVsInit() {
+   m_module.enableCapability(spv::CapabilityClipDistance);
+   m_module.enableCapability(spv::CapabilityCullDistance);
+   m_module.enableCapability(spv::CapabilityDrawParameters);
+
+   // Per-vertex output block, this is where the
+   // vertex position is going to be written to
+   const uint32_t blockTypeId = getPerVertexBlockId();
+   const uint32_t blockPtrTypeId = m_module.defPointerType(
+     blockTypeId, spv::StorageClassOutput);
+
+   m_perVertexOut = m_module.newVar(
+     blockPtrTypeId, spv::StorageClassOutput);
+   m_entryPointInterfaces.push_back(m_perVertexOut);
+   m_module.setDebugName(m_perVertexOut, "vs_vertex_out");
+
+   // Input array without a per-vertex dimension
+   emitDclInputArray(0);
+
+   // Clip and cull distance output arrays
+   m_clipDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numClipPlanes,
+     spv::BuiltInClipDistance,
+     spv::StorageClassOutput);
+
+   m_cullDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numCullPlanes,
+     spv::BuiltInCullDistance,
+     spv::StorageClassOutput);
+
+   // Function for the vertex shader code
+   m_vs.functionId = m_module.allocateId();
+   m_module.setDebugName(m_vs.functionId, "vs_main");
+
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_vs.functionId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+ }
+
+
+ // Hull shader setup: enables the required capabilities and
+ // declares the invocation ID input builtin as well as the
+ // outer and inner tess level output builtins.
+ void DxbcCompiler::emitHsInit() {
+   m_module.enableCapability(spv::CapabilityTessellation);
+   m_module.enableCapability(spv::CapabilityClipDistance);
+   m_module.enableCapability(spv::CapabilityCullDistance);
+
+   // Scalar uint32 input holding the invocation ID
+   DxbcRegisterInfo invocationIdInfo = {
+     { DxbcScalarType::Uint32, 1, 0 },
+     spv::StorageClassInput };
+
+   m_hs.builtinInvocationId = emitNewBuiltinVariable(
+     invocationIdInfo,
+     spv::BuiltInInvocationId,
+     "vOutputControlPointId");
+
+   m_hs.builtinTessLevelOuter = emitBuiltinTessLevelOuter(spv::StorageClassOutput);
+   m_hs.builtinTessLevelInner = emitBuiltinTessLevelInner(spv::StorageClassOutput);
+ }
+
+
+ // Domain shader setup: enables the required capabilities, declares
+ // the tess level input builtins, the clip/cull distance outputs and
+ // the per-vertex output block, and begins the function "ds_main"
+ // that will receive the translated shader code.
+ void DxbcCompiler::emitDsInit() {
+   m_module.enableCapability(spv::CapabilityTessellation);
+   m_module.enableCapability(spv::CapabilityClipDistance);
+   m_module.enableCapability(spv::CapabilityCullDistance);
+
+   // Tess levels are inputs in this stage
+   m_ds.builtinTessLevelOuter = emitBuiltinTessLevelOuter(spv::StorageClassInput);
+   m_ds.builtinTessLevelInner = emitBuiltinTessLevelInner(spv::StorageClassInput);
+
+   // Types of the per-vertex output block
+   const uint32_t blockTypeId = getPerVertexBlockId();
+   const uint32_t blockPtrTypeId = m_module.defPointerType(
+     blockTypeId, spv::StorageClassOutput);
+
+   // Clip and cull distance output arrays
+   m_clipDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numClipPlanes,
+     spv::BuiltInClipDistance,
+     spv::StorageClassOutput);
+
+   m_cullDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numCullPlanes,
+     spv::BuiltInCullDistance,
+     spv::StorageClassOutput);
+
+   // The per-vertex output block itself
+   m_perVertexOut = m_module.newVar(
+     blockPtrTypeId, spv::StorageClassOutput);
+   m_entryPointInterfaces.push_back(m_perVertexOut);
+   m_module.setDebugName(m_perVertexOut, "ds_vertex_out");
+
+   // Function for the domain shader code
+   m_ds.functionId = m_module.allocateId();
+   m_module.setDebugName(m_ds.functionId, "ds_main");
+
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_ds.functionId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+ }
+
+
+ // Geometry shader setup: enables the required capabilities, sets
+ // up transform feedback if xfb info is present, declares the
+ // per-vertex output block and the clip/cull distance outputs, and
+ // begins the function "gs_main" that will receive the translated
+ // shader code.
+ void DxbcCompiler::emitGsInit() {
+   m_module.enableCapability(spv::CapabilityGeometry);
+   m_module.enableCapability(spv::CapabilityClipDistance);
+   m_module.enableCapability(spv::CapabilityCullDistance);
+
+   const bool hasXfb = m_moduleInfo.xfb != nullptr;
+
+   // Transform feedback needs additional
+   // capabilities and an execution mode
+   if (hasXfb) {
+     m_module.enableCapability(spv::CapabilityGeometryStreams);
+     m_module.enableCapability(spv::CapabilityTransformFeedback);
+
+     m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeXfb);
+   }
+
+   // The per-vertex output block is not an array, outputs get
+   // flushed when calling EmitVertex. The block is left out if
+   // xfb info is present with a negative rasterized stream.
+   if (!m_moduleInfo.xfb || m_moduleInfo.xfb->rasterizedStream >= 0) {
+     const uint32_t blockTypeId = getPerVertexBlockId();
+     const uint32_t blockPtrTypeId = m_module.defPointerType(
+       blockTypeId, spv::StorageClassOutput);
+
+     m_perVertexOut = m_module.newVar(
+       blockPtrTypeId, spv::StorageClassOutput);
+     m_entryPointInterfaces.push_back(m_perVertexOut);
+     m_module.setDebugName(m_perVertexOut, "gs_vertex_out");
+   }
+
+   // Clip and cull distance output arrays
+   m_clipDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numClipPlanes,
+     spv::BuiltInClipDistance,
+     spv::StorageClassOutput);
+
+   m_cullDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullOut.numCullPlanes,
+     spv::BuiltInCullDistance,
+     spv::StorageClassOutput);
+
+   // Output variables for transform feedback
+   if (hasXfb)
+     emitXfbOutputDeclarations();
+
+   // Function for the geometry shader code
+   m_gs.functionId = m_module.allocateId();
+   m_module.setDebugName(m_gs.functionId, "gs_main");
+
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_gs.functionId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+ }
+
+
+ // Pixel shader setup: sets the upper-left origin execution mode,
+ // declares the input array and the clip/cull distance inputs,
+ // begins the function "ps_main" and prepares the way in which
+ // discards are going to be implemented.
+ void DxbcCompiler::emitPsInit() {
+   m_module.enableCapability(spv::CapabilityDerivativeControl);
+
+   m_module.setExecutionMode(m_entryPointId,
+     spv::ExecutionModeOriginUpperLeft);
+
+   // Input array without a per-vertex dimension
+   emitDclInputArray(0);
+
+   // Clip and cull distance input arrays
+   m_clipDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullIn.numClipPlanes,
+     spv::BuiltInClipDistance,
+     spv::StorageClassInput);
+
+   m_cullDistances = emitDclClipCullDistanceArray(
+     m_analysis->clipCullIn.numCullPlanes,
+     spv::BuiltInCullDistance,
+     spv::StorageClassInput);
+
+   // Function for the pixel shader code
+   m_ps.functionId = m_module.allocateId();
+   m_module.setDebugName(m_ps.functionId, "ps_main");
+
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_ps.functionId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+
+   // Everything below only matters for shaders that discard
+   if (!m_analysis->usesKill)
+     return;
+
+   if (m_moduleInfo.options.useDemoteToHelperInvocation) {
+     // This extension basically implements D3D-style discard
+     m_module.enableExtension("SPV_EXT_demote_to_helper_invocation");
+     m_module.enableCapability(spv::CapabilityDemoteToHelperInvocationEXT);
+     return;
+   }
+
+   if (!m_analysis->usesDerivatives)
+     return;
+
+   // Kill operations may have to be deferred to the end of the
+   // shader in order to keep derivatives correct, so the kill
+   // state is tracked in a private boolean that starts as false.
+   const uint32_t killStatePtrTypeId = m_module.defPointerType(
+     m_module.defBoolType(), spv::StorageClassPrivate);
+   const uint32_t killStateInitId = m_module.constBool(false);
+
+   m_ps.killState = m_module.newVarInit(killStatePtrTypeId,
+     spv::StorageClassPrivate, killStateInitId);
+
+   m_module.setDebugName(m_ps.killState, "ps_kill");
+
+   if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
+     m_module.enableCapability(spv::CapabilityGroupNonUniform);
+     m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
+
+     DxbcRegisterInfo laneIdInfo;
+     laneIdInfo.type = { DxbcScalarType::Uint32, 1, 0 };
+     laneIdInfo.sclass = spv::StorageClassInput;
+
+     m_ps.builtinLaneId = emitNewBuiltinVariable(
+       laneIdInfo, spv::BuiltInSubgroupLocalInvocationId,
+       "fLaneId");
+   }
+ }
+
+
+ // Compute shader setup: begins the function "cs_main"
+ // that will receive the translated shader code.
+ void DxbcCompiler::emitCsInit() {
+   m_cs.functionId = m_module.allocateId();
+   m_module.setDebugName(m_cs.functionId, "cs_main");
+
+   const uint32_t returnTypeId = m_module.defVoidType();
+   const uint32_t funcTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   emitFunctionBegin(m_cs.functionId, returnTypeId, funcTypeId);
+   emitFunctionLabel();
+ }
+
+
+ // Emits the vertex shader entry point: input setup, a call to
+ // the vertex shader function, output setup and finally the
+ // clip and cull distance stores.
+ void DxbcCompiler::emitVsFinalize() {
+   emitMainFunctionBegin();
+   emitInputSetup();
+
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, m_vs.functionId, 0, nullptr);
+
+   emitOutputSetup();
+   emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances);
+   emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances);
+   emitFunctionEnd();
+ }
+
+
+ // Emits the hull shader entry point. The control point phase runs
+ // first and is followed by a barrier; the fork and join phases and
+ // the output setup are placed in an invocation block opened with a
+ // count of one. If no control point phase has been recorded, a
+ // passthrough phase is generated beforehand.
+ void DxbcCompiler::emitHsFinalize() {
+   const bool hasControlPointPhase = m_hs.cpPhase.functionId != 0;
+
+   if (!hasControlPointPhase)
+     m_hs.cpPhase = emitNewHullShaderPassthroughPhase();
+
+   // Control point phase
+   emitMainFunctionBegin();
+   emitInputSetup(m_hs.vertexCountIn);
+   emitHsControlPointPhase(m_hs.cpPhase);
+   emitHsPhaseBarrier();
+
+   // Fork phases, then join phases, then output setup
+   emitHsInvocationBlockBegin(1);
+
+   for (const auto& forkPhase : m_hs.forkPhases)
+     emitHsForkJoinPhase(forkPhase);
+
+   for (const auto& joinPhase : m_hs.joinPhases)
+     emitHsForkJoinPhase(joinPhase);
+
+   emitOutputSetup();
+   emitHsOutputSetup();
+   emitHsInvocationBlockEnd();
+   emitFunctionEnd();
+ }
+
+
+ // Emits the domain shader entry point: a call to the domain
+ // shader function, output setup and finally the clip and
+ // cull distance stores.
+ void DxbcCompiler::emitDsFinalize() {
+   emitMainFunctionBegin();
+
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, m_ds.functionId, 0, nullptr);
+
+   emitOutputSetup();
+   emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances);
+   emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances);
+   emitFunctionEnd();
+ }
+
+
+ // Emits the geometry shader entry point: input setup for all
+ // vertices of the input primitive, followed by a call to the
+ // geometry shader function. The invocation count is set to
+ // one if m_gs.invocationCount is zero.
+ void DxbcCompiler::emitGsFinalize() {
+   if (m_gs.invocationCount == 0)
+     m_module.setInvocations(m_entryPointId, 1);
+
+   emitMainFunctionBegin();
+
+   const uint32_t vertexCount = primitiveVertexCount(m_gs.inputPrimitive);
+   emitInputSetup(vertexCount);
+
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, m_gs.functionId, 0, nullptr);
+
+   // Outputs have already been set up during
+   // the EmitVertex step, so we're done here
+   emitFunctionEnd();
+ }
+
+
+ // Emits the pixel shader entry point: input setup and clip/cull
+ // distance loads, a call to the pixel shader function, the
+ // deferred kill if a kill state variable exists, and finally
+ // output setup, output mapping and the optional depth clamp.
+ void DxbcCompiler::emitPsFinalize() {
+   emitMainFunctionBegin();
+   emitInputSetup();
+   emitClipCullLoad(DxbcSystemValue::ClipDistance, m_clipDistances);
+   emitClipCullLoad(DxbcSystemValue::CullDistance, m_cullDistances);
+
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, m_ps.functionId, 0, nullptr);
+
+   if (m_ps.killState != 0) {
+     // Kill the invocation if the kill state has been set
+     DxbcConditional killBlock;
+     killBlock.labelIf = m_module.allocateId();
+     killBlock.labelEnd = m_module.allocateId();
+
+     const uint32_t boolTypeId = m_module.defBoolType();
+     const uint32_t isKilled = m_module.opLoad(boolTypeId, m_ps.killState);
+
+     m_module.opSelectionMerge(killBlock.labelEnd, spv::SelectionControlMaskNone);
+     m_module.opBranchConditional(isKilled, killBlock.labelIf, killBlock.labelEnd);
+
+     m_module.opLabel(killBlock.labelIf);
+     m_module.opKill();
+
+     m_module.opLabel(killBlock.labelEnd);
+   }
+
+   emitOutputSetup();
+   emitOutputMapping();
+
+   if (m_moduleInfo.options.useDepthClipWorkaround)
+     emitOutputDepthClamp();
+
+   emitFunctionEnd();
+ }
+
+
+ // Emits the compute shader entry point: optional workgroup
+ // memory initialization, followed by a call to the compute
+ // shader function.
+ void DxbcCompiler::emitCsFinalize() {
+   emitMainFunctionBegin();
+
+   if (m_moduleInfo.options.zeroInitWorkgroupMemory)
+     emitInitWorkgroupMemory();
+
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, m_cs.functionId, 0, nullptr);
+
+   emitFunctionEnd();
+ }
+
+
+ // Declares one output variable for each transform feedback entry
+ // that has a matching entry in the output signature, decorates it
+ // with stream, buffer, offset and stride, and then assigns every
+ // variable a location of its own with a component index of zero.
+ // Entries without a matching signature entry are skipped.
+ void DxbcCompiler::emitXfbOutputDeclarations() {
+   for (uint32_t i = 0; i < m_moduleInfo.xfb->entryCount; i++) {
+     const DxbcXfbEntry* xfb = m_moduleInfo.xfb->entries + i;
+     const DxbcSgnEntry* sgn = m_osgn->find(
+       xfb->semanticName,
+       xfb->semanticIndex,
+       xfb->streamId);
+
+     if (sgn == nullptr)
+       continue;
+
+     DxbcRegisterInfo info;
+     info.type.ctype = DxbcScalarType::Float32;
+     info.type.ccount = xfb->componentCount;
+     info.type.alength = 0;
+     info.sclass = spv::StorageClassOutput;
+
+     // The destination mask covers the first componentCount
+     // components. The source mask is the same set of bits,
+     // shifted by the first component set in the signature
+     // entry's mask and by the xfb entry's component index.
+     uint32_t dstBits = (1 << xfb->componentCount) - 1;
+     uint32_t srcBits = dstBits
+       << sgn->componentMask.firstSet()
+       << xfb->componentIndex;
+
+     DxbcXfbVar var;
+     var.varId = emitNewVariable(info);
+     var.streamId = xfb->streamId;
+     var.outputId = sgn->registerId;
+     var.srcMask = DxbcRegMask(srcBits);
+     var.dstMask = DxbcRegMask(dstBits);
+     m_xfbVars.push_back(var);
+
+     m_entryPointInterfaces.push_back(var.varId);
+     m_module.setDebugName(var.varId,
+       str::format("xfb", i).c_str());
+
+     m_module.decorateXfb(var.varId,
+       xfb->streamId, xfb->bufferId, xfb->offset,
+       m_moduleInfo.xfb->strides[xfb->bufferId]);
+   }
+
+   // TODO Compact location/component assignment
+   for (uint32_t slot = 0; slot < m_xfbVars.size(); slot++) {
+     auto& var = m_xfbVars[slot];
+     var.location = slot;
+     var.component = 0;
+   }
+
+   for (uint32_t slot = 0; slot < m_xfbVars.size(); slot++) {
+     const auto& var = m_xfbVars[slot];
+
+     m_module.decorateLocation (var.varId, var.location);
+     m_module.decorateComponent(var.varId, var.component);
+   }
+ }
+
+
+ // Writes all transform feedback variables that belong to the given
+ // stream. The data is read from the output registers or, in
+ // passthrough mode, from element zero of the input registers.
+ void DxbcCompiler::emitXfbOutputSetup(
+         uint32_t streamId,
+         bool passthrough) {
+   for (size_t i = 0; i < m_xfbVars.size(); i++) {
+     if (m_xfbVars[i].streamId != streamId)
+       continue;
+
+     DxbcRegisterPointer srcPtr;
+
+     if (passthrough) {
+       // Input registers are arrays, read the first element
+       srcPtr = m_vRegs[m_xfbVars[i].outputId];
+       srcPtr = emitArrayAccess(srcPtr,
+         spv::StorageClassInput,
+         m_module.constu32(0));
+     } else {
+       srcPtr = m_oRegs[m_xfbVars[i].outputId];
+     }
+
+     DxbcRegisterPointer xfbPtr;
+     xfbPtr.type.ctype = DxbcScalarType::Float32;
+     xfbPtr.type.ccount = m_xfbVars[i].dstMask.popCount();
+     xfbPtr.id = m_xfbVars[i].varId;
+
+     const DxbcRegisterValue loaded = emitValueLoad(srcPtr);
+     DxbcRegisterValue selected = emitRegisterExtract(
+       loaded, m_xfbVars[i].srcMask);
+     emitValueStore(xfbPtr, selected, m_xfbVars[i].dstMask);
+   }
+ }
+
+
+ // Emits a call to the function of the given control point
+ // phase. The function takes no arguments.
+ void DxbcCompiler::emitHsControlPointPhase(
+   const DxbcCompilerHsControlPointPhase& phase) {
+   const uint32_t voidTypeId = m_module.defVoidType();
+   m_module.opFunctionCall(voidTypeId, phase.functionId, 0, nullptr);
+ }
+
+
+ // Emits one call to the function of the given fork/join phase
+ // per instance, with the instance index passed as the only
+ // argument.
+ void DxbcCompiler::emitHsForkJoinPhase(
+   const DxbcCompilerHsForkJoinPhase& phase) {
+   for (uint32_t instance = 0; instance < phase.instanceCount; instance++) {
+     const uint32_t instanceArg = m_module.constu32(instance);
+     const uint32_t voidTypeId = m_module.defVoidType();
+
+     m_module.opFunctionCall(voidTypeId,
+       phase.functionId, 1, &instanceArg);
+   }
+ }
+
+
+ // Declares the private array of four-component float vectors that
+ // input registers get copied to. The array length is emitted as a
+ // late constant; with a non-zero vertex count, the array is wrapped
+ // in an outer array with one element per vertex.
+ void DxbcCompiler::emitDclInputArray(uint32_t vertexCount) {
+   DxbcVectorType elementType;
+   elementType.ctype = DxbcScalarType::Float32;
+   elementType.ccount = 4;
+
+   // The array has at least one element, and without an input
+   // signature it has exactly one. In two-dimensional arrays,
+   // the outer index is the vertex ID within an invocation.
+   if (m_isgn != nullptr)
+     m_vArrayLength = std::max(1u, m_isgn->maxRegisterCount());
+   else
+     m_vArrayLength = 1;
+
+   m_vArrayLengthId = m_module.lateConst32(getScalarTypeId(DxbcScalarType::Uint32));
+
+   const uint32_t elementTypeId = getVectorTypeId(elementType);
+   uint32_t arrayTypeId = m_module.defArrayType(elementTypeId, m_vArrayLengthId);
+
+   if (vertexCount != 0) {
+     const uint32_t vertexCountId = m_module.constu32(vertexCount);
+     arrayTypeId = m_module.defArrayType(arrayTypeId, vertexCountId);
+   }
+
+   // The variable is private since input registers and some
+   // system values are copied into it during the setup phase.
+   const uint32_t arrayPtrTypeId = m_module.defPointerType(
+     arrayTypeId, spv::StorageClassPrivate);
+
+   m_vArray = m_module.newVar(
+     arrayPtrTypeId, spv::StorageClassPrivate);
+   m_module.setDebugName(m_vArray, "shader_in");
+ }
+
+
+ // Declares the per-vertex input block with the given debug name
+ // and adds it to the entry point interface. With a non-zero
+ // vertex count, the block is declared as an array of that size.
+ void DxbcCompiler::emitDclInputPerVertex(
+         uint32_t vertexCount,
+   const char* varName) {
+   uint32_t blockTypeId = getPerVertexBlockId();
+
+   if (vertexCount != 0) {
+     const uint32_t vertexCountId = m_module.constu32(vertexCount);
+     blockTypeId = m_module.defArrayType(blockTypeId, vertexCountId);
+   }
+
+   const uint32_t blockPtrTypeId = m_module.defPointerType(
+     blockTypeId, spv::StorageClassInput);
+
+   m_perVertexIn = m_module.newVar(
+     blockPtrTypeId, spv::StorageClassInput);
+   m_module.setDebugName(m_perVertexIn, varName);
+
+   m_entryPointInterfaces.push_back(m_perVertexIn);
+ }
+
+
+ // Declares a float array of the given length, decorated with the
+ // given builtin, and adds it to the entry point interface. Returns
+ // the variable ID, or zero if the length is zero, in which case
+ // nothing gets declared.
+ uint32_t DxbcCompiler::emitDclClipCullDistanceArray(
+         uint32_t length,
+         spv::BuiltIn builtIn,
+         spv::StorageClass storageClass) {
+   if (length == 0)
+     return 0;
+
+   const uint32_t floatTypeId = m_module.defFloatType(32);
+   const uint32_t lengthId = m_module.constu32(length);
+   const uint32_t arrayTypeId = m_module.defArrayType(floatTypeId, lengthId);
+   const uint32_t arrayPtrTypeId = m_module.defPointerType(arrayTypeId, storageClass);
+   const uint32_t arrayVarId = m_module.newVar(arrayPtrTypeId, storageClass);
+
+   m_module.decorateBuiltIn(arrayVarId, builtIn);
+
+   // Everything that is not the clip distance
+   // builtin is named after the cull distances
+   const char* debugName = "cull_distances";
+
+   if (builtIn == spv::BuiltInClipDistance)
+     debugName = "clip_distances";
+
+   m_module.setDebugName(arrayVarId, debugName);
+
+   m_entryPointInterfaces.push_back(arrayVarId);
+   return arrayVarId;
+ }
+
+
+ // Begins a new function for the control point phase, which takes
+ // no arguments and returns nothing, and returns a phase object
+ // that holds the function ID. The function is left open.
+ DxbcCompilerHsControlPointPhase DxbcCompiler::emitNewHullShaderControlPointPhase() {
+   const uint32_t phaseFuncTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   const uint32_t phaseFuncId = m_module.allocateId();
+
+   emitFunctionBegin(phaseFuncId,
+     m_module.defVoidType(),
+     phaseFuncTypeId);
+   emitFunctionLabel();
+
+   DxbcCompilerHsControlPointPhase phase;
+   phase.functionId = phaseFuncId;
+   return phase;
+ }
+
+
+ // Generates the function "hs_passthrough", which serves as the
+ // control point phase. For each input signature entry, the input
+ // is declared and its element for the current invocation is copied
+ // to the matching register of the per-vertex output array. Returns
+ // a phase object that holds the function ID.
+ DxbcCompilerHsControlPointPhase DxbcCompiler::emitNewHullShaderPassthroughPhase() {
+   const uint32_t phaseFuncTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 0, nullptr);
+
+   const uint32_t phaseFuncId = m_module.allocateId();
+   m_module.setDebugName(phaseFuncId, "hs_passthrough");
+
+   emitFunctionBegin(phaseFuncId,
+     m_module.defVoidType(),
+     phaseFuncTypeId);
+   emitFunctionLabel();
+
+   // The invocation ID is used as the array
+   // index for both the inputs and the outputs
+   const uint32_t invocationId = m_module.opLoad(
+     getScalarTypeId(DxbcScalarType::Uint32),
+     m_hs.builtinInvocationId);
+
+   for (auto entry = m_isgn->begin(); entry != m_isgn->end(); ++entry) {
+     emitDclInput(
+       entry->registerId, m_hs.vertexCountIn,
+       entry->componentMask,
+       DxbcSystemValue::None,
+       DxbcInterpolationMode::Undefined);
+
+     // Output access chain: [invocation][register]
+     const std::array<uint32_t, 2> outputChain
+       = {{ invocationId, m_module.constu32(entry->registerId) }};
+
+     // Input register element for this invocation
+     DxbcRegisterPointer inputPtr;
+     inputPtr.type = m_vRegs.at(entry->registerId).type;
+
+     const uint32_t inputPtrTypeId = m_module.defPointerType(
+       getVectorTypeId(inputPtr.type), spv::StorageClassInput);
+
+     inputPtr.id = m_module.opAccessChain(inputPtrTypeId,
+       m_vRegs.at(entry->registerId).id, 1, &invocationId);
+
+     // Outputs are float vectors, so bitcast the input
+     const DxbcRegisterValue loaded = emitValueLoad(inputPtr);
+     DxbcRegisterValue inputValue = emitRegisterBitcast(
+       loaded, DxbcScalarType::Float32);
+
+     DxbcRegisterPointer outputPtr;
+     outputPtr.type.ctype = DxbcScalarType::Float32;
+     outputPtr.type.ccount = 4;
+
+     const uint32_t outputPtrTypeId = m_module.defPointerType(
+       getVectorTypeId(outputPtr.type), spv::StorageClassOutput);
+
+     outputPtr.id = m_module.opAccessChain(outputPtrTypeId,
+       m_hs.outputPerVertex, outputChain.size(), outputChain.data());
+
+     emitValueStore(outputPtr, inputValue,
+       DxbcRegMask::firstN(inputValue.type.ccount));
+   }
+
+   // The passthrough function is complete at this point
+   emitFunctionEnd();
+
+   DxbcCompilerHsControlPointPhase phase;
+   phase.functionId = phaseFuncId;
+   return phase;
+ }
+
+
+ // Opens a new void function for a hull shader fork/join phase. The
+ // function takes one parameter of type defIntType(32, 0), the same type
+ // this file uses for Uint32. The returned descriptor carries the
+ // function ID and the parameter ID (as the phase's instance ID). No
+ // function end is emitted here, so the function stays open.
+ DxbcCompilerHsForkJoinPhase DxbcCompiler::emitNewHullShaderForkJoinPhase() {
+   uint32_t instanceTypeId = m_module.defIntType(32, 0);
+   const uint32_t phaseFnTypeId = m_module.defFunctionType(
+     m_module.defVoidType(), 1, &instanceTypeId);
+
+   const uint32_t phaseFnId = m_module.allocateId();
+   this->emitFunctionBegin(phaseFnId, m_module.defVoidType(), phaseFnTypeId);
+
+   // The parameter has to be declared before the first label
+   const uint32_t instanceParamId = m_module.functionParameter(instanceTypeId);
+   this->emitFunctionLabel();
+
+   DxbcCompilerHsForkJoinPhase phase;
+   phase.functionId = phaseFnId;
+   phase.instanceId = instanceParamId;
+   return phase;
+ }
+
+
+ // Emits a control barrier between hull shader phases. The execution
+ // scope is Workgroup; the memory scope is Invocation and the memory
+ // semantics mask is None.
+ void DxbcCompiler::emitHsPhaseBarrier() {
+   // Constants are created one statement at a time to keep their order fixed
+   const uint32_t executionScope = m_module.constu32(spv::ScopeWorkgroup);
+   const uint32_t memoryScope = m_module.constu32(spv::ScopeInvocation);
+   const uint32_t memorySemantics = m_module.constu32(spv::MemorySemanticsMaskNone);
+
+   m_module.opControlBarrier(executionScope, memoryScope, memorySemantics);
+ }
+
+
+ // Opens a conditional block that is only entered by invocations whose
+ // invocation ID is less than 'count'. The labels of the block and of its
+ // merge point are stored in m_hs so that emitHsInvocationBlockEnd() can
+ // close the block again.
+ void DxbcCompiler::emitHsInvocationBlockBegin(uint32_t count) {
+   const uint32_t currentInvocation = m_module.opLoad(
+     getScalarTypeId(DxbcScalarType::Uint32),
+     m_hs.builtinInvocationId);
+
+   const uint32_t isActive = m_module.opULessThan(
+     m_module.defBoolType(), currentInvocation,
+     m_module.constu32(count));
+
+   m_hs.invocationBlockBegin = m_module.allocateId();
+   m_hs.invocationBlockEnd = m_module.allocateId();
+
+   // Structured selection: the merge block must be declared before the branch
+   m_module.opSelectionMerge(m_hs.invocationBlockEnd, spv::SelectionControlMaskNone);
+   m_module.opBranchConditional(isActive, m_hs.invocationBlockBegin, m_hs.invocationBlockEnd);
+
+   m_module.opLabel(m_hs.invocationBlockBegin);
+ }
+
+
+ // Closes the conditional block opened by emitHsInvocationBlockBegin():
+ // branches to the merge label, starts the merge block and resets the
+ // label IDs stored in m_hs to zero.
+ void DxbcCompiler::emitHsInvocationBlockEnd() {
+   const uint32_t mergeLabelId = m_hs.invocationBlockEnd;
+
+   m_module.opBranch(mergeLabelId);
+   m_module.opLabel(mergeLabelId);
+
+   m_hs.invocationBlockEnd = 0;
+   m_hs.invocationBlockBegin = 0;
+ }
+
+
+ // Declares the Output-class per-patch array and copies into it every
+ // float4 element of m_hs.outputPerPatch (read through Private-class
+ // pointers) whose bit is set in m_hs.outputPerPatchMask. Does nothing
+ // if emitTessInterfacePerPatch() returns 0, i.e. declares no array.
+ void DxbcCompiler::emitHsOutputSetup() {
+   uint32_t outputPerPatch = emitTessInterfacePerPatch(spv::StorageClassOutput);
+
+   if (!outputPerPatch)
+     return;
+
+   uint32_t vecType = getVectorTypeId({ DxbcScalarType::Float32, 4 });
+
+   uint32_t srcPtrType = m_module.defPointerType(vecType, spv::StorageClassPrivate);
+   uint32_t dstPtrType = m_module.defPointerType(vecType, spv::StorageClassOutput);
+
+   for (uint32_t i = 0; i < 32; i++) {
+     // Use an unsigned literal: shifting a signed 1 by 31 does not fit
+     // into int and is not portable (undefined before C++14).
+     if (!(m_hs.outputPerPatchMask & (1u << i)))
+       continue;
+
+     uint32_t index = m_module.constu32(i);
+
+     uint32_t srcPtr = m_module.opAccessChain(srcPtrType, m_hs.outputPerPatch, 1, &index);
+     uint32_t dstPtr = m_module.opAccessChain(dstPtrType, outputPerPatch, 1, &index);
+
+     m_module.opStore(dstPtr, m_module.opLoad(vecType, srcPtr));
+   }
+ }
+
+
+ // Declares the per-patch register array as an array of float4 in the
+ // given storage class, sized by the patch constant signature's
+ // maxRegisterCount(). Returns the variable ID, or 0 if there is no
+ // patch constant signature or it reports a count of zero.
+ // Non-private variables are decorated as Patch at location 0 and are
+ // added to the entry point interface list.
+ uint32_t DxbcCompiler::emitTessInterfacePerPatch(spv::StorageClass storageClass) {
+   const uint32_t registerCount = m_psgn != nullptr ? m_psgn->maxRegisterCount() : 0;
+
+   if (registerCount == 0)
+     return 0;
+
+   // Debug name depends on how the variable is going to be used
+   const char* debugName;
+
+   switch (storageClass) {
+     case spv::StorageClassPrivate: debugName = "rPatch"; break;
+     case spv::StorageClassOutput:  debugName = "oPatch"; break;
+     default:                       debugName = "vPatch"; break;
+   }
+
+   const uint32_t float4TypeId = m_module.defVectorType(m_module.defFloatType(32), 4);
+   const uint32_t arrayTypeId = m_module.defArrayType(float4TypeId, m_module.constu32(registerCount));
+   const uint32_t pointerTypeId = m_module.defPointerType(arrayTypeId, storageClass);
+   const uint32_t varId = m_module.newVar(pointerTypeId, storageClass);
+
+   m_module.setDebugName(varId, debugName);
+
+   // Private copies are internal only and carry no interface decorations
+   if (storageClass == spv::StorageClassPrivate)
+     return varId;
+
+   m_module.decorate(varId, spv::DecorationPatch);
+   m_module.decorateLocation(varId, 0);
+
+   m_entryPointInterfaces.push_back(varId);
+   return varId;
+ }
+
+
+ // Declares the per-vertex register array as float4[vertexCount][N],
+ // where N is maxRegisterCount() of the input signature for the Input
+ // storage class and of the output signature otherwise. Returns the
+ // variable ID, or 0 if the relevant signature is missing or reports a
+ // count of zero. The variable's location is the patch constant
+ // signature's maxRegisterCount(), or 0 if there is no such signature.
+ uint32_t DxbcCompiler::emitTessInterfacePerVertex(spv::StorageClass storageClass, uint32_t vertexCount) {
+   const bool isInput = storageClass == spv::StorageClassInput;
+   const auto& signature = isInput ? m_isgn : m_osgn;
+
+   const uint32_t registerCount = signature != nullptr
+     ? signature->maxRegisterCount()
+     : 0;
+
+   if (registerCount == 0)
+     return 0;
+
+   uint32_t firstLocation = 0;
+
+   if (m_psgn != nullptr)
+     firstLocation = m_psgn->maxRegisterCount();
+
+   const uint32_t float4TypeId = m_module.defVectorType(m_module.defFloatType(32), 4);
+   const uint32_t perVertexTypeId = m_module.defArrayType(float4TypeId, m_module.constu32(registerCount));
+   const uint32_t perPatchTypeId = m_module.defArrayType(perVertexTypeId, m_module.constu32(vertexCount));
+   const uint32_t pointerTypeId = m_module.defPointerType(perPatchTypeId, storageClass);
+   const uint32_t varId = m_module.newVar(pointerTypeId, storageClass);
+
+   m_module.setDebugName(varId, isInput ? "vVertex" : "oVertex");
+
+   // NOTE(review): unlike emitTessInterfacePerPatch(), the location is
+   // applied even for the Private storage class - confirm whether this
+   // function is ever called with Private.
+   m_module.decorateLocation(varId, firstLocation);
+
+   if (storageClass != spv::StorageClassPrivate)
+     m_entryPointInterfaces.push_back(varId);
+
+   return varId;
+ }
+
+
+ // Creates the private "g_sample_pos" variable, a constant-initialized
+ // array of 32 float2 sample positions. The table is laid out so that
+ // the positions for a sample count of N start at index N (N = 1, 2, 4,
+ // 8, 16); index 0 is used for an invalid sample count or an unbound
+ // resource. Returns the ID of the variable.
+ uint32_t DxbcCompiler::emitSamplePosArray() {
+   struct SampleOffset {
+     float x;
+     float y;
+   };
+
+   static constexpr std::array<SampleOffset, 32> offsets = {{
+     // Invalid sample count / unbound resource
+     {  0.0f,     0.0f    },
+     // VK_SAMPLE_COUNT_1_BIT
+     {  0.0f,     0.0f    },
+     // VK_SAMPLE_COUNT_2_BIT
+     {  0.25f,    0.25f   },
+     { -0.25f,   -0.25f   },
+     // VK_SAMPLE_COUNT_4_BIT
+     { -0.125f,  -0.375f  },
+     {  0.375f,  -0.125f  },
+     { -0.375f,   0.125f  },
+     {  0.125f,   0.375f  },
+     // VK_SAMPLE_COUNT_8_BIT
+     {  0.0625f, -0.1875f },
+     { -0.0625f,  0.1875f },
+     {  0.3125f,  0.0625f },
+     { -0.1875f, -0.3125f },
+     { -0.3125f,  0.3125f },
+     { -0.4375f, -0.0625f },
+     {  0.1875f,  0.4375f },
+     {  0.4375f, -0.4375f },
+     // VK_SAMPLE_COUNT_16_BIT
+     {  0.0625f,  0.0625f },
+     { -0.0625f, -0.1875f },
+     { -0.1875f,  0.1250f },
+     {  0.2500f, -0.0625f },
+     { -0.3125f, -0.1250f },
+     {  0.1250f,  0.3125f },
+     {  0.3125f,  0.1875f },
+     {  0.1875f, -0.3125f },
+     { -0.1250f,  0.3750f },
+     {  0.0000f, -0.4375f },
+     { -0.2500f, -0.3750f },
+     { -0.3750f,  0.2500f },
+     { -0.5000f,  0.0000f },
+     {  0.4375f, -0.2500f },
+     {  0.3750f,  0.4375f },
+     { -0.4375f, -0.5000f },
+   }};
+
+   // Emit one float2 constant per table entry, in table order
+   std::array<uint32_t, 32> samplePosVectors;
+
+   for (uint32_t i = 0; i < samplePosVectors.size(); i++)
+     samplePosVectors[i] = m_module.constvec2f32(offsets[i].x, offsets[i].y);
+
+   const uint32_t arrayTypeId = getArrayTypeId({
+     DxbcScalarType::Float32, 2,
+     static_cast<uint32_t>(samplePosVectors.size()) });
+
+   const uint32_t initializerId = m_module.constComposite(
+     arrayTypeId,
+     samplePosVectors.size(),
+     samplePosVectors.data());
+
+   const uint32_t pointerTypeId = m_module.defPointerType(
+     arrayTypeId, spv::StorageClassPrivate);
+
+   const uint32_t varId = m_module.newVarInit(
+     pointerTypeId, spv::StorageClassPrivate, initializerId);
+
+   m_module.setDebugName(varId, "g_sample_pos");
+   return varId;
+ }
+
+
+ // Applies the float control flags from the module options to the entry
+ // point. If no flag is set, nothing is emitted. Otherwise the
+ // SPV_KHR_float_controls extension is enabled and, for each set flag,
+ // the matching capability and execution mode are emitted. The 64-bit
+ // modes are only considered if the module already has the Float64
+ // capability at the time of the call.
+ void DxbcCompiler::emitFloatControl() {
+   const DxbcFloatControlFlags flags = m_moduleInfo.options.floatControl;
+
+   if (flags.isClear())
+     return;
+
+   // Enables a capability and sets the related execution mode for one bit width
+   auto enableMode = [this] (
+           spv::Capability     capability,
+           spv::ExecutionMode  mode,
+           uint32_t            bitWidth) {
+     m_module.enableCapability(capability);
+     m_module.setExecutionMode(m_entryPointId, mode, 1, &bitWidth);
+   };
+
+   m_module.enableExtension("SPV_KHR_float_controls");
+
+   if (flags.test(DxbcFloatControlFlag::DenormFlushToZero32))
+     enableMode(spv::CapabilityDenormFlushToZero, spv::ExecutionModeDenormFlushToZero, 32);
+
+   if (flags.test(DxbcFloatControlFlag::PreserveNan32))
+     enableMode(spv::CapabilitySignedZeroInfNanPreserve, spv::ExecutionModeSignedZeroInfNanPreserve, 32);
+
+   if (!m_module.hasCapability(spv::CapabilityFloat64))
+     return;
+
+   if (flags.test(DxbcFloatControlFlag::DenormPreserve64))
+     enableMode(spv::CapabilityDenormPreserve, spv::ExecutionModeDenormPreserve, 64);
+
+   if (flags.test(DxbcFloatControlFlag::PreserveNan64))
+     enableMode(spv::CapabilitySignedZeroInfNanPreserve, spv::ExecutionModeSignedZeroInfNanPreserve, 64);
+ }
+
+
+ // Declares a new variable with the type and storage class described by
+ // 'info' and returns its ID. No name or decoration is applied.
+ uint32_t DxbcCompiler::emitNewVariable(const DxbcRegisterInfo& info) {
+   return m_module.newVar(this->getPointerTypeId(info), info.sclass);
+ }
+
+
+ // Declares a variable described by 'info', names it 'name', decorates
+ // it with the given built-in and adds it to the entry point interface
+ // list. Pixel shader inputs whose component type is neither Float32 nor
+ // Bool additionally get the Flat decoration. Returns the variable ID.
+ uint32_t DxbcCompiler::emitNewBuiltinVariable(
+   const DxbcRegisterInfo& info,
+   spv::BuiltIn builtIn,
+   const char* name) {
+   const uint32_t builtinVarId = emitNewVariable(info);
+
+   m_module.setDebugName(builtinVarId, name);
+   m_module.decorateBuiltIn(builtinVarId, builtIn);
+
+   const bool isPixelShaderInput
+     = m_programInfo.type() == DxbcProgramType::PixelShader
+    && info.sclass == spv::StorageClassInput;
+
+   const bool isFloatOrBool
+     = info.type.ctype == DxbcScalarType::Float32
+    || info.type.ctype == DxbcScalarType::Bool;
+
+   if (isPixelShaderInput && !isFloatOrBool)
+     m_module.decorate(builtinVarId, spv::DecorationFlat);
+
+   m_entryPointInterfaces.push_back(builtinVarId);
+   return builtinVarId;
+ }
+
+
+ // Declares the TessLevelOuter built-in ("bTessLevelOuter") in the given
+ // storage class as an array of four Float32 scalars, decorates it as
+ // Patch and returns its ID.
+ uint32_t DxbcCompiler::emitBuiltinTessLevelOuter(spv::StorageClass storageClass) {
+   // Component count 0 with array length 4: a float[4], not a float4
+   const DxbcRegisterInfo info = {
+     { DxbcScalarType::Float32, 0, 4 },
+     storageClass };
+
+   const uint32_t varId = emitNewBuiltinVariable(
+     info, spv::BuiltInTessLevelOuter, "bTessLevelOuter");
+
+   m_module.decorate(varId, spv::DecorationPatch);
+   return varId;
+ }
+
+
+ // Declares the TessLevelInner built-in ("bTessLevelInner") in the given
+ // storage class as an array of two Float32 scalars, decorates it as
+ // Patch and returns its ID.
+ uint32_t DxbcCompiler::emitBuiltinTessLevelInner(spv::StorageClass storageClass) {
+   // Component count 0 with array length 2: a float[2], not a float2
+   const DxbcRegisterInfo info = {
+     { DxbcScalarType::Float32, 0, 2 },
+     storageClass };
+
+   const uint32_t varId = emitNewBuiltinVariable(
+     info, spv::BuiltInTessLevelInner, "bTessLevelInner");
+
+   m_module.decorate(varId, spv::DecorationPatch);
+   return varId;
+ }
+
+
+ // Enables the SPV_EXT_shader_viewport_index_layer extension and its
+ // capability. The m_extensions flag makes sure this happens only once.
+ void DxbcCompiler::enableShaderViewportIndexLayer() {
+   if (m_extensions.shaderViewportIndexLayer)
+     return;
+
+   m_extensions.shaderViewportIndexLayer = true;
+
+   m_module.enableExtension("SPV_EXT_shader_viewport_index_layer");
+   m_module.enableCapability(spv::CapabilityShaderViewportIndexLayerEXT);
+ }
+
+
+ // Searches the control flow block list from back to front and returns a
+ // pointer to the first block whose type is one of 'types'. Returns
+ // nullptr if no such block exists.
+ DxbcCfgBlock* DxbcCompiler::cfgFindBlock(
+   const std::initializer_list<DxbcCfgBlockType>& types) {
+   auto block = m_controlFlowBlocks.rbegin();
+
+   while (block != m_controlFlowBlocks.rend()) {
+     for (auto wantedType : types) {
+       if (block->type == wantedType)
+         return &(*block);
+     }
+
+     block++;
+   }
+
+   return nullptr;
+ }
+
+
+ // Collects the buffer-related properties of the resource referenced by
+ // 'reg'. reg.idx[0].offset is used as the register index into the
+ // texture, UAV or thread group shared memory register list, depending
+ // on the operand type. Throws DxvkError for any other operand type.
+ DxbcBufferInfo DxbcCompiler::getBufferInfo(const DxbcRegister& reg) {
+   const uint32_t registerId = reg.idx[0].offset;
+
+   DxbcBufferInfo info;
+
+   switch (reg.type) {
+     case DxbcOperandType::Resource: {
+       const auto& srv = m_textures.at(registerId);
+
+       info.image  = srv.imageInfo;
+       info.stype  = srv.sampledType;
+       info.type   = srv.type;
+       info.typeId = srv.imageTypeId;
+       info.varId  = srv.varId;
+       info.specId = srv.specId;
+       info.stride = srv.structStride;
+       info.align  = srv.structAlign;
+       return info;
+     }
+
+     case DxbcOperandType::UnorderedAccessView: {
+       const auto& uav = m_uavs.at(registerId);
+
+       info.image  = uav.imageInfo;
+       info.stype  = uav.sampledType;
+       info.type   = uav.type;
+       info.typeId = uav.imageTypeId;
+       info.varId  = uav.varId;
+       info.specId = uav.specId;
+       info.stride = uav.structStride;
+       info.align  = uav.structAlign;
+       return info;
+     }
+
+     case DxbcOperandType::ThreadGroupSharedMemory: {
+       const auto& tgsm = m_gRegs.at(registerId);
+
+       // Shared memory has no image; it is accessed through a pointer
+       // to a Uint32 in the Workgroup storage class instead.
+       info.image  = { spv::DimBuffer, 0, 0, 0 };
+       info.stype  = DxbcScalarType::Uint32;
+       info.type   = tgsm.type;
+       info.typeId = m_module.defPointerType(
+         getScalarTypeId(DxbcScalarType::Uint32),
+         spv::StorageClassWorkgroup);
+       info.varId  = tgsm.varId;
+       info.specId = 0;
+       info.stride = tgsm.elementStride;
+       info.align  = 0;
+       return info;
+     }
+
+     default:
+       throw DxvkError(str::format("DxbcCompiler: Invalid operand type for buffer: ", reg.type));
+   }
+ }
+
+
+ // Returns the number of components in the size of an image of the
+ // given type: the per-dimension count (1 for buffer and 1D, 2 for 2D
+ // and cube, 3 for 3D) plus imageType.array. Throws DxvkError for any
+ // other image dimension.
+ uint32_t DxbcCompiler::getTexSizeDim(const DxbcImageInfo& imageType) const {
+   switch (imageType.dim) {
+     case spv::DimBuffer: return 1 + imageType.array;
+     case spv::Dim1D: return 1 + imageType.array;
+     case spv::Dim2D: return 2 + imageType.array;
+     case spv::Dim3D: return 3 + imageType.array;
+     case spv::DimCube: return 2 + imageType.array;
+     // The message used to name getTexLayerDim, pointing at the wrong function
+     default: throw DxvkError("DxbcCompiler: getTexSizeDim: Unsupported image dimension");
+   }
+ }
+
+
+ // Returns the number of coordinate components needed to address one
+ // layer of an image of the given type: 1 for buffer and 1D, 2 for 2D,
+ // 3 for 3D and cube. The array index is not included. Throws DxvkError
+ // for any other image dimension.
+ uint32_t DxbcCompiler::getTexLayerDim(const DxbcImageInfo& imageType) const {
+   switch (imageType.dim) {
+     case spv::DimBuffer:
+     case spv::Dim1D:
+       return 1;
+
+     case spv::Dim2D:
+       return 2;
+
+     case spv::Dim3D:
+     case spv::DimCube:
+       return 3;
+
+     default:
+       throw DxvkError("DxbcCompiler: getTexLayerDim: Unsupported image dimension");
+   }
+ }
+
+
+ // Returns the full coordinate component count for an image type: the
+ // per-layer count from getTexLayerDim() plus imageType.array.
+ uint32_t DxbcCompiler::getTexCoordDim(const DxbcImageInfo& imageType) const {
+   const uint32_t layerDim = getTexLayerDim(imageType);
+   return layerDim + imageType.array;
+ }
+
+
+ // Returns a register mask selecting the first N components, where N is
+ // the coordinate component count returned by getTexCoordDim().
+ DxbcRegMask DxbcCompiler::getTexCoordMask(const DxbcImageInfo& imageType) const {
+   const uint32_t coordDim = getTexCoordDim(imageType);
+   return DxbcRegMask::firstN(coordDim);
+ }
+
+
+ // Returns the vector type used for input register 'regIdx'.
+ //  - Domain shaders: always float4.
+ //  - Vertex shaders: component type and component count (popCount of
+ //    the component mask) of the matching input signature entry.
+ //  - Other stages: Float32, with the component count taken from
+ //    minComponents() of the signature's register mask.
+ // If there is no input signature, or it has no entry for the register,
+ // the result is float4.
+ DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
+   DxbcVectorType result;
+   result.ctype = DxbcScalarType::Float32;
+   result.ccount = 4;
+
+   const auto programType = m_programInfo.type();
+
+   // The signature is nullable elsewhere in this file, so do not
+   // dereference it blindly; the float4 default covers that case.
+   const bool hasSignature = m_isgn != nullptr;
+
+   if (programType == DxbcProgramType::DomainShader || !hasSignature)
+     return result;
+
+   const DxbcSgnEntry* entry = m_isgn->findByRegister(regIdx);
+
+   if (entry == nullptr)
+     return result;
+
+   if (programType == DxbcProgramType::VertexShader) {
+     result.ctype = entry->componentType;
+     result.ccount = entry->componentMask.popCount();
+   } else {
+     result.ccount = m_isgn->regMask(regIdx).minComponents();
+   }
+
+   return result;
+ }
+
+
+ // Returns the vector type used for output register 'regIdx'.
+ //  - Hull shaders: always float4.
+ //  - Pixel shaders: component type and component count (popCount of
+ //    the component mask) of the matching output signature entry.
+ //  - Other stages: Float32, with the component count taken from
+ //    minComponents() of the signature's register mask.
+ // If there is no output signature, or it has no entry for the
+ // register, the result is float4.
+ DxbcVectorType DxbcCompiler::getOutputRegType(uint32_t regIdx) const {
+   DxbcVectorType result;
+   result.ctype = DxbcScalarType::Float32;
+   result.ccount = 4;
+
+   const auto programType = m_programInfo.type();
+
+   // The signature is nullable elsewhere in this file, so do not
+   // dereference it blindly; the float4 default covers that case.
+   const bool hasSignature = m_osgn != nullptr;
+
+   if (programType == DxbcProgramType::HullShader || !hasSignature)
+     return result;
+
+   const DxbcSgnEntry* entry = m_osgn->findByRegister(regIdx);
+
+   if (entry == nullptr)
+     return result;
+
+   if (programType == DxbcProgramType::PixelShader) {
+     result.ctype = entry->componentType;
+     result.ccount = entry->componentMask.popCount();
+   } else {
+     result.ccount = m_osgn->regMask(regIdx).minComponents();
+   }
+
+   return result;
+ }
+
+
+ // Maps a DXBC resource dimension to the image properties used for it:
+ // image dimension, array flag, multisample flag, a value of 2 for UAVs
+ // or 1 otherwise (presumably the SPIR-V image 'Sampled' operand -
+ // confirm against DxbcImageInfo), and the Vulkan image view type.
+ // The multisample flag is forced to 0 if the disableMsaa option is set.
+ // Throws DxvkError for unsupported resource dimensions.
+ DxbcImageInfo DxbcCompiler::getResourceType(
+   DxbcResourceDim resourceType,
+   bool isUav) const {
+   const uint32_t ms = m_moduleInfo.options.disableMsaa ? 0 : 1;
+   const uint32_t sampled = isUav ? 2u : 1u;
+
+   switch (resourceType) {
+     case DxbcResourceDim::Buffer:
+       return { spv::DimBuffer, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_MAX_ENUM };
+     case DxbcResourceDim::Texture1D:
+       return { spv::Dim1D, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_1D };
+     case DxbcResourceDim::Texture1DArr:
+       return { spv::Dim1D, 1, 0, sampled, VK_IMAGE_VIEW_TYPE_1D_ARRAY };
+     case DxbcResourceDim::Texture2D:
+       return { spv::Dim2D, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_2D };
+     case DxbcResourceDim::Texture2DArr:
+       return { spv::Dim2D, 1, 0, sampled, VK_IMAGE_VIEW_TYPE_2D_ARRAY };
+     case DxbcResourceDim::Texture2DMs:
+       return { spv::Dim2D, 0, ms, sampled, VK_IMAGE_VIEW_TYPE_2D };
+     case DxbcResourceDim::Texture2DMsArr:
+       return { spv::Dim2D, 1, ms, sampled, VK_IMAGE_VIEW_TYPE_2D_ARRAY };
+     case DxbcResourceDim::Texture3D:
+       return { spv::Dim3D, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_3D };
+     case DxbcResourceDim::TextureCube:
+       return { spv::DimCube, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_CUBE };
+     case DxbcResourceDim::TextureCubeArr:
+       return { spv::DimCube, 1, 0, sampled, VK_IMAGE_VIEW_TYPE_CUBE_ARRAY };
+     default:
+       throw DxvkError(str::format("DxbcCompiler: Unsupported resource type: ", resourceType));
+   }
+ }
+
+
+ // Returns the single-channel 32-bit image format matching the given
+ // scalar type (Float32, Sint32 or Uint32). Throws DxvkError for any
+ // other scalar type.
+ spv::ImageFormat DxbcCompiler::getScalarImageFormat(DxbcScalarType type) const {
+   if (type == DxbcScalarType::Float32)
+     return spv::ImageFormatR32f;
+
+   if (type == DxbcScalarType::Sint32)
+     return spv::ImageFormatR32i;
+
+   if (type == DxbcScalarType::Uint32)
+     return spv::ImageFormatR32ui;
+
+   throw DxvkError("DxbcCompiler: Unhandled scalar resource type");
+ }
+
+
+ // Returns true for the 64-bit scalar types. Despite the name, this
+ // covers Sint64 and Uint64 as well as Float64.
+ bool DxbcCompiler::isDoubleType(DxbcScalarType type) const {
+   switch (type) {
+     case DxbcScalarType::Sint64:
+     case DxbcScalarType::Uint64:
+     case DxbcScalarType::Float64:
+       return true;
+
+     default:
+       return false;
+   }
+ }
+
+ // Builds a pointer to one element of an indexable temp (x#) register.
+ // 'operand' supplies the immediate register index in idx[0].offset and
+ // 'vectorId' holds the ID of the element index within that register.
+ // The result is a Private-class pointer to a Float32 vector with the
+ // register's component count.
+ DxbcRegisterPointer DxbcCompiler::getIndexableTempPtr(
+   const DxbcRegister& operand,
+   DxbcRegisterValue vectorId) {
+   // x# regs are indexed as follows:
+   // (0) register index (immediate)
+   // (1) element index (relative)
+   const uint32_t regId = operand.idx[0].offset;
+
+   // Look the register up once, bounds-checked. The component count used
+   // to be read through an unchecked operator[] before the checked
+   // at() call on the same register.
+   const auto& xReg = m_xRegs.at(regId);
+
+   DxbcRegisterInfo info;
+   info.type.ctype = DxbcScalarType::Float32;
+   info.type.ccount = xReg.ccount;
+   info.type.alength = 0;
+   info.sclass = spv::StorageClassPrivate;
+
+   DxbcRegisterPointer result;
+   result.type.ctype = info.type.ctype;
+   result.type.ccount = info.type.ccount;
+   result.id = m_module.opAccessChain(
+     getPointerTypeId(info),
+     xReg.varId,
+     1, &vectorId.id);
+
+   return result;
+ }
+
+ // Returns the type ID for the given scalar type. Requesting a 64-bit
+ // type also enables the matching capability (Float64 or Int64) on the
+ // module. Throws DxvkError if the value is not a known scalar type.
+ uint32_t DxbcCompiler::getScalarTypeId(DxbcScalarType type) {
+   switch (type) {
+     case DxbcScalarType::Uint32:
+       return m_module.defIntType(32, 0);
+
+     case DxbcScalarType::Uint64:
+       m_module.enableCapability(spv::CapabilityInt64);
+       return m_module.defIntType(64, 0);
+
+     case DxbcScalarType::Sint32:
+       return m_module.defIntType(32, 1);
+
+     case DxbcScalarType::Sint64:
+       m_module.enableCapability(spv::CapabilityInt64);
+       return m_module.defIntType(64, 1);
+
+     case DxbcScalarType::Float32:
+       return m_module.defFloatType(32);
+
+     case DxbcScalarType::Float64:
+       m_module.enableCapability(spv::CapabilityFloat64);
+       return m_module.defFloatType(64);
+
+     case DxbcScalarType::Bool:
+       return m_module.defBoolType();
+   }
+
+   throw DxvkError("DxbcCompiler: Invalid scalar type");
+ }
+
+
+ // Returns the type ID for a vector type. A component count of 0 or 1
+ // yields the plain scalar type rather than a vector.
+ uint32_t DxbcCompiler::getVectorTypeId(const DxbcVectorType& type) {
+   const uint32_t scalarTypeId = this->getScalarTypeId(type.ctype);
+
+   if (type.ccount <= 1)
+     return scalarTypeId;
+
+   return m_module.defVectorType(scalarTypeId, type.ccount);
+ }
+
+
+ // Returns the type ID for an array type. An array length of 0 yields
+ // the element's vector (or scalar) type itself instead of an array.
+ uint32_t DxbcCompiler::getArrayTypeId(const DxbcArrayType& type) {
+   DxbcVectorType elementType;
+   elementType.ctype = type.ctype;
+   elementType.ccount = type.ccount;
+
+   const uint32_t elementTypeId = this->getVectorTypeId(elementType);
+
+   if (type.alength == 0)
+     return elementTypeId;
+
+   const uint32_t lengthId = m_module.constu32(type.alength);
+   return m_module.defArrayType(elementTypeId, lengthId);
+ }
+
+
+ // Returns the ID of a pointer type that points to the register's array
+ // (or vector/scalar) type within the register's storage class.
+ uint32_t DxbcCompiler::getPointerTypeId(const DxbcRegisterInfo& type) {
+   const uint32_t pointeeTypeId = this->getArrayTypeId(type.type);
+   return m_module.defPointerType(pointeeTypeId, type.sclass);
+ }
+
+
+ // Returns the type ID of the "s_per_vertex" block struct. The struct
+ // currently has a single member: a float4 decorated with the Position
+ // built-in. The cull and clip distance members are disabled and only
+ // kept below as commented-out code.
+ uint32_t DxbcCompiler::getPerVertexBlockId() {
+   const uint32_t floatTypeId = m_module.defFloatType(32);
+   const uint32_t float4TypeId = m_module.defVectorType(floatTypeId, 4);
+ // const uint32_t float4ArrTypeId = m_module.defArrayType(floatTypeId, m_module.constu32(4));
+
+   std::array<uint32_t, 1> memberTypes;
+   memberTypes[PerVertex_Position] = float4TypeId;
+ // memberTypes[PerVertex_CullDist] = float4ArrTypeId;
+ // memberTypes[PerVertex_ClipDist] = float4ArrTypeId;
+
+   const uint32_t blockTypeId = m_module.defStructTypeUnique(
+     memberTypes.size(), memberTypes.data());
+
+   // Built-in decorations
+   m_module.memberDecorateBuiltIn(blockTypeId, PerVertex_Position, spv::BuiltInPosition);
+ // m_module.memberDecorateBuiltIn(blockTypeId, PerVertex_CullDist, spv::BuiltInCullDistance);
+ // m_module.memberDecorateBuiltIn(blockTypeId, PerVertex_ClipDist, spv::BuiltInClipDistance);
+   m_module.decorateBlock(blockTypeId);
+
+   // Debug names
+   m_module.setDebugName(blockTypeId, "s_per_vertex");
+   m_module.setDebugMemberName(blockTypeId, PerVertex_Position, "position");
+ // m_module.setDebugMemberName(blockTypeId, PerVertex_CullDist, "cull_dist");
+ // m_module.setDebugMemberName(blockTypeId, PerVertex_ClipDist, "clip_dist");
+   return blockTypeId;
+ }
+
+
+ // Returns the ID assigned to subroutine number 'functionNr'. On first
+ // use a fresh ID is allocated and stored in m_subroutines, so that
+ // later lookups of the same number return the same ID.
+ uint32_t DxbcCompiler::getFunctionId(
+   uint32_t functionNr) {
+   const auto known = m_subroutines.find(functionNr);
+
+   if (known == m_subroutines.end()) {
+     const uint32_t newFunctionId = m_module.allocateId();
+     m_subroutines.insert({ functionNr, newFunctionId });
+     return newFunctionId;
+   }
+
+   return known->second;
+ }
+
+
+ // Returns a pointer to the fork or join phase currently selected by
+ // m_hs.currPhaseType and m_hs.currPhaseId, or nullptr if the current
+ // phase is neither a fork nor a join phase.
+ DxbcCompilerHsForkJoinPhase* DxbcCompiler::getCurrentHsForkJoinPhase() {
+   if (m_hs.currPhaseType == DxbcCompilerHsPhase::Fork)
+     return &m_hs.forkPhases.at(m_hs.currPhaseId);
+
+   if (m_hs.currPhaseType == DxbcCompilerHsPhase::Join)
+     return &m_hs.joinPhases.at(m_hs.currPhaseId);
+
+   return nullptr;
+ }
+
+}