From 0c86c54d48533e3505e56e639059d28331fe4bc3 Mon Sep 17 00:00:00 2001 From: Fire Cube Date: Mon, 21 Apr 2025 23:25:15 +0200 Subject: [PATCH] Implement SET_PC_B64 instruction (#2823) * basic impl * minor improvements * clang * more clang * improvements requested by squidbus --- .../frontend/control_flow_graph.cpp | 63 ++++++++++++++++++- .../frontend/instruction.cpp | 2 +- .../frontend/translate/scalar_flow.cpp | 1 + 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index cf1882b8c..b53db9e94 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -4,6 +4,7 @@ #include #include #include "common/assert.h" +#include "common/logging/log.h" #include "shader_recompiler/frontend/control_flow_graph.h" namespace Shader::Gcn { @@ -67,6 +68,39 @@ static bool IgnoresExecMask(const GcnInst& inst) { return false; } +static std::optional ResolveSetPcTarget(std::span list, u32 setpc_index, + std::span pc_map) { + if (setpc_index < 3) { + return std::nullopt; + } + + const auto& getpc = list[setpc_index - 3]; + const auto& arith = list[setpc_index - 2]; + const auto& setpc = list[setpc_index]; + + if (getpc.opcode != Opcode::S_GETPC_B64 || + !(arith.opcode == Opcode::S_ADD_U32 || arith.opcode == Opcode::S_SUB_U32) || + setpc.opcode != Opcode::S_SETPC_B64) + return std::nullopt; + + if (getpc.dst[0].code != setpc.src[0].code || arith.dst[0].code != setpc.src[0].code) + return std::nullopt; + + if (arith.src_count < 2 || arith.src[1].field != OperandField::LiteralConst) + return std::nullopt; + + const u32 imm = arith.src[1].code; + + const s32 signed_offset = + (arith.opcode == Opcode::S_ADD_U32) ? static_cast(imm) : -static_cast(imm); + + const u32 base_pc = pc_map[setpc_index - 3] + getpc.length; + + const u32 result_pc = static_cast(static_cast(base_pc) + signed_offset); + LOG_DEBUG(Render_Recompiler, "SetPC target: {} + {} = {}", base_pc, signed_offset, result_pc); + return result_pc & ~0x3u; +} + static constexpr size_t LabelReserveSize = 32; CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) @@ -89,9 +123,20 @@ void CFG::EmitLabels() { index_to_pc[i] = pc; const GcnInst inst = inst_list[i]; if (inst.IsUnconditionalBranch()) { - const u32 target = inst.BranchTarget(pc); + u32 target = inst.BranchTarget(pc); + if (inst.opcode == Opcode::S_SETPC_B64) { + if (auto t = ResolveSetPcTarget(inst_list, i, index_to_pc)) { + target = *t; + } else { + ASSERT_MSG( + false, + "S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): Involved " + "instructions not recognized or invalid pattern", + pc, i); + } + } AddLabel(target); - // Emit this label so that the block ends with s_branch instruction + // Emit this label so that the block ends with the branching instruction AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); @@ -102,6 +147,7 @@ void CFG::EmitLabels() { const u32 next_label = pc + inst.length; AddLabel(next_label); } + pc += inst.length; } index_to_pc[inst_list.size()] = pc; @@ -280,7 +326,18 @@ void CFG::LinkBlocks() { // Find the branch targets from the instruction and link the blocks. // Note: Block end address is one instruction after end_inst. const u32 branch_pc = block.end - end_inst.length; - const u32 target_pc = end_inst.BranchTarget(branch_pc); + u32 target_pc = 0; + if (end_inst.opcode == Opcode::S_SETPC_B64) { + auto tgt = ResolveSetPcTarget(inst_list, block.end_index, index_to_pc); + ASSERT_MSG(tgt, + "S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): Involved " + "instructions not recognized or invalid pattern", + branch_pc, block.end_index); + target_pc = *tgt; + } else { + target_pc = end_inst.BranchTarget(branch_pc); + } + if (end_inst.IsUnconditionalBranch()) { auto* target_block = get_block(target_pc); ++target_block->num_predecessors; diff --git a/src/shader_recompiler/frontend/instruction.cpp b/src/shader_recompiler/frontend/instruction.cpp index a0c132053..246c2b85a 100644 --- a/src/shader_recompiler/frontend/instruction.cpp +++ b/src/shader_recompiler/frontend/instruction.cpp @@ -18,7 +18,7 @@ bool GcnInst::IsTerminateInstruction() const { } bool GcnInst::IsUnconditionalBranch() const { - return opcode == Opcode::S_BRANCH; + return opcode == Opcode::S_BRANCH || opcode == Opcode::S_SETPC_B64; } bool GcnInst::IsFork() const { diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp index 0e02b77a2..cd1cf51f0 100644 --- a/src/shader_recompiler/frontend/translate/scalar_flow.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -18,6 +18,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { return; case Opcode::S_GETPC_B64: return S_GETPC_B64(pc, inst); + case Opcode::S_SETPC_B64: case Opcode::S_WAITCNT: case Opcode::S_NOP: case Opcode::S_ENDPGM: