diff --git a/externals/ext-boost b/externals/ext-boost index f2474e1b5..ca6f230e6 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit f2474e1b584fb7a3ed6f85ba875e6eacd742ec8a +Subproject commit ca6f230e67be7cc45fc919057f07b2aee64dadc1 diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 354196d31..8c3122b28 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -47,6 +47,15 @@ static IR::Condition MakeCondition(const GcnInst& inst) { } } +static bool IgnoresExecMask(Opcode opcode) { + switch (opcode) { + case Opcode::V_WRITELANE_B32: + return true; + default: + return false; + } +} + static constexpr size_t LabelReserveSize = 32; CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) @@ -133,20 +142,26 @@ void CFG::EmitDivergenceLabels() { curr_begin = -1; continue; } - // Add a label to the instruction right after the open scope call. - // It is the start of a new basic block. - const auto& save_inst = inst_list[curr_begin]; - const Label label = index_to_pc[curr_begin] + save_inst.length; - AddLabel(label); - // Add a label to the close scope instruction. - // There are 3 cases where we need to close a scope. - // * Close scope instruction inside the block - // * Close scope instruction at the end of the block (cbranch or endpgm) - // * Normal instruction at the end of the block - // For the last case we must NOT add a label as that would cause - // the instruction to be separated into its own basic block. - if (is_close) { - AddLabel(index_to_pc[index]); + // If all instructions in the scope ignore exec masking, we shouldn't insert a + // scope. + const auto start = inst_list.begin() + curr_begin + 1; + if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask, + &GcnInst::opcode)) { + // Add a label to the instruction right after the open scope call. + // It is the start of a new basic block. + const auto& save_inst = inst_list[curr_begin]; + const Label label = index_to_pc[curr_begin] + save_inst.length; + AddLabel(label); + // Add a label to the close scope instruction. + // There are 3 cases where we need to close a scope. + // * Close scope instruction inside the block + // * Close scope instruction at the end of the block (cbranch or endpgm) + // * Normal instruction at the end of the block + // For the last case we must NOT add a label as that would cause + // the instruction to be separated into its own basic block. + if (is_close) { + AddLabel(index_to_pc[index]); + } } // Reset scope begin. curr_begin = -1; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 426acb2b8..b4d1a78c7 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -19,12 +19,14 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { Inst* const inst{inst_pool->Create(base_inst)}; + inst->SetParent(this); return instructions.insert(insertion_point, *inst); } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; + inst->SetParent(this); const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index abd31a728..9b4ad63d2 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "shader_recompiler/exception.h" @@ -119,10 +120,10 @@ void Inst::SetArg(size_t index, Value value) { } const IR::Value arg{Arg(index)}; if (!arg.IsImmediate()) { - UndoUse(arg); + UndoUse(arg.Inst(), index); } if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), index); } if (op == Opcode::Phi) { phi_args[index].second = value; @@ -143,7 +144,7 @@ Block* Inst::PhiBlock(size_t index) const { void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), phi_args.size()); } phi_args.emplace_back(predecessor, value); } @@ -155,17 +156,19 @@ void Inst::Invalidate() { void Inst::ClearArgs() { if (op == Opcode::Phi) { - for (auto& pair : phi_args) { + for (auto i = 0; i < phi_args.size(); i++) { + auto& pair = phi_args[i]; IR::Value& value{pair.second}; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } phi_args.clear(); } else { - for (auto& value : args) { + for (auto i = 0; i < args.size(); i++) { + auto& value = args[i]; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } // Reset arguments to null @@ -174,13 +177,21 @@ void Inst::ClearArgs() { } } -void Inst::ReplaceUsesWith(Value replacement) { - Invalidate(); - ReplaceOpcode(Opcode::Identity); - if (!replacement.IsImmediate()) { - Use(replacement); +void Inst::ReplaceUsesWith(Value replacement, bool preserve) { + // Copy since user->SetArg will mutate this->uses + // Could also do temp_uses = std::move(uses) but more readable + const auto temp_uses = uses; + for (const auto& [user, operand] : temp_uses) { + DEBUG_ASSERT(user->Arg(operand).Inst() == this); + user->SetArg(operand, replacement); + } + Invalidate(); + if (preserve) { + // Still useful to have Identity for indirection. + // SSA pass would be more complicated without it + ReplaceOpcode(Opcode::Identity); + SetArg(0, replacement); } - args[0] = replacement; } void Inst::ReplaceOpcode(IR::Opcode opcode) { @@ -195,14 +206,15 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void Inst::Use(const Value& value) { - Inst* const inst{value.Inst()}; - ++inst->use_count; +void Inst::Use(Inst* used, u32 operand) { + DEBUG_ASSERT(0 == std::count(used->uses.begin(), used->uses.end(), IR::Use(this, operand))); + used->uses.emplace_front(this, operand); } -void Inst::UndoUse(const Value& value) { - Inst* const inst{value.Inst()}; - --inst->use_count; +void Inst::UndoUse(Inst* used, u32 operand) { + IR::Use use(this, operand); + DEBUG_ASSERT(1 == std::count(used->uses.begin(), used->uses.end(), use)); + used->uses.remove(use); } } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index a03fe051c..9624ce6a5 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -43,7 +43,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (is_lhs_immediate && is_rhs_immediate) { const auto result{imm_fn(Arg(lhs), Arg(rhs))}; - inst.ReplaceUsesWith(IR::Value{result}); + inst.ReplaceUsesWithAndRemove(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { @@ -75,7 +75,7 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { return false; } using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + inst.ReplaceUsesWithAndRemove(EvalImmediates(inst, func, Indices{})); return true; } @@ -83,12 +83,12 @@ template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{std::bit_cast(Arg(value))}); + inst.ReplaceUsesWithAndRemove(IR::Value{std::bit_cast(Arg(value))}); return; } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -131,7 +131,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser if (!result) { return; } - inst.ReplaceUsesWith(*result); + inst.ReplaceUsesWithAndRemove(*result); } void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { @@ -141,7 +141,7 @@ void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { } IR::Inst* const producer{value.InstRecursive()}; if (producer->GetOpcode() == opposite) { - inst.ReplaceUsesWith(producer->Arg(0)); + inst.ReplaceUsesWithAndRemove(producer->Arg(0)); } } @@ -152,9 +152,9 @@ void FoldLogicalAnd(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } else { - inst.ReplaceUsesWith(IR::Value{false}); + inst.ReplaceUsesWithAndRemove(IR::Value{false}); } } } @@ -162,7 +162,7 @@ void FoldLogicalAnd(IR::Inst& inst) { void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { - inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + inst.ReplaceUsesWithAndRemove(cond.U1() ? inst.Arg(1) : inst.Arg(2)); } } @@ -173,9 +173,9 @@ void FoldLogicalOr(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(IR::Value{true}); + inst.ReplaceUsesWithAndRemove(IR::Value{true}); } else { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } } } @@ -183,12 +183,12 @@ void FoldLogicalOr(IR::Inst& inst) { void FoldLogicalNot(IR::Inst& inst) { const IR::U1 value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{!value.U1()}); + inst.ReplaceUsesWithAndRemove(IR::Value{!value.U1()}); return; } IR::Inst* const arg{value.InstRecursive()}; if (arg->GetOpcode() == IR::Opcode::LogicalNot) { - inst.ReplaceUsesWith(arg->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg->Arg(0)); } } @@ -199,7 +199,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -211,7 +211,7 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate() && Arg(rhs) == 0) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); return; } } @@ -226,21 +226,58 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) { } else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::F32 value = IR::F32{inst.Arg(0)}; - inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); + inst.ReplaceUsesWithAndRemove( + ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); } else { UNREACHABLE(); } } -void FoldReadLane(IR::Inst& inst) { +void FoldReadLane(IR::Block& block, IR::Inst& inst) { const u32 lane = inst.Arg(1).U32(); IR::Inst* prod = inst.Arg(0).InstRecursive(); - while (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (prod->Arg(2).U32() == lane) { - inst.ReplaceUsesWith(prod->Arg(1)); + + const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value { + while (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (prod->Arg(2).U32() == lane) { + return prod->Arg(1); + } + prod = prod->Arg(0).InstRecursive(); + } + return {}; + }; + + if (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (const IR::Value value = search_chain(prod); !value.IsEmpty()) { + inst.ReplaceUsesWith(value); + } + return; + } + + if (prod->GetOpcode() == IR::Opcode::Phi) { + boost::container::small_vector phi_args; + for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) { + const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()}; + if (arg->GetOpcode() != IR::Opcode::WriteLane) { + return; + } + const IR::Value value = search_chain(arg); + if (value.IsEmpty()) { + continue; + } + phi_args.emplace_back(value); + } + if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) { + inst.ReplaceUsesWith(phi_args[0]); return; } - prod = prod->Arg(0).InstRecursive(); + const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod); + IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)}; + new_phi->SetFlags(IR::Type::U32); + for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) { + new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]); + } + inst.ReplaceUsesWith(IR::Value{new_phi}); } } @@ -290,7 +327,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF64: return FoldSelect(inst); case IR::Opcode::ReadLane: - return FoldReadLane(inst); + return FoldReadLane(block, inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp index 76bfcf911..c109f3595 100644 --- a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -25,7 +25,7 @@ void LowerSharedMemToRegisters(IR::Program& program) { }); ASSERT(it != ds_writes.end()); // Replace data read with value written. - inst.ReplaceUsesWith((*it)->Arg(1)); + inst.ReplaceUsesWithAndRemove((*it)->Arg(1)); } } } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index c1ff3d2f2..89c5c78a0 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -596,7 +596,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); }(); - inst.ReplaceUsesWith(new_inst); + inst.ReplaceUsesWithAndRemove(new_inst); } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index df73c1bc8..1d252bee1 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -164,7 +164,6 @@ IR::Opcode UndefOpcode(const FlagTag) noexcept { enum class Status { Start, SetValue, - PreparePhiArgument, PushPhiArgument, }; @@ -253,12 +252,10 @@ public: IR::Inst* const phi{stack.back().phi}; phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); ++stack.back().pred_it; - } - [[fallthrough]]; - case Status::PreparePhiArgument: prepare_phi_operand(); break; } + } } while (stack.size() > 1); return stack.back().result; } @@ -266,9 +263,7 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& pair : it->second) { - auto& variant{pair.first}; - auto& phi{pair.second}; + for (auto& [variant, phi] : it->second) { std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -289,7 +284,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op.Resolve() == IR::Value{&phi}) { // Unique value or self-reference continue; } @@ -314,9 +309,15 @@ private: ++reinsert_point; } // Reinsert the phi node and reroute all its uses to the "same" value + const auto users = phi.Uses(); list.insert(reinsert_point, phi); phi.ReplaceUsesWith(same); - // TODO: Try to recursively remove all phi users, which might have become trivial + // Try to recursively remove all phi users, which might have become trivial + for (const auto& [user, arg_index] : users) { + if (user->GetOpcode() == IR::Opcode::Phi) { + TryRemoveTrivialPhi(*user, user->GetParent(), undef_opcode); + } + } return same; } diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 7e46747b9..dbe8b5cc4 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,16 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +struct Use { + Inst* user; + u32 operand; + + Use() = default; + Use(Inst* user_, u32 operand_) : user(user_), operand(operand_) {} + Use(const Use&) = default; + bool operator==(const Use&) const noexcept = default; +}; + class Inst : public boost::intrusive::list_base_hook<> { public: explicit Inst(IR::Opcode op_, u32 flags_) noexcept; @@ -118,14 +129,22 @@ public: Inst& operator=(Inst&&) = delete; Inst(Inst&&) = delete; + IR::Block* GetParent() const { + ASSERT(parent); + return parent; + } + void SetParent(IR::Block* block) { + parent = block; + } + /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { - return use_count; + return uses.size(); } /// Determines whether this instruction has uses or not. [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; + return uses.size() > 0; } /// Get the opcode this microinstruction represents. @@ -167,7 +186,13 @@ public: void Invalidate(); void ClearArgs(); - void ReplaceUsesWith(Value replacement); + void ReplaceUsesWithAndRemove(Value replacement) { + ReplaceUsesWith(replacement, false); + } + + void ReplaceUsesWith(Value replacement) { + ReplaceUsesWith(replacement, true); + } void ReplaceOpcode(IR::Opcode opcode); @@ -197,25 +222,32 @@ public: return std::bit_cast(definition); } + const auto Uses() const { + return uses; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} }; - void Use(const Value& value); - void UndoUse(const Value& value); + void Use(Inst* used, u32 operand); + void UndoUse(Inst* used, u32 operand); + void ReplaceUsesWith(Value replacement, bool preserve); IR::Opcode op{}; - int use_count{}; u32 flags{}; u32 definition{}; + IR::Block* parent{}; union { NonTriviallyDummy dummy{}; boost::container::small_vector, 2> phi_args; std::array args; }; + + boost::container::list uses; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); +static_assert(sizeof(Inst) <= 160, "Inst size unintentionally increased"); using U1 = TypedValue; using U8 = TypedValue; @@ -373,4 +405,4 @@ template <> struct hash { std::size_t operator()(const Shader::IR::Value& v) const; }; -} // namespace std \ No newline at end of file +} // namespace std diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 38fca6bc0..1bbd77f82 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -715,7 +715,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - LOG_WARNING(Render_Vulkan, "GDS memory read"); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(),