From 52ab1ed04b900206c989a3cb78932217421d545a Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 16 Apr 2025 08:08:09 -0700 Subject: [PATCH] shader_recompiler: Implement S_FLBIT_I32_B32 and V_MUL_HI_I32. (#2793) --- .../backend/spirv/emit_spirv_instructions.h | 4 ++-- .../backend/spirv/emit_spirv_integer.cpp | 13 +++++++++---- .../frontend/translate/scalar_alu.cpp | 13 +++++++++++++ .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 4 +++- src/shader_recompiler/ir/ir_emitter.cpp | 4 ++-- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 4 ++-- 8 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index fb37799f5..68438fbba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -338,8 +338,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitSMulExt(EmitContext& ctx, Id a, Id b); -Id EmitUMulExt(EmitContext& ctx, Id a, Id b); +Id EmitSMulHi(EmitContext& ctx, Id a, Id b); +Id EmitUMulHi(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitIMul64(EmitContext& ctx, Id a, Id b); Id EmitSDiv32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 36726b6df..10bfbb2ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -72,12 +72,17 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U64, a, b); } -Id EmitSMulExt(EmitContext& ctx, Id a, Id b) { - return ctx.OpSMulExtended(ctx.full_result_i32x2, a, b); +Id EmitSMulHi(EmitContext& ctx, Id a, Id b) { + const auto signed_a{ctx.OpBitcast(ctx.S32[1], a)}; + const auto signed_b{ctx.OpBitcast(ctx.S32[1], b)}; + const auto mul_ext{ctx.OpSMulExtended(ctx.full_result_i32x2, signed_a, signed_b)}; + const auto signed_hi{ctx.OpCompositeExtract(ctx.S32[1], mul_ext, 1)}; + return ctx.OpBitcast(ctx.U32[1], signed_hi); } -Id EmitUMulExt(EmitContext& ctx, Id a, Id b) { - return ctx.OpUMulExtended(ctx.full_result_u32x2, a, b); +Id EmitUMulHi(EmitContext& ctx, Id a, Id b) { + const auto mul_ext{ctx.OpUMulExtended(ctx.full_result_u32x2, a, b)}; + return ctx.OpCompositeExtract(ctx.U32[1], mul_ext, 1); } Id EmitIMul32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 39f972848..c21c9b611 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -110,6 +110,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_FF1_I32_B32(inst); case Opcode::S_FF1_I32_B64: return S_FF1_I32_B64(inst); + case Opcode::S_FLBIT_I32_B32: + return S_FLBIT_I32_B32(inst); case Opcode::S_BITSET0_B32: return S_BITSET_B32(inst, 0); case Opcode::S_BITSET1_B32: @@ -660,6 +662,17 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) { SetDst(inst.dst[0], result); } +void Translator::S_FLBIT_I32_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB) + // position + const IR::U32 msb_pos = ir.FindUMsb(src0); + const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos); + // Select 0xFFFFFFFF if src0 was 0 + const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0)); + SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); +} + void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) { const IR::U32 old_value{GetSrc(inst.dst[0])}; const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6803cda25..520720b0f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -119,6 +119,7 @@ public: void S_BCNT1_I32_B64(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst); void S_FF1_I32_B64(const GcnInst& inst); + void S_FLBIT_I32_B32(const GcnInst& inst); void S_BITSET_B32(const GcnInst& inst, u32 bit_value); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index da25f5434..787cf6ad3 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -394,6 +394,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MUL_HI_U32(false, inst); case Opcode::V_MUL_LO_I32: return V_MUL_LO_U32(inst); + case Opcode::V_MUL_HI_I32: + return V_MUL_HI_U32(true, inst); case Opcode::V_MAD_U64_U32: return V_MAD_U64_U32(inst); case Opcode::V_NOP: @@ -1279,7 +1281,7 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)}; + const IR::U32 hi{ir.IMulHi(src0, src1, is_signed)}; SetDst(inst.dst[0], hi); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a171d32a2..a51d126c7 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1388,8 +1388,8 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { } } -IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) { - return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b); +U32 IREmitter::IMulHi(const U32& a, const U32& b, bool is_signed) { + return Inst(is_signed ? Opcode::SMulHi : Opcode::UMulHi, a, b); } U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 48cc02725..f1d564b80 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -240,7 +240,7 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] Value IAddCary(const U32& a, const U32& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); - [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false); + [[nodiscard]] U32 IMulHi(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32 IMod(const U32& a, const U32& b, bool is_signed = false); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 93d759b74..10819f898 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -317,8 +317,8 @@ OPCODE(ISub32, U32, U32, OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) OPCODE(IMul64, U64, U64, U64, ) -OPCODE(SMulExt, U32x2, U32, U32, ) -OPCODE(UMulExt, U32x2, U32, U32, ) +OPCODE(SMulHi, U32, U32, U32, ) +OPCODE(UMulHi, U32, U32, U32, ) OPCODE(SDiv32, U32, U32, U32, ) OPCODE(UDiv32, U32, U32, U32, ) OPCODE(SMod32, U32, U32, U32, )