diff --git a/CMakeLists.txt b/CMakeLists.txt index af811e9fb..833bbe3ce 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -701,6 +701,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reinterpret.h src/shader_recompiler/ir/reg.h src/shader_recompiler/ir/type.cpp src/shader_recompiler/ir/type.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 74e736cf6..d064b5d05 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,16 +6,22 @@ namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +template +Id EmitCompositeConstruct(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + return inst->AreAllArgsImmediates() ? ctx.ConstantComposite(args...) 
+ : ctx.OpCompositeConstruct(args...); } -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[2], e1, e2); } -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4); } Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { @@ -42,16 +48,30 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.U32[4], 
composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,16 +98,30 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, 
IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -114,6 +148,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); } +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + void EmitCompositeConstructF64x2(EmitContext&) { UNREACHABLE_MSG("SPIR-V Instruction"); } @@ -150,4 +198,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, 
comp0, comp1, comp2, comp3); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2946edab3..c3d937fe7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -238,7 +238,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod } texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands); } - return !texture.is_integer ? ctx.OpBitcast(ctx.U32[4], texel) : texel; + return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel; } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms, @@ -253,8 +253,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, - operands.operands); + const Id texel = texture.is_integer ? 
ctx.OpBitcast(color_type, color) : color; + ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index d26cf6662..0d9fcff46 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -120,33 +120,48 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 
comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& 
ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -156,6 +171,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 5927aa696..83240e17f 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) 
{ IR::VectorReg(inst.src[3].code), }; - const auto swizzle = [&](u32 comp) { + const auto set_attribute = [&](u32 comp, IR::F32 value) { if (!IR::IsMrt(attrib)) { - return comp; + ir.SetAttribute(attrib, value, comp); + return; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) { - case MrtSwizzle::Identity: - return comp; - case MrtSwizzle::Alt: - static constexpr std::array AltSwizzle = {2, 1, 0, 3}; - return AltSwizzle[comp]; - case MrtSwizzle::Reverse: - static constexpr std::array RevSwizzle = {3, 2, 1, 0}; - return RevSwizzle[comp]; - case MrtSwizzle::ReverseAlt: - static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; - return AltRevSwizzle[comp]; - default: - UNREACHABLE(); + const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; + const std::array swizzle_array = {r, g, b, a}; + const auto swizzled_comp = swizzle_array[comp]; + if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { + ir.SetAttribute(attrib, value, comp); + return; } + ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); }; const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, swizzle(idx * 2)); - ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); + set_attribute(idx * 2, r); + set_attribute(idx * 2 + 1, g); }; // Components are float16 packed into a VGPR @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, swizzle(i)); + set_attribute(i, comp); } } if (IR::IsMrt(attrib)) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 237acf309..7f5504663 100644 --- 
a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUdReg(attrib.sgpr_base, attrib.dword_offset); + const auto values = + ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), + ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); + const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { - const IR::F32 comp = [&] { - switch (buffer.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::One: - return ir.Imm32(1.f); - case AmdGpu::CompSwizzle::Zero: - return ir.Imm32(0.f); - case AmdGpu::CompSwizzle::Red: - return ir.GetAttribute(attr, 0); - case AmdGpu::CompSwizzle::Green: - return ir.GetAttribute(attr, 1); - case AmdGpu::CompSwizzle::Blue: - return ir.GetAttribute(attr, 2); - case AmdGpu::CompSwizzle::Alpha: - return ir.GetAttribute(attr, 3); - default: - UNREACHABLE(); - } - }(); - ir.SetVectorReg(dst_reg++, comp); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } // In case of programmable step rates we need to fallback to instance data pulling in diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 79d46cd42..c5be08b7d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -326,7 +326,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { const IR::VectorReg 
src_reg{inst.src[1].code}; - std::array comps{}; + std::array comps{}; for (u32 i = 0; i < num_dwords; i++) { comps[i] = ir.GetVectorReg(src_reg + i); } @@ -424,7 +424,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { if (((mimg.dmask >> i) & 1) == 0) { continue; } - IR::U32 value = IR::U32{ir.CompositeExtract(texel, i)}; + IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); } } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 20e6eae0b..823f9bdcd 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -663,6 +663,92 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_ } } +/// Emits a 2-component shuffle: each index selects from the concatenation of two 2-component +/// vectors of identical type, so every index must be below 4. +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 4 || comp1 >= 4) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)}, + Value{static_cast<u32>(comp1)}); + }}; + switch (vector1.Type()) { + case Type::U32x2: + return shuffle(Opcode::CompositeShuffleU32x2); + case Type::F16x2: + return shuffle(Opcode::CompositeShuffleF16x2); + case Type::F32x2: + return shuffle(Opcode::CompositeShuffleF32x2); + case Type::F64x2: + return shuffle(Opcode::CompositeShuffleF64x2); + default: + ThrowInvalidType(vector1.Type()); + } +} + +/// Emits a 3-component shuffle: each index selects from the concatenation of two 3-component +/// vectors of identical type, so every index must be below 6. +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) { + UNREACHABLE_MSG("One or more out of bounds elements {}, 
{}, {}", comp0, comp1, comp2); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)}, + Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)}); + }}; + switch (vector1.Type()) { + case Type::U32x3: + return shuffle(Opcode::CompositeShuffleU32x3); + case Type::F16x3: + return shuffle(Opcode::CompositeShuffleF16x3); + case Type::F32x3: + return shuffle(Opcode::CompositeShuffleF32x3); + case Type::F64x3: + return shuffle(Opcode::CompositeShuffleF64x3); + default: + ThrowInvalidType(vector1.Type()); + } +} + +/// Emits a 4-component shuffle: each index selects from the concatenation of two 4-component +/// vectors of identical type, so every index must be below 8. +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2, + comp3); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)}, + Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)}, + Value{static_cast<u32>(comp3)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x4); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x4); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x4); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x4); + default: + ThrowInvalidType(vector1.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f65baee2a..9aab9459b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++
b/src/shader_recompiler/ir/ir_emitter.h @@ -155,6 +155,13 @@ public: [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 1194c3792..6242a230e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -99,7 +99,7 @@ OPCODE(StoreBufferU32, Void, Opaq OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) // Buffer atomic operations OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) @@ -124,6 +124,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) @@ -133,6 +136,9 @@ 
OPCODE(CompositeExtractF16x4, F16, F16x OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, ) +OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) @@ -142,6 +148,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, ) +OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) @@ -151,6 +160,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, ) +OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) @@ -346,8 +358,8 @@ OPCODE(ImageGatherDref, F32x4, Opaq OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, U32, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, 
U32, U32, U32x4, ) +OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e6d23bfe7..636752912 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -128,35 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) { } } -IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { - boost::container::static_vector comps; - for (u32 i = 0; i < 4; i++) { - switch (sharp.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::Zero: - comps.emplace_back(ir.Imm32(0.f)); - break; - case AmdGpu::CompSwizzle::One: - comps.emplace_back(ir.Imm32(1.f)); - break; - case AmdGpu::CompSwizzle::Red: - comps.emplace_back(ir.CompositeExtract(texel, 0)); - break; - case AmdGpu::CompSwizzle::Green: - comps.emplace_back(ir.CompositeExtract(texel, 1)); - break; - case AmdGpu::CompSwizzle::Blue: - comps.emplace_back(ir.CompositeExtract(texel, 2)); - break; - case AmdGpu::CompSwizzle::Alpha: - comps.emplace_back(ir.CompositeExtract(texel, 3)); - break; - default: - UNREACHABLE(); - } - } - return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); -}; - class Descriptors { public: explicit Descriptors(Info& info_) @@ -409,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); 
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); - - // Apply dst_sel swizzle on formatted buffer instructions - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); - } else { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); - } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -765,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4))); - } - if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite); @@ -783,6 +742,50 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto buffer_res = info.texture_buffers[binding]; + const auto buffer = buffer_res.GetSharp(info); + if (!buffer.Valid()) { + // Don't need to swizzle invalid buffer. 
+ return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + +void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto image_res = info.images[binding & 0xFFFF]; + const auto image = image_res.GetSharp(info); + if (!image.Valid() || !image_res.IsStorage(image)) { + // Don't need to swizzle invalid or non-storage image. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { + const auto inst_info = inst.Flags(); + const auto lod = inst.Arg(2); + const auto ms = inst.Arg(3); + const auto texel = + ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, + ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info); + const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { // Insert gds binding in the shader if it doesn't exist already. @@ -852,6 +855,19 @@ void ResourceTrackingPass(IR::Program& program) { } } } + // Second pass to reinterpret format read/write where needed, since we now know + // the bindings and their properties. 
+ for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferInterpretation(*block, inst, info); + continue; + } + if (IsImageInstruction(inst)) { + PatchImageInterpretation(*block, inst, info); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h new file mode 100644 index 000000000..73d587a56 --- /dev/null +++ b/src/shader_recompiler/ir/reinterpret.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/ir_emitter.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::IR { + +/// Applies a component swizzle to a vec4. +inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) { + // Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle + // using two vectors and a shuffle, using one vector of constants and one of the components. 
+ const auto zero = ir.Imm32(0.f); + const auto one = ir.Imm32(1.f); + const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero); + const auto swizzled = + ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g), + size_t(swizzle.b), size_t(swizzle.a)); + return swizzled; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index bbf74f5d3..781a0b14a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -180,7 +180,7 @@ struct FragmentRuntimeInfo { std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; - MrtSwizzle mrt_swizzle; + AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 5bf97ee51..f8a86c63b 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,7 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -40,13 +40,9 @@ struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; bool is_storage = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; - bool operator==(const ImageSpecialization& other) const { - return type == other.type && is_integer == other.is_integer && - is_storage == other.is_storage && - (dst_select != 0 ? 
dst_select == other.dst_select : true); - } + auto operator<=>(const ImageSpecialization&) const = default; }; struct FMaskSpecialization { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 83271a82d..f1607f03e 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -889,10 +889,54 @@ struct Liverpool { return !info.linear_general; } - NumberFormat NumFormat() const { + [[nodiscard]] DataFormat DataFormat() const { + return RemapDataFormat(info.format); + } + + [[nodiscard]] NumberFormat NumFormat() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb - : info.number_type.Value(); + return RemapNumberFormat(info.number_type == NumberFormat::SnormNz + ? NumberFormat::Srgb + : info.number_type.Value()); + } + + [[nodiscard]] CompMapping Swizzle() const { + // clang-format off + static constexpr std::array, 4> mrt_swizzles{{ + // Standard + std::array{{ + {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, + }}, + // Alternate + std::array{{ + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, + }}, + // StandardReverse + std::array{{ + {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = 
CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, + }}, + // AlternateReverse + std::array{{ + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, + }}, + }}; + // clang-format on + const auto swap_idx = static_cast(info.comp_swap.Value()); + const auto components_idx = NumComponents(info.format) - 1; + const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; + return RemapComponents(info.format, mrt_swizzle); } }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 6bbe1fb7e..4de25adbf 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -20,6 +20,85 @@ enum class CompSwizzle : u32 { Alpha = 7, }; +struct CompMapping { + CompSwizzle r : 3; + CompSwizzle g : 3; + CompSwizzle b : 3; + CompSwizzle a : 3; + + auto operator<=>(const CompMapping& other) const = default; + + template + [[nodiscard]] std::array Apply(const std::array& data) const { + return { + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), + }; + } + +private: + template + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { + switch (swizzle) { + case CompSwizzle::Zero: + return T(0); + case CompSwizzle::One: + return T(1); + case CompSwizzle::Red: + return data[0]; + case CompSwizzle::Green: + return data[1]; + case 
CompSwizzle::Blue: + return data[2]; + case CompSwizzle::Alpha: + return data[3]; + default: + UNREACHABLE(); + } + } +}; + +inline DataFormat RemapDataFormat(const DataFormat format) { + switch (format) { + case DataFormat::Format11_11_10: + return DataFormat::Format10_11_11; + case DataFormat::Format10_10_10_2: + return DataFormat::Format2_10_10_10; + case DataFormat::Format5_5_5_1: + return DataFormat::Format1_5_5_5; + default: + return format; + } +} + +inline NumberFormat RemapNumberFormat(const NumberFormat format) { + return format; +} + +inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { + switch (format) { + case DataFormat::Format11_11_10: + return { + .r = components.b, + .g = components.g, + .b = components.r, + .a = components.a, + }; + case DataFormat::Format10_10_10_2: + case DataFormat::Format5_5_5_1: + return { + .r = components.a, + .g = components.b, + .b = components.g, + .a = components.r, + }; + default: + return components; + } +} + // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] struct Buffer { u64 base_address : 44; @@ -52,21 +131,22 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return RemapNumberFormat(NumberFormat(num_format)); } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return 
RemapDataFormat(DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -186,10 +266,11 @@ struct Image { static constexpr Image Null() { Image image{}; image.data_format = u64(DataFormat::Format8_8_8_8); - image.dst_sel_x = 4; - image.dst_sel_y = 5; - image.dst_sel_z = 6; - image.dst_sel_w = 7; + image.num_format = u64(NumberFormat::Unorm); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); image.tiling_index = u64(TilingMode::Texture_MicroTiled); image.type = u64(ImageType::Color2D); return image; @@ -207,43 +288,14 @@ struct Image { return base_address != 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); - } - - static char SelectComp(u32 sel) { - switch (sel) { - case 0: - return '0'; - case 1: - return '1'; - case 4: - return 'R'; - case 5: - return 'G'; - case 6: - return 'B'; - case 7: - return 'A'; - default: - UNREACHABLE(); - } - } - - std::string DstSelectName() const { - std::string result = "["; - u32 dst_sel = DstSelect(); - for (u32 i = 0; i < 4; i++) { - result += SelectComp(dst_sel & 7); - dst_sel >>= 3; - } - result += ']'; - return result; + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } u32 Pitch() const { @@ -285,11 +337,11 @@ struct Image { } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return RemapDataFormat(DataFormat(data_format)); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return 
RemapNumberFormat(NumberFormat(num_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 6bd50ab06..c41b760ba 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -324,6 +324,34 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) { } } +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle) { + switch (comp_swizzle) { + case AmdGpu::CompSwizzle::Zero: + return vk::ComponentSwizzle::eZero; + case AmdGpu::CompSwizzle::One: + return vk::ComponentSwizzle::eOne; + case AmdGpu::CompSwizzle::Red: + return vk::ComponentSwizzle::eR; + case AmdGpu::CompSwizzle::Green: + return vk::ComponentSwizzle::eG; + case AmdGpu::CompSwizzle::Blue: + return vk::ComponentSwizzle::eB; + case AmdGpu::CompSwizzle::Alpha: + return vk::ComponentSwizzle::eA; + default: + UNREACHABLE(); + } +} + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping) { + return vk::ComponentMapping{ + .r = ComponentSwizzle(comp_mapping.r), + .g = ComponentSwizzle(comp_mapping.g), + .b = ComponentSwizzle(comp_mapping.b), + .a = ComponentSwizzle(comp_mapping.a), + }; +} + static constexpr vk::FormatFeatureFlags2 BufferRead = vk::FormatFeatureFlagBits2::eUniformTexelBuffer | vk::FormatFeatureFlagBits2::eVertexBuffer; static constexpr vk::FormatFeatureFlags2 BufferWrite = @@ -538,10 +566,8 @@ std::span SurfaceFormats() { // 10_11_11 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format10_11_11, AmdGpu::NumberFormat::Float, vk::Format::eB10G11R11UfloatPack32), - // 11_11_10 - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format11_11_10, AmdGpu::NumberFormat::Float, - vk::Format::eB10G11R11UfloatPack32), - // 10_10_10_2 + // 11_11_10 - Remapped to 10_11_11. + // 10_10_10_2 - Remapped to 2_10_10_10. 
// 2_10_10_10 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Unorm, vk::Format::eA2B10G10R10UnormPack32), @@ -614,7 +640,7 @@ std::span SurfaceFormats() { // 1_5_5_5 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format1_5_5_5, AmdGpu::NumberFormat::Unorm, vk::Format::eR5G5B5A1UnormPack16), - // 5_5_5_1 + // 5_5_5_1 - Remapped to 1_5_5_5. // 4_4_4_4 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format4_4_4_4, AmdGpu::NumberFormat::Unorm, vk::Format::eR4G4B4A4UnormPack16), @@ -677,31 +703,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu return format->vk_format; } -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap) { - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; - const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; - const bool comp_swap_alt_reverse = - comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; - if (comp_swap_alt) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eB8G8R8A8Unorm: - return vk::Format::eR8G8B8A8Unorm; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - case vk::Format::eB8G8R8A8Srgb: - return vk::Format::eR8G8B8A8Srgb; - case vk::Format::eA2B10G10R10UnormPack32: - return vk::Format::eA2R10G10B10UnormPack32; - default: - break; - } - } - return base_format; -} - static constexpr DepthFormatInfo CreateDepthFormatInfo( const DepthBuffer::ZFormat z_format, const DepthBuffer::StencilFormat stencil_format, const vk::Format vk_format) { @@ -744,21 +745,12 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat } vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - const auto comp_swap = color_buffer.info.comp_swap.Value(); - const auto format = color_buffer.info.format.Value(); - 
const auto number_type = color_buffer.info.number_type.Value(); + const auto comp_swizzle = color_buffer.Swizzle(); + const auto format = color_buffer.DataFormat(); + const auto number_type = color_buffer.NumFormat(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; - const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); - const auto num_components = AmdGpu::NumComponents(format); - - const bool comp_swap_alt = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::Alternate || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; - const bool comp_swap_reverse = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::StandardReverse || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; vk::ClearColorValue color{}; @@ -1079,26 +1071,7 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color break; } - if (num_components == 1) { - if (comp_swap != Liverpool::ColorBuffer::SwapMode::Standard) { - color.float32[static_cast(comp_swap)] = color.float32[0]; - color.float32[0] = 0.0f; - } - } else { - if (comp_swap_alt && num_components == 4) { - std::swap(color.float32[0], color.float32[2]); - } - - if (comp_swap_reverse) { - std::reverse(std::begin(color.float32), std::begin(color.float32) + num_components); - } - - if (comp_swap_alt && num_components != 4) { - color.float32[3] = color.float32[num_components - 1]; - color.float32[num_components - 1] = 0.0f; - } - } - + color.float32 = comp_swizzle.Apply(color.float32); return {.color = color}; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 25a27e20e..a68280e7d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -42,6 +42,10 @@ vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter); vk::BorderColor BorderColor(AmdGpu::BorderColor color); +vk::ComponentSwizzle 
ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle); + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping); + struct SurfaceFormatInfo { AmdGpu::DataFormat data_format; AmdGpu::NumberFormat number_format; @@ -52,9 +56,6 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap); - struct DepthFormatInfo { Liverpool::DepthBuffer::ZFormat z_format; Liverpool::DepthBuffer::StencilFormat stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ee8afa3e6..c8f4999b1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,7 +32,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_formats; std::array color_num_formats; - std::array mrt_swizzles; + std::array color_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c880cad70..cd1b42b05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -168,7 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { .num_format = graphics_key.color_num_formats[i], - .mrt_swizzle = static_cast(graphics_key.mrt_swizzles[i]), + .swizzle = graphics_key.color_swizzles[i], }; } break; @@ -304,7 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.blend_controls.fill({}); key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + 
key.color_swizzles.fill({}); key.vertex_buffer_formats.fill(vk::Format::eUndefined); key.patch_control_points = 0; @@ -327,14 +327,10 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.color_formats[remapped_cb] = - LiverpoolToVK::AdjustColorBufferFormat(base_format, col_buf.info.comp_swap.Value()); + LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat()); key.color_num_formats[remapped_cb] = col_buf.NumFormat(); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } + key.color_swizzles[remapped_cb] = col_buf.Swizzle(); } fetch_shader = std::nullopt; @@ -450,7 +446,7 @@ bool PipelineCache::RefreshGraphicsKey() { // of the latter we need to change format to undefined, and either way we need to // increment the index for the null attachment binding. key.color_formats[remapped_cb] = vk::Format::eUndefined; - key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard; + key.color_swizzles[remapped_cb] = {}; ++remapped_cb; continue; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 2cc4aab38..0559f1be3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -265,9 +265,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.DataFormat(), buffer.NumFormat()); num_samples = buffer.NumSamples(); - num_bits = NumBits(buffer.info.format); + num_bits = NumBits(buffer.DataFormat()); type = vk::ImageType::e2D; size.width = hint.Valid() ? 
hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 9e67b7f73..a9ae41dd1 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -31,25 +31,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { } } -vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { - switch (dst_sel) { - case 0: - return vk::ComponentSwizzle::eZero; - case 1: - return vk::ComponentSwizzle::eOne; - case 4: - return vk::ComponentSwizzle::eR; - case 5: - return vk::ComponentSwizzle::eG; - case 6: - return vk::ComponentSwizzle::eB; - case 7: - return vk::ComponentSwizzle::eA; - default: - UNREACHABLE(); - } -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.IsStorage(image)} { const auto dfmt = image.GetDataFmt(); @@ -87,21 +68,15 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } if (!is_storage) { - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { - const auto base_format = - Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? 
vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; - format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(base_format, - col_buffer.info.comp_swap.Value()); + format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.DataFormat(), col_buffer.NumFormat()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,