diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index c44c4b35..974345ba 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ b/UnleashedRecomp/CMakeLists.txt @@ -414,7 +414,9 @@ function(compile_pixel_shader FILE_PATH) endfunction() compile_pixel_shader(blend_color_alpha_ps) -compile_vertex_shader(copy_vs) +compile_vertex_shader(copy_vs) +compile_pixel_shader(copy_color_ps) +compile_pixel_shader(copy_depth_ps) compile_pixel_shader(csd_filter_ps) compile_vertex_shader(csd_no_tex_vs) compile_vertex_shader(csd_vs) @@ -427,7 +429,10 @@ compile_pixel_shader(gamma_correction_ps) compile_pixel_shader(imgui_ps) compile_vertex_shader(imgui_vs) compile_pixel_shader(movie_ps) -compile_vertex_shader(movie_vs) +compile_vertex_shader(movie_vs) +compile_pixel_shader(resolve_msaa_color_2x) +compile_pixel_shader(resolve_msaa_color_4x) +compile_pixel_shader(resolve_msaa_color_8x) compile_pixel_shader(resolve_msaa_depth_2x) compile_pixel_shader(resolve_msaa_depth_4x) compile_pixel_shader(resolve_msaa_depth_8x) diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp index 32aac2a8..a543b74e 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp @@ -27,6 +27,8 @@ //# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED #endif +//#define D3D12_DEBUG_SET_STABLE_POWER_STATE + // Old Windows SDK versions don't provide this macro, so we workaround it by making sure it is defined. #ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE #define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) @@ -692,6 +694,20 @@ namespace plume { ); } + static D3D12_RESOLVE_MODE toD3D12(RenderResolveMode resolveMode) { + switch (resolveMode) { + case RenderResolveMode::MIN: + return D3D12_RESOLVE_MODE_MIN; + case RenderResolveMode::MAX: + return D3D12_RESOLVE_MODE_MAX; + case RenderResolveMode::AVERAGE: + return D3D12_RESOLVE_MODE_AVERAGE; + default: + assert(false && "Unknown resolve mode."); + return D3D12_RESOLVE_MODE_AVERAGE; + } + } + static void setObjectName(ID3D12Object *object, const std::string &name) { const std::wstring wideCharName = Utf8ToUtf16(name); object->SetName(wideCharName.c_str()); @@ -1916,7 +1932,7 @@ namespace plume { resetSamplePositions(); } - void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { + void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) { assert(dstTexture != nullptr); assert(srcTexture != nullptr); @@ -1931,7 +1947,7 @@ namespace plume { } setSamplePositions(interfaceDstTexture); - d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), D3D12_RESOLVE_MODE_AVERAGE); + d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), toD3D12(resolveMode)); resetSamplePositions(); } @@ -3373,6 +3389,10 @@ namespace plume { return; } + #ifdef D3D12_DEBUG_SET_STABLE_POWER_STATE + d3d->SetStablePowerState(TRUE); + #endif + D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; allocatorDesc.pDevice = d3d; allocatorDesc.pAdapter = adapter; diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.h b/UnleashedRecomp/gpu/rhi/plume_d3d12.h index b1a8645f..291a7d47 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.h +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.h @@ -192,7 +192,7 @@ namespace plume { void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; - void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; + void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void discardTexture(const RenderTexture* texture) override; diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface.h b/UnleashedRecomp/gpu/rhi/plume_render_interface.h index ef2a5ed6..4b9e5339 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface.h @@ -143,7 +143,7 @@ namespace plume { virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0; virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; - virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr) = 0; + virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr, RenderResolveMode resolveMode = RenderResolveMode::AVERAGE) = 0; virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0; virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0; virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only. diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h index 7352e863..b7551832 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h @@ -483,6 +483,12 @@ namespace plume { CPU }; + enum class RenderResolveMode { + MIN, + MAX, + AVERAGE + }; + // Global functions. constexpr uint32_t RenderFormatSize(RenderFormat format) { diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp index 192c7ed5..1ff395a8 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp @@ -3074,12 +3074,13 @@ namespace plume { } void VulkanCommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) { - resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr); + resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr, RenderResolveMode::AVERAGE); } - void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { + void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) { assert(dstTexture != nullptr); assert(srcTexture != nullptr); + assert(resolveMode == RenderResolveMode::AVERAGE && "Vulkan only supports AVERAGE resolve mode."); thread_local std::vector imageResolves; imageResolves.clear(); diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.h b/UnleashedRecomp/gpu/rhi/plume_vulkan.h index 122ffcf3..469f0560 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.h +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.h @@ -315,7 +315,7 @@ namespace plume { void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; - void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; + void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void discardTexture(const RenderTexture* texture) override; diff --git a/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl b/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl new file mode 100644 index 00000000..0559557a --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl @@ -0,0 +1,8 @@ +#include "copy_common.hlsli" + +Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); + +float4 main(in float4 position : SV_Position) : SV_Target +{ + return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0)); +} diff --git a/UnleashedRecomp/gpu/shader/copy_common.hlsli b/UnleashedRecomp/gpu/shader/copy_common.hlsli new file mode 100644 index 00000000..5984ad0b --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_common.hlsli @@ -0,0 +1,8 @@ +#pragma once + +struct PushConstants +{ + uint ResourceDescriptorIndex; +}; + +[[vk::push_constant]] ConstantBuffer g_PushConstants : register(b3, space4); diff --git a/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl b/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl new file mode 100644 index 00000000..251f893a --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl @@ -0,0 +1,8 @@ +#include "copy_common.hlsli" + +Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); + +float main(in float4 position : SV_Position) : SV_Depth +{ + return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0)); +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli b/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli new file mode 100644 index 00000000..f9b029d9 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli @@ -0,0 +1,15 @@ +#pragma once + +#include "copy_common.hlsli" + +Texture2DMS g_Texture2DMSDescriptorHeap[] : register(t0, space0); + +float4 main(in float4 position : SV_Position) : SV_Target +{ + float4 result = g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), 0); + + [unroll] for (int i = 1; i < SAMPLE_COUNT; i++) + result += g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), i); + + return result / SAMPLE_COUNT; +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl new file mode 100644 index 00000000..95338bfa --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 2 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl new file mode 100644 index 00000000..71b8b8a4 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 4 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl new file mode 100644 index 00000000..9a0f8ac4 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 8 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli index d413717b..a06c7bac 100644 --- a/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli @@ -1,11 +1,6 @@ #pragma once -struct PushConstants -{ - uint ResourceDescriptorIndex; -}; - -[[vk::push_constant]] ConstantBuffer g_PushConstants : register(b3, space4); +#include "copy_common.hlsli" Texture2DMS g_Texture2DMSDescriptorHeap[] : register(t0, space0); diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index fdca431f..fa692ee0 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -40,6 +40,8 @@ #ifdef UNLEASHED_RECOMP_D3D12 #include "shader/blend_color_alpha_ps.hlsl.dxil.h" #include "shader/copy_vs.hlsl.dxil.h" +#include "shader/copy_color_ps.hlsl.dxil.h" +#include "shader/copy_depth_ps.hlsl.dxil.h" #include "shader/csd_filter_ps.hlsl.dxil.h" #include "shader/csd_no_tex_vs.hlsl.dxil.h" #include "shader/csd_vs.hlsl.dxil.h" @@ -53,6 +55,9 @@ #include "shader/imgui_vs.hlsl.dxil.h" #include "shader/movie_ps.hlsl.dxil.h" #include "shader/movie_vs.hlsl.dxil.h" +#include "shader/resolve_msaa_color_2x.hlsl.dxil.h" +#include "shader/resolve_msaa_color_4x.hlsl.dxil.h" +#include "shader/resolve_msaa_color_8x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_2x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_4x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_8x.hlsl.dxil.h" @@ -60,6 +65,8 @@ #include "shader/blend_color_alpha_ps.hlsl.spirv.h" #include "shader/copy_vs.hlsl.spirv.h" +#include "shader/copy_color_ps.hlsl.spirv.h" +#include "shader/copy_depth_ps.hlsl.spirv.h" #include "shader/csd_filter_ps.hlsl.spirv.h" #include "shader/csd_no_tex_vs.hlsl.spirv.h" #include "shader/csd_vs.hlsl.spirv.h" @@ -73,6 +80,9 @@ #include "shader/imgui_vs.hlsl.spirv.h" #include "shader/movie_ps.hlsl.spirv.h" #include "shader/movie_vs.hlsl.spirv.h" +#include "shader/resolve_msaa_color_2x.hlsl.spirv.h" +#include "shader/resolve_msaa_color_4x.hlsl.spirv.h" +#include "shader/resolve_msaa_color_8x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_2x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_4x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_8x.hlsl.spirv.h" @@ -136,6 +146,14 @@ struct PipelineState }; #pragma pack(pop) +struct UploadAllocation +{ + const RenderBuffer* buffer; + uint64_t offset; + uint8_t* memory; + uint64_t deviceAddress; +}; + struct SharedConstants { uint32_t texture2DIndices[16]{}; @@ -158,6 +176,8 @@ static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f); static PipelineState g_pipelineState; static int32_t g_depthBias; static float g_slopeScaledDepthBias; +static UploadAllocation g_vertexShaderConstants; +static UploadAllocation g_pixelShaderConstants; static SharedConstants g_sharedConstants; static GuestTexture* g_textures[16]; static RenderSamplerDesc g_samplerDescs[16]; @@ -215,6 +235,9 @@ static bool g_vulkan = false; static constexpr bool g_vulkan = true; #endif +static constexpr bool g_hardwareResolve = true; +static constexpr bool g_hardwareDepthResolve = true; + static std::unique_ptr g_interface; static std::unique_ptr g_device; @@ -380,14 +403,6 @@ struct UploadBuffer uint64_t deviceAddress = 0; }; -struct UploadAllocation -{ - const RenderBuffer* buffer; - uint64_t offset; - uint8_t* memory; - uint64_t deviceAddress; -}; - struct UploadAllocator { std::vector buffers; @@ -1139,6 +1154,14 @@ static const std::pair g_setRenderStateFunctions[] = { D3DRS_COLORWRITEENABLE, HostToGuestFunction> } }; +static std::unique_ptr g_copyShader; + +static std::unique_ptr g_copyColorShader; +static ankerl::unordered_dense::map> g_copyColorPipelines; +static std::unique_ptr g_copyDepthPipeline; + +static std::unique_ptr g_resolveMsaaColorShaders[3]; +static ankerl::unordered_dense::map, 3>> g_resolveMsaaColorPipelines; static std::unique_ptr g_resolveMsaaDepthPipelines[3]; enum @@ -1462,6 +1485,9 @@ static void BeginCommandList() g_backBuffer->layout = RenderTextureLayout::UNKNOWN; + g_vertexShaderConstants = {}; + g_pixelShaderConstants = {}; + for (size_t i = 0; i < 16; i++) { g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D; @@ -1711,7 +1737,23 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); - auto copyShader = CREATE_SHADER(copy_vs); + g_copyShader = CREATE_SHADER(copy_vs); + g_copyColorShader = CREATE_SHADER(copy_color_ps); + auto copyDepthShader = CREATE_SHADER(copy_depth_ps); + + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = copyDepthShader.get(); + desc.depthFunction = RenderComparisonFunction::ALWAYS; + desc.depthEnabled = true; + desc.depthWriteEnabled = true; + desc.depthTargetFormat = RenderFormat::D32_FLOAT; + g_copyDepthPipeline = g_device->createGraphicsPipeline(desc); + + g_resolveMsaaColorShaders[0] = CREATE_SHADER(resolve_msaa_color_2x); + g_resolveMsaaColorShaders[1] = CREATE_SHADER(resolve_msaa_color_4x); + g_resolveMsaaColorShaders[2] = CREATE_SHADER(resolve_msaa_color_8x); for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++) { @@ -1729,9 +1771,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) break; } - RenderGraphicsPipelineDesc desc; + desc = {}; desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = copyShader.get(); + desc.vertexShader = g_copyShader.get(); desc.pixelShader = pixelShader.get(); desc.depthFunction = RenderComparisonFunction::ALWAYS; desc.depthEnabled = true; @@ -1758,9 +1800,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) auto gammaCorrectionShader = CREATE_SHADER(gamma_correction_ps); - RenderGraphicsPipelineDesc desc; + desc = {}; desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = copyShader.get(); + desc.vertexShader = g_copyShader.get(); desc.pixelShader = gammaCorrectionShader.get(); desc.renderTargetFormat[0] = BACKBUFFER_FORMAT; desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); @@ -2490,6 +2532,15 @@ void Video::Present() g_presentProfiler.Reset(); } +static void Present(GuestDevice* device) +{ + Video::Present(); + + // Invalidate vertex/pixel shader constants. + device->dirtyFlags[0] = ~0; + device->dirtyFlags[1] = ~0; +} + void Video::StartPipelinePrecompilation() { g_shouldPrecompilePipelines = true; @@ -2714,7 +2765,13 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep desc.mipLevels = levels; desc.arraySize = 1; desc.format = ConvertFormat(format); - desc.flags = (desc.format == RenderFormat::D32_FLOAT) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::NONE; + + if (desc.format == RenderFormat::D32_FLOAT) + desc.flags = RenderTextureFlag::DEPTH_TARGET; + else if (usage != 0) + desc.flags = RenderTextureFlag::RENDER_TARGET; + else + desc.flags = RenderTextureFlag::NONE; texture->textureHolder = g_device->createTexture(desc); texture->texture = texture->textureHolder.get(); @@ -2974,24 +3031,25 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf RenderTextureLayout srcLayout; RenderTextureLayout dstLayout; + bool shaderResolve = true; - if (multiSampling) + if (multiSampling && g_hardwareResolve) { - if (surface == depthStencil) - { - srcLayout = RenderTextureLayout::SHADER_READ; - dstLayout = RenderTextureLayout::DEPTH_WRITE; - } - else + // Hardware depth resolve is only supported on D3D12 when programmable sample positions are available. + bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations; + + if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable) { srcLayout = RenderTextureLayout::RESOLVE_SOURCE; dstLayout = RenderTextureLayout::RESOLVE_DEST; + shaderResolve = false; } } - else + + if (shaderResolve) { - srcLayout = RenderTextureLayout::COPY_SOURCE; - dstLayout = RenderTextureLayout::COPY_DEST; + srcLayout = RenderTextureLayout::SHADER_READ; + dstLayout = (surface->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE); } AddBarrier(surface, srcLayout); @@ -3018,9 +3076,28 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS for (const auto texture : surface->destinationTextures) { - if (multiSampling) + bool shaderResolve = true; + + if (multiSampling && g_hardwareResolve) { - if (surface == depthStencil) + bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations; + + if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable) + { + if (surface->format == RenderFormat::D32_FLOAT) + commandList->resolveTextureRegion(texture->texture, 0, 0, surface->texture, nullptr, RenderResolveMode::MIN); + else + commandList->resolveTexture(texture->texture, surface->texture); + + shaderResolve = false; + } + } + + if (shaderResolve) + { + RenderPipeline* pipeline = nullptr; + + if (multiSampling) { uint32_t pipelineIndex = 0; @@ -3040,44 +3117,92 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS break; } - if (texture->framebuffer == nullptr) + if (texture->format == RenderFormat::D32_FLOAT) + { + pipeline = g_resolveMsaaDepthPipelines[pipelineIndex].get(); + } + else + { + auto& resolveMsaaColorPipeline = g_resolveMsaaColorPipelines[surface->format][pipelineIndex]; + if (resolveMsaaColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_resolveMsaaColorShaders[pipelineIndex].get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + resolveMsaaColorPipeline = g_device->createGraphicsPipeline(desc); + } + + pipeline = resolveMsaaColorPipeline.get(); + } + } + else + { + if (texture->format == RenderFormat::D32_FLOAT) + { + pipeline = g_copyDepthPipeline.get(); + } + else + { + auto& copyColorPipeline = g_copyColorPipelines[surface->format]; + if (copyColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_copyColorShader.get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + copyColorPipeline = g_device->createGraphicsPipeline(desc); + } + + pipeline = copyColorPipeline.get(); + } + } + + if (texture->framebuffer == nullptr) + { + if (texture->format == RenderFormat::D32_FLOAT) { RenderFramebufferDesc desc; desc.depthAttachment = texture->texture; texture->framebuffer = g_device->createFramebuffer(desc); } - - if (g_framebuffer != texture->framebuffer.get()) + else { - commandList->setFramebuffer(texture->framebuffer.get()); - g_framebuffer = texture->framebuffer.get(); - } - - commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); - commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); - commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); - commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); - commandList->drawInstanced(6, 1, 0, 0); - - g_dirtyStates.renderTargetAndDepthStencil = true; - g_dirtyStates.viewport = true; - g_dirtyStates.pipelineState = true; - g_dirtyStates.scissorRect = true; - - if (g_vulkan) - { - g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias. - g_dirtyStates.vertexShaderConstants = true; + RenderFramebufferDesc desc; + desc.colorAttachments = const_cast(&texture->texture); + desc.colorAttachmentsCount = 1; + texture->framebuffer = g_device->createFramebuffer(desc); } } - else + + if (g_framebuffer != texture->framebuffer.get()) { - commandList->resolveTexture(texture->texture, surface->texture); + commandList->setFramebuffer(texture->framebuffer.get()); + g_framebuffer = texture->framebuffer.get(); + } + + commandList->setPipeline(pipeline); + commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); + commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); + commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); + commandList->drawInstanced(6, 1, 0, 0); + + g_dirtyStates.renderTargetAndDepthStencil = true; + g_dirtyStates.viewport = true; + g_dirtyStates.pipelineState = true; + g_dirtyStates.scissorRect = true; + + if (g_vulkan) + { + g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants. + g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias. } - } - else - { - commandList->copyTexture(texture->texture, surface->texture); } texture->sourceSurface = nullptr; @@ -3841,7 +3966,7 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman } } - if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0) + if (device->dirtyFlags[0] != 0) { auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::SetVertexShaderConstants; @@ -3850,7 +3975,7 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman device->dirtyFlags[0] = 0; } - if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] != 0) + if (device->dirtyFlags[1] != 0) { auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::SetPixelShaderConstants; @@ -3915,12 +4040,14 @@ static void ProcSetSamplerState(const RenderCommand& cmd) static void ProcSetVertexShaderConstants(const RenderCommand& cmd) { - SetRootDescriptor(cmd.setVertexShaderConstants.allocation, 0); + g_vertexShaderConstants = cmd.setVertexShaderConstants.allocation; + g_dirtyStates.vertexShaderConstants = true; } static void ProcSetPixelShaderConstants(const RenderCommand& cmd) { - SetRootDescriptor(cmd.setPixelShaderConstants.allocation, 1); + g_pixelShaderConstants = cmd.setPixelShaderConstants.allocation; + g_dirtyStates.pixelShaderConstants = true; } static void ProcAddPipeline(const RenderCommand& cmd) @@ -4010,6 +4137,12 @@ static void FlushRenderStateForRenderThread() if (g_dirtyStates.depthBias && g_capabilities.dynamicDepthBias) commandList->setDepthBias(g_depthBias, 0.0f, g_slopeScaledDepthBias); + if (g_dirtyStates.vertexShaderConstants) + SetRootDescriptor(g_vertexShaderConstants, 0); + + if (g_dirtyStates.pixelShaderConstants) + SetRootDescriptor(g_pixelShaderConstants, 1); + if (g_dirtyStates.sharedConstants) { auto sharedConstants = g_uploadAllocators[g_frame].allocate(&g_sharedConstants, sizeof(g_sharedConstants), 0x100); @@ -7001,7 +7134,7 @@ GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc); GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration); GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration); -GUEST_FUNCTION_HOOK(sub_82BDA8C0, Video::Present); +GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present); GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer); GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture); diff --git a/tools/XenosRecomp b/tools/XenosRecomp index 96458eb7..855a5a8c 160000 --- a/tools/XenosRecomp +++ b/tools/XenosRecomp @@ -1 +1 @@ -Subproject commit 96458eb7bc01798c951bce9d627852c2870bc54d +Subproject commit 855a5a8c51ea5f84baecbf4fc87c182795d482c9