Implement rasterization path for framebuffer copy operations. (#270)

* Move all copies & resolves to rasterization.

* Fix Vulkan crash.

* Implement hardware resolve path for both color and depth targets.
This commit is contained in:
Skyth (Asilkan) 2025-02-03 23:02:47 +03:00 committed by GitHub
parent aaad10d797
commit 11d0fd2f9c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 280 additions and 75 deletions

View file

@ -415,6 +415,8 @@ endfunction()
compile_pixel_shader(blend_color_alpha_ps)
compile_vertex_shader(copy_vs)
compile_pixel_shader(copy_color_ps)
compile_pixel_shader(copy_depth_ps)
compile_pixel_shader(csd_filter_ps)
compile_vertex_shader(csd_no_tex_vs)
compile_vertex_shader(csd_vs)
@ -428,6 +430,9 @@ compile_pixel_shader(imgui_ps)
compile_vertex_shader(imgui_vs)
compile_pixel_shader(movie_ps)
compile_vertex_shader(movie_vs)
compile_pixel_shader(resolve_msaa_color_2x)
compile_pixel_shader(resolve_msaa_color_4x)
compile_pixel_shader(resolve_msaa_color_8x)
compile_pixel_shader(resolve_msaa_depth_2x)
compile_pixel_shader(resolve_msaa_depth_4x)
compile_pixel_shader(resolve_msaa_depth_8x)

View file

@ -27,6 +27,8 @@
//# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED
#endif
//#define D3D12_DEBUG_SET_STABLE_POWER_STATE
// Old Windows SDK versions don't provide this macro, so we workaround it by making sure it is defined.
#ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE
#define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)
@ -692,6 +694,20 @@ namespace plume {
);
}
static D3D12_RESOLVE_MODE toD3D12(RenderResolveMode resolveMode) {
switch (resolveMode) {
case RenderResolveMode::MIN:
return D3D12_RESOLVE_MODE_MIN;
case RenderResolveMode::MAX:
return D3D12_RESOLVE_MODE_MAX;
case RenderResolveMode::AVERAGE:
return D3D12_RESOLVE_MODE_AVERAGE;
default:
assert(false && "Unknown resolve mode.");
return D3D12_RESOLVE_MODE_AVERAGE;
}
}
static void setObjectName(ID3D12Object *object, const std::string &name) {
const std::wstring wideCharName = Utf8ToUtf16(name);
object->SetName(wideCharName.c_str());
@ -1916,7 +1932,7 @@ namespace plume {
resetSamplePositions();
}
void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) {
void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) {
assert(dstTexture != nullptr);
assert(srcTexture != nullptr);
@ -1931,7 +1947,7 @@ namespace plume {
}
setSamplePositions(interfaceDstTexture);
d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), D3D12_RESOLVE_MODE_AVERAGE);
d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), toD3D12(resolveMode));
resetSamplePositions();
}
@ -3373,6 +3389,10 @@ namespace plume {
return;
}
#ifdef D3D12_DEBUG_SET_STABLE_POWER_STATE
d3d->SetStablePowerState(TRUE);
#endif
D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
allocatorDesc.pDevice = d3d;
allocatorDesc.pAdapter = adapter;

View file

@ -192,7 +192,7 @@ namespace plume {
void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override;
void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override;
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
void discardTexture(const RenderTexture* texture) override;

View file

@ -143,7 +143,7 @@ namespace plume {
virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0;
virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0;
virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0;
virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr) = 0;
virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr, RenderResolveMode resolveMode = RenderResolveMode::AVERAGE) = 0;
virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0;
virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only.

View file

@ -483,6 +483,12 @@ namespace plume {
CPU
};
enum class RenderResolveMode {
MIN,
MAX,
AVERAGE
};
// Global functions.
constexpr uint32_t RenderFormatSize(RenderFormat format) {

View file

@ -3074,12 +3074,13 @@ namespace plume {
}
void VulkanCommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) {
resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr);
resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr, RenderResolveMode::AVERAGE);
}
void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) {
void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) {
assert(dstTexture != nullptr);
assert(srcTexture != nullptr);
assert(resolveMode == RenderResolveMode::AVERAGE && "Vulkan only supports AVERAGE resolve mode.");
thread_local std::vector<VkImageResolve> imageResolves;
imageResolves.clear();

View file

@ -315,7 +315,7 @@ namespace plume {
void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override;
void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override;
void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override;
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
void discardTexture(const RenderTexture* texture) override;

View file

@ -0,0 +1,8 @@
#include "copy_common.hlsli"
Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0);
float4 main(in float4 position : SV_Position) : SV_Target
{
return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0));
}

View file

@ -0,0 +1,8 @@
#pragma once
struct PushConstants
{
uint ResourceDescriptorIndex;
};
[[vk::push_constant]] ConstantBuffer<PushConstants> g_PushConstants : register(b3, space4);

View file

@ -0,0 +1,8 @@
#include "copy_common.hlsli"
Texture2D<float> g_Texture2DDescriptorHeap[] : register(t0, space0);
float main(in float4 position : SV_Position) : SV_Depth
{
return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0));
}

View file

@ -0,0 +1,15 @@
#pragma once
#include "copy_common.hlsli"
Texture2DMS<float4, SAMPLE_COUNT> g_Texture2DMSDescriptorHeap[] : register(t0, space0);
float4 main(in float4 position : SV_Position) : SV_Target
{
float4 result = g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), 0);
[unroll] for (int i = 1; i < SAMPLE_COUNT; i++)
result += g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), i);
return result / SAMPLE_COUNT;
}

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 2
#include "resolve_msaa_color.hlsli"

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 4
#include "resolve_msaa_color.hlsli"

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 8
#include "resolve_msaa_color.hlsli"

View file

@ -1,11 +1,6 @@
#pragma once
struct PushConstants
{
uint ResourceDescriptorIndex;
};
[[vk::push_constant]] ConstantBuffer<PushConstants> g_PushConstants : register(b3, space4);
#include "copy_common.hlsli"
Texture2DMS<float, SAMPLE_COUNT> g_Texture2DMSDescriptorHeap[] : register(t0, space0);

View file

@ -40,6 +40,8 @@
#ifdef UNLEASHED_RECOMP_D3D12
#include "shader/blend_color_alpha_ps.hlsl.dxil.h"
#include "shader/copy_vs.hlsl.dxil.h"
#include "shader/copy_color_ps.hlsl.dxil.h"
#include "shader/copy_depth_ps.hlsl.dxil.h"
#include "shader/csd_filter_ps.hlsl.dxil.h"
#include "shader/csd_no_tex_vs.hlsl.dxil.h"
#include "shader/csd_vs.hlsl.dxil.h"
@ -53,6 +55,9 @@
#include "shader/imgui_vs.hlsl.dxil.h"
#include "shader/movie_ps.hlsl.dxil.h"
#include "shader/movie_vs.hlsl.dxil.h"
#include "shader/resolve_msaa_color_2x.hlsl.dxil.h"
#include "shader/resolve_msaa_color_4x.hlsl.dxil.h"
#include "shader/resolve_msaa_color_8x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_2x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_4x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_8x.hlsl.dxil.h"
@ -60,6 +65,8 @@
#include "shader/blend_color_alpha_ps.hlsl.spirv.h"
#include "shader/copy_vs.hlsl.spirv.h"
#include "shader/copy_color_ps.hlsl.spirv.h"
#include "shader/copy_depth_ps.hlsl.spirv.h"
#include "shader/csd_filter_ps.hlsl.spirv.h"
#include "shader/csd_no_tex_vs.hlsl.spirv.h"
#include "shader/csd_vs.hlsl.spirv.h"
@ -73,6 +80,9 @@
#include "shader/imgui_vs.hlsl.spirv.h"
#include "shader/movie_ps.hlsl.spirv.h"
#include "shader/movie_vs.hlsl.spirv.h"
#include "shader/resolve_msaa_color_2x.hlsl.spirv.h"
#include "shader/resolve_msaa_color_4x.hlsl.spirv.h"
#include "shader/resolve_msaa_color_8x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_2x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_4x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_8x.hlsl.spirv.h"
@ -136,6 +146,14 @@ struct PipelineState
};
#pragma pack(pop)
struct UploadAllocation
{
const RenderBuffer* buffer;
uint64_t offset;
uint8_t* memory;
uint64_t deviceAddress;
};
struct SharedConstants
{
uint32_t texture2DIndices[16]{};
@ -158,6 +176,8 @@ static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
static PipelineState g_pipelineState;
static int32_t g_depthBias;
static float g_slopeScaledDepthBias;
static UploadAllocation g_vertexShaderConstants;
static UploadAllocation g_pixelShaderConstants;
static SharedConstants g_sharedConstants;
static GuestTexture* g_textures[16];
static RenderSamplerDesc g_samplerDescs[16];
@ -215,6 +235,9 @@ static bool g_vulkan = false;
static constexpr bool g_vulkan = true;
#endif
static constexpr bool g_hardwareResolve = true;
static constexpr bool g_hardwareDepthResolve = true;
static std::unique_ptr<RenderInterface> g_interface;
static std::unique_ptr<RenderDevice> g_device;
@ -380,14 +403,6 @@ struct UploadBuffer
uint64_t deviceAddress = 0;
};
struct UploadAllocation
{
const RenderBuffer* buffer;
uint64_t offset;
uint8_t* memory;
uint64_t deviceAddress;
};
struct UploadAllocator
{
std::vector<UploadBuffer> buffers;
@ -1139,6 +1154,14 @@ static const std::pair<GuestRenderState, PPCFunc*> g_setRenderStateFunctions[] =
{ D3DRS_COLORWRITEENABLE, HostToGuestFunction<SetRenderState<D3DRS_COLORWRITEENABLE>> }
};
static std::unique_ptr<RenderShader> g_copyShader;
static std::unique_ptr<RenderShader> g_copyColorShader;
static ankerl::unordered_dense::map<RenderFormat, std::unique_ptr<RenderPipeline>> g_copyColorPipelines;
static std::unique_ptr<RenderPipeline> g_copyDepthPipeline;
static std::unique_ptr<RenderShader> g_resolveMsaaColorShaders[3];
static ankerl::unordered_dense::map<RenderFormat, std::array<std::unique_ptr<RenderPipeline>, 3>> g_resolveMsaaColorPipelines;
static std::unique_ptr<RenderPipeline> g_resolveMsaaDepthPipelines[3];
enum
@ -1462,6 +1485,9 @@ static void BeginCommandList()
g_backBuffer->layout = RenderTextureLayout::UNKNOWN;
g_vertexShaderConstants = {};
g_pixelShaderConstants = {};
for (size_t i = 0; i < 16; i++)
{
g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D;
@ -1711,7 +1737,23 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver)
g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get());
auto copyShader = CREATE_SHADER(copy_vs);
g_copyShader = CREATE_SHADER(copy_vs);
g_copyColorShader = CREATE_SHADER(copy_color_ps);
auto copyDepthShader = CREATE_SHADER(copy_depth_ps);
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = g_copyShader.get();
desc.pixelShader = copyDepthShader.get();
desc.depthFunction = RenderComparisonFunction::ALWAYS;
desc.depthEnabled = true;
desc.depthWriteEnabled = true;
desc.depthTargetFormat = RenderFormat::D32_FLOAT;
g_copyDepthPipeline = g_device->createGraphicsPipeline(desc);
g_resolveMsaaColorShaders[0] = CREATE_SHADER(resolve_msaa_color_2x);
g_resolveMsaaColorShaders[1] = CREATE_SHADER(resolve_msaa_color_4x);
g_resolveMsaaColorShaders[2] = CREATE_SHADER(resolve_msaa_color_8x);
for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++)
{
@ -1729,9 +1771,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver)
break;
}
RenderGraphicsPipelineDesc desc;
desc = {};
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = copyShader.get();
desc.vertexShader = g_copyShader.get();
desc.pixelShader = pixelShader.get();
desc.depthFunction = RenderComparisonFunction::ALWAYS;
desc.depthEnabled = true;
@ -1758,9 +1800,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver)
auto gammaCorrectionShader = CREATE_SHADER(gamma_correction_ps);
RenderGraphicsPipelineDesc desc;
desc = {};
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = copyShader.get();
desc.vertexShader = g_copyShader.get();
desc.pixelShader = gammaCorrectionShader.get();
desc.renderTargetFormat[0] = BACKBUFFER_FORMAT;
desc.renderTargetBlend[0] = RenderBlendDesc::Copy();
@ -2490,6 +2532,15 @@ void Video::Present()
g_presentProfiler.Reset();
}
static void Present(GuestDevice* device)
{
Video::Present();
// Invalidate vertex/pixel shader constants.
device->dirtyFlags[0] = ~0;
device->dirtyFlags[1] = ~0;
}
void Video::StartPipelinePrecompilation()
{
g_shouldPrecompilePipelines = true;
@ -2714,7 +2765,13 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep
desc.mipLevels = levels;
desc.arraySize = 1;
desc.format = ConvertFormat(format);
desc.flags = (desc.format == RenderFormat::D32_FLOAT) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::NONE;
if (desc.format == RenderFormat::D32_FLOAT)
desc.flags = RenderTextureFlag::DEPTH_TARGET;
else if (usage != 0)
desc.flags = RenderTextureFlag::RENDER_TARGET;
else
desc.flags = RenderTextureFlag::NONE;
texture->textureHolder = g_device->createTexture(desc);
texture->texture = texture->textureHolder.get();
@ -2974,24 +3031,25 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf
RenderTextureLayout srcLayout;
RenderTextureLayout dstLayout;
bool shaderResolve = true;
if (multiSampling)
if (multiSampling && g_hardwareResolve)
{
if (surface == depthStencil)
{
srcLayout = RenderTextureLayout::SHADER_READ;
dstLayout = RenderTextureLayout::DEPTH_WRITE;
}
else
// Hardware depth resolve is only supported on D3D12 when programmable sample positions are available.
bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations;
if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable)
{
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
dstLayout = RenderTextureLayout::RESOLVE_DEST;
shaderResolve = false;
}
}
else
if (shaderResolve)
{
srcLayout = RenderTextureLayout::COPY_SOURCE;
dstLayout = RenderTextureLayout::COPY_DEST;
srcLayout = RenderTextureLayout::SHADER_READ;
dstLayout = (surface->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE);
}
AddBarrier(surface, srcLayout);
@ -3018,9 +3076,28 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
for (const auto texture : surface->destinationTextures)
{
if (multiSampling)
bool shaderResolve = true;
if (multiSampling && g_hardwareResolve)
{
if (surface == depthStencil)
bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations;
if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable)
{
if (surface->format == RenderFormat::D32_FLOAT)
commandList->resolveTextureRegion(texture->texture, 0, 0, surface->texture, nullptr, RenderResolveMode::MIN);
else
commandList->resolveTexture(texture->texture, surface->texture);
shaderResolve = false;
}
}
if (shaderResolve)
{
RenderPipeline* pipeline = nullptr;
if (multiSampling)
{
uint32_t pipelineIndex = 0;
@ -3040,12 +3117,69 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
break;
}
if (texture->format == RenderFormat::D32_FLOAT)
{
pipeline = g_resolveMsaaDepthPipelines[pipelineIndex].get();
}
else
{
auto& resolveMsaaColorPipeline = g_resolveMsaaColorPipelines[surface->format][pipelineIndex];
if (resolveMsaaColorPipeline == nullptr)
{
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = g_copyShader.get();
desc.pixelShader = g_resolveMsaaColorShaders[pipelineIndex].get();
desc.renderTargetFormat[0] = texture->format;
desc.renderTargetBlend[0] = RenderBlendDesc::Copy();
desc.renderTargetCount = 1;
resolveMsaaColorPipeline = g_device->createGraphicsPipeline(desc);
}
pipeline = resolveMsaaColorPipeline.get();
}
}
else
{
if (texture->format == RenderFormat::D32_FLOAT)
{
pipeline = g_copyDepthPipeline.get();
}
else
{
auto& copyColorPipeline = g_copyColorPipelines[surface->format];
if (copyColorPipeline == nullptr)
{
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = g_copyShader.get();
desc.pixelShader = g_copyColorShader.get();
desc.renderTargetFormat[0] = texture->format;
desc.renderTargetBlend[0] = RenderBlendDesc::Copy();
desc.renderTargetCount = 1;
copyColorPipeline = g_device->createGraphicsPipeline(desc);
}
pipeline = copyColorPipeline.get();
}
}
if (texture->framebuffer == nullptr)
{
if (texture->format == RenderFormat::D32_FLOAT)
{
RenderFramebufferDesc desc;
desc.depthAttachment = texture->texture;
texture->framebuffer = g_device->createFramebuffer(desc);
}
else
{
RenderFramebufferDesc desc;
desc.colorAttachments = const_cast<const RenderTexture**>(&texture->texture);
desc.colorAttachmentsCount = 1;
texture->framebuffer = g_device->createFramebuffer(desc);
}
}
if (g_framebuffer != texture->framebuffer.get())
{
@ -3053,7 +3187,7 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
g_framebuffer = texture->framebuffer.get();
}
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
commandList->setPipeline(pipeline);
commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f));
commandList->setScissors(RenderRect(0, 0, texture->width, texture->height));
commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t));
@ -3066,19 +3200,10 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS
if (g_vulkan)
{
g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
g_dirtyStates.vertexShaderConstants = true;
g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants.
g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias.
}
}
else
{
commandList->resolveTexture(texture->texture, surface->texture);
}
}
else
{
commandList->copyTexture(texture->texture, surface->texture);
}
texture->sourceSurface = nullptr;
@ -3841,7 +3966,7 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman
}
}
if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0)
if (device->dirtyFlags[0] != 0)
{
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetVertexShaderConstants;
@ -3850,7 +3975,7 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman
device->dirtyFlags[0] = 0;
}
if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] != 0)
if (device->dirtyFlags[1] != 0)
{
auto& cmd = queue.enqueue();
cmd.type = RenderCommandType::SetPixelShaderConstants;
@ -3915,12 +4040,14 @@ static void ProcSetSamplerState(const RenderCommand& cmd)
static void ProcSetVertexShaderConstants(const RenderCommand& cmd)
{
SetRootDescriptor(cmd.setVertexShaderConstants.allocation, 0);
g_vertexShaderConstants = cmd.setVertexShaderConstants.allocation;
g_dirtyStates.vertexShaderConstants = true;
}
static void ProcSetPixelShaderConstants(const RenderCommand& cmd)
{
SetRootDescriptor(cmd.setPixelShaderConstants.allocation, 1);
g_pixelShaderConstants = cmd.setPixelShaderConstants.allocation;
g_dirtyStates.pixelShaderConstants = true;
}
static void ProcAddPipeline(const RenderCommand& cmd)
@ -4010,6 +4137,12 @@ static void FlushRenderStateForRenderThread()
if (g_dirtyStates.depthBias && g_capabilities.dynamicDepthBias)
commandList->setDepthBias(g_depthBias, 0.0f, g_slopeScaledDepthBias);
if (g_dirtyStates.vertexShaderConstants)
SetRootDescriptor(g_vertexShaderConstants, 0);
if (g_dirtyStates.pixelShaderConstants)
SetRootDescriptor(g_pixelShaderConstants, 1);
if (g_dirtyStates.sharedConstants)
{
auto sharedConstants = g_uploadAllocators[g_frame].allocate<false>(&g_sharedConstants, sizeof(g_sharedConstants), 0x100);
@ -7001,7 +7134,7 @@ GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc);
GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration);
GUEST_FUNCTION_HOOK(sub_82BDA8C0, Video::Present);
GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present);
GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer);
GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture);

@ -1 +1 @@
Subproject commit 96458eb7bc01798c951bce9d627852c2870bc54d
Subproject commit 855a5a8c51ea5f84baecbf4fc87c182795d482c9