Implement MSAA properly for Vulkan.

This commit is contained in:
Skyth 2024-10-19 22:22:09 +03:00
parent 7ed7921c54
commit c2ce012155
12 changed files with 258 additions and 51 deletions

3
.gitmodules vendored
View file

@ -4,3 +4,6 @@
[submodule "thirdparty/ddspp"]
path = thirdparty/ddspp
url = https://github.com/redorav/ddspp.git
[submodule "thirdparty/ShaderRecomp"]
path = thirdparty/ShaderRecomp
url = https://github.com/hedge-dev/ShaderRecomp.git

View file

@ -95,6 +95,7 @@ find_package(VulkanMemoryAllocator CONFIG REQUIRED)
find_package(xxHash CONFIG REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(tomlplusplus REQUIRED IMPORTED_TARGET tomlplusplus)
find_package(directx-dxc REQUIRED)
target_link_libraries(UnleashedRecomp PRIVATE
comctl32
@ -122,3 +123,32 @@ target_include_directories(UnleashedRecomp PRIVATE
)
target_precompile_headers(UnleashedRecomp PUBLIC ${SWA_PRECOMPILED_HEADERS})
# Compiles gpu/shader/<FILE_PATH>.hlsl with DXC into two generated C headers
# and adds them to the UnleashedRecomp target:
#   <FILE_PATH>.hlsl.dxil.h  - DXIL bytecode in the array g_<stem>_dxil
#   <FILE_PATH>.hlsl.spirv.h - SPIR-V bytecode in the array g_<stem>_spirv
#
# FILE_PATH   - shader name relative to gpu/shader, without the .hlsl extension.
# TARGET_NAME - DXC target profile passed to -T (e.g. vs_6_0, ps_6_0).
# ARGN        - extra DXC arguments, applied to the SPIR-V compilation only.
function(compile_shader FILE_PATH TARGET_NAME)
    set(SHADER_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/gpu/shader)
    set(FILE_PATH ${SHADER_DIRECTORY}/${FILE_PATH}.hlsl)
    cmake_path(GET FILE_PATH STEM VARIABLE_NAME)

    # Shaders may #include shared .hlsli files from the same directory. List
    # them as dependencies so that editing an include regenerates the headers;
    # depending on the .hlsl alone would leave stale bytecode behind.
    file(GLOB SHADER_INCLUDES CONFIGURE_DEPENDS ${SHADER_DIRECTORY}/*.hlsli)

    # DXIL variant. NOTE(review): -Wno-ignored-attributes presumably silences
    # warnings about the [[vk::...]] attributes, which only matter for SPIR-V.
    add_custom_command(
        OUTPUT ${FILE_PATH}.dxil.h
        COMMAND ${DIRECTX_DXC_TOOL} -T ${TARGET_NAME} -HV 2021 -all-resources-bound -Wno-ignored-attributes -Fh ${FILE_PATH}.dxil.h ${FILE_PATH} -Vn g_${VARIABLE_NAME}_dxil
        DEPENDS ${FILE_PATH} ${SHADER_INCLUDES}
    )

    # SPIR-V variant; the caller's extra arguments (${ARGN}) are only applied here.
    add_custom_command(
        OUTPUT ${FILE_PATH}.spirv.h
        COMMAND ${DIRECTX_DXC_TOOL} -T ${TARGET_NAME} -HV 2021 -all-resources-bound -spirv -fvk-use-dx-layout ${ARGN} -Fh ${FILE_PATH}.spirv.h ${FILE_PATH} -Vn g_${VARIABLE_NAME}_spirv
        DEPENDS ${FILE_PATH} ${SHADER_INCLUDES}
    )

    # Listing the generated headers as sources makes the target depend on them,
    # so the custom commands run before the code that includes them is compiled.
    target_sources(UnleashedRecomp PRIVATE ${FILE_PATH}.dxil.h ${FILE_PATH}.spirv.h)
endfunction()
# Compiles gpu/shader/<FILE_PATH>.hlsl as a vertex shader (vs_6_0 profile).
# -fvk-invert-y is forwarded as an extra argument, which compile_shader only
# applies to the SPIR-V build.
function(compile_vertex_shader FILE_PATH)
    set(VERTEX_SHADER_PROFILE vs_6_0)
    compile_shader(${FILE_PATH} ${VERTEX_SHADER_PROFILE} -fvk-invert-y)
endfunction()
# Compiles gpu/shader/<FILE_PATH>.hlsl as a pixel shader (ps_6_0 profile)
# with no additional SPIR-V arguments.
function(compile_pixel_shader FILE_PATH)
    set(PIXEL_SHADER_PROFILE ps_6_0)
    compile_shader(${FILE_PATH} ${PIXEL_SHADER_PROFILE})
endfunction()
# Shaders compiled at build time into generated DXIL and SPIR-V headers.
# One depth resolve pixel shader is built per supported MSAA sample count (2x, 4x, 8x).
compile_vertex_shader(copy_vs)
compile_pixel_shader(resolve_msaa_depth_2x)
compile_pixel_shader(resolve_msaa_depth_4x)
compile_pixel_shader(resolve_msaa_depth_8x)

1
UnleashedRecomp/gpu/shader/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.hlsl.*.h

View file

@ -0,0 +1,5 @@
// Vertex shader that derives its output purely from the vertex index, so no
// vertex buffer is required.
//
// Vertex ids 0, 1, 2 produce texture coordinates (0,0), (2,0), (0,2) and clip
// space positions (-1,1), (3,1), (-1,-3): a single triangle that covers the
// whole [-1,1] clip space square.
void main(in uint vertexId : SV_VertexID, out float4 position : SV_Position, out float2 texCoord : TEXCOORD)
{
    // Bit 0 of the id selects the horizontal coordinate, bit 1 the vertical one.
    const uint u = (vertexId << 1) & 2;
    const uint v = vertexId & 2;
    texCoord = float2(u, v);

    // Map [0,1] texture space to [-1,1] clip space, flipping the vertical axis.
    const float2 clipScale = float2(2.0, -2.0);
    const float2 clipOffset = float2(-1.0, 1.0);
    position = float4(texCoord * clipScale + clipOffset, 0.0, 1.0);
}

View file

@ -0,0 +1,18 @@
// Push constant block supplied by the host for each resolve draw.
struct PushConstants
{
// Index into g_Texture2DMSDescriptorHeap of the multisampled texture to read.
uint ResourceDescriptorIndex;
};
// NOTE(review): register b3/space4 presumably has to match the push constant range
// declared in the host pipeline layout - confirm against the layout builder.
[[vk::push_constant]] ConstantBuffer<PushConstants> g_PushConstants : register(b3, space4);
// Unbounded array of multisampled textures. SAMPLE_COUNT is not defined in this file;
// it must be #defined before this file is included.
Texture2DMS<float, SAMPLE_COUNT> g_Texture2DMSDescriptorHeap[] : register(t0, space0);
// Pixel shader that resolves a multisampled texture into a single depth value
// per pixel. The output (SV_Depth) is the maximum over all SAMPLE_COUNT
// samples stored at the pixel being shaded.
float main(in float4 position : SV_Position) : SV_Depth
{
    // Integer texel coordinate of the pixel being shaded.
    const int2 pixel = int2(position.xy);
    const uint textureIndex = g_PushConstants.ResourceDescriptorIndex;

    // Start from sample 0 and fold the remaining samples in with max().
    float resolvedDepth = g_Texture2DMSDescriptorHeap[textureIndex].Load(pixel, 0);

    [unroll]
    for (int sampleIndex = 1; sampleIndex < SAMPLE_COUNT; sampleIndex++)
    {
        const float sampleDepth = g_Texture2DMSDescriptorHeap[textureIndex].Load(pixel, sampleIndex);
        resolvedDepth = max(resolvedDepth, sampleDepth);
    }

    return resolvedDepth;
}

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 2
#include "resolve_msaa_depth.hlsli"

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 4
#include "resolve_msaa_depth.hlsli"

View file

@ -0,0 +1,2 @@
#define SAMPLE_COUNT 8
#include "resolve_msaa_depth.hlsli"

View file

@ -10,6 +10,15 @@
#include "video.h"
#include "ui/window.h"
#include "shader/copy_vs.hlsl.dxil.h"
#include "shader/copy_vs.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_2x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_2x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_4x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_4x.hlsl.spirv.h"
#include "shader/resolve_msaa_depth_8x.hlsl.dxil.h"
#include "shader/resolve_msaa_depth_8x.hlsl.spirv.h"
namespace RT64
{
extern std::unique_ptr<RenderInterface> CreateD3D12Interface();
@ -532,6 +541,8 @@ static const std::pair<GuestRenderState, void*> g_setRenderStateFunctions[] =
{ D3DRS_COLORWRITEENABLE, GuestFunction<SetRenderStateColorWriteEnable> }
};
static std::unique_ptr<RenderPipeline> g_resolveMsaaDepthPipelines[3];
static void CreateHostDevice()
{
for (uint32_t i = 0; i < 16; i++)
@ -595,10 +606,49 @@ static void CreateHostDevice()
pipelineLayoutBuilder.addRootDescriptor(0, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addRootDescriptor(1, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addRootDescriptor(2, 4, RenderRootDescriptorType::CONSTANT_BUFFER);
pipelineLayoutBuilder.addPushConstant(3, 4, 4, RenderShaderStageFlag::PIXEL); // For copy/resolve shaders.
}
pipelineLayoutBuilder.end();
g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get());
#define CREATE_SHADER(NAME) \
g_device->createShader( \
g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \
g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \
"main", \
g_vulkan ? RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL)
auto copyShader = CREATE_SHADER(copy_vs);
for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++)
{
std::unique_ptr<RenderShader> pixelShader;
switch (i)
{
case 0:
pixelShader = CREATE_SHADER(resolve_msaa_depth_2x);
break;
case 1:
pixelShader = CREATE_SHADER(resolve_msaa_depth_4x);
break;
case 2:
pixelShader = CREATE_SHADER(resolve_msaa_depth_8x);
break;
}
RenderGraphicsPipelineDesc desc;
desc.pipelineLayout = g_pipelineLayout.get();
desc.vertexShader = copyShader.get();
desc.pixelShader = pixelShader.get();
desc.depthFunction = RenderComparisonFunction::ALWAYS;
desc.depthEnabled = true;
desc.depthWriteEnabled = true;
desc.depthTargetFormat = RenderFormat::D32_FLOAT;
g_resolveMsaaDepthPipelines[i] = g_device->createGraphicsPipeline(desc);
}
#undef CREATE_SHADER
}
static void WaitForGPU()
@ -768,6 +818,9 @@ static void DestructResource(GuestResource* resource)
{
std::lock_guard lock(g_tempMutex);
g_tempTextures[g_frame].emplace_back(std::move(surface->textureHolder));
if (surface->descriptorIndex != NULL)
g_tempDescriptorIndices[g_frame].push_back(surface->descriptorIndex);
}
surface->~GuestSurface();
@ -1063,7 +1116,7 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for
desc.depth = 1;
desc.mipLevels = 1;
desc.arraySize = 1;
//desc.multisampling.sampleCount = (desc.format != RenderFormat::D32_FLOAT && multiSample != 0) ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_1;
desc.multisampling.sampleCount = multiSample != 0 && Config::MSAA > 1 ? Config::MSAA : RenderSampleCount::COUNT_1;
desc.format = ConvertFormat(format);
desc.flags = desc.format == RenderFormat::D32_FLOAT ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET;
@ -1077,24 +1130,155 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for
surface->format = desc.format;
surface->sampleCount = desc.multisampling.sampleCount;
if (multiSample != 0 && desc.format == RenderFormat::D32_FLOAT)
{
RenderTextureViewDesc viewDesc;
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
viewDesc.format = RenderFormat::D32_FLOAT;
viewDesc.mipLevels = 1;
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
}
return surface;
}
static void FlushViewport()
{
bool renderingToBackBuffer = g_renderTarget == g_backBuffer &&
g_backBuffer->texture != g_backBuffer->textureHolder.get();
auto& commandList = g_commandLists[g_frame];
if (g_dirtyStates.viewport)
{
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
commandList->setViewports(RenderViewport(
g_viewport.x * width / 1280.0f,
g_viewport.y * height / 720.0f,
g_viewport.width * width / 1280.0f,
g_viewport.height * height / 720.0f,
g_viewport.minDepth,
g_viewport.maxDepth));
}
else
{
commandList->setViewports(g_viewport);
}
}
if (g_dirtyStates.scissorRect)
{
auto scissorRect = g_scissorTestEnable ? g_scissorRect : RenderRect(
g_viewport.x,
g_viewport.y,
g_viewport.x + g_viewport.width,
g_viewport.y + g_viewport.height);
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
scissorRect.left = scissorRect.left * width / 1280;
scissorRect.top = scissorRect.top * height / 720;
scissorRect.right = scissorRect.right * width / 1280;
scissorRect.bottom = scissorRect.bottom * height / 720;
}
commandList->setScissors(scissorRect);
}
}
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture)
{
const bool isDepthStencil = (flags & 0x4) != 0;
const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
g_barriers.emplace_back(surface->texture, multiSampling ? RenderTextureLayout::RESOLVE_SOURCE : RenderTextureLayout::COPY_SOURCE);
g_barriers.emplace_back(texture->texture.get(), multiSampling ? RenderTextureLayout::RESOLVE_DEST : RenderTextureLayout::COPY_DEST);
RenderTextureLayout srcLayout;
RenderTextureLayout dstLayout;
if (multiSampling)
{
if (isDepthStencil)
{
srcLayout = RenderTextureLayout::SHADER_READ;
dstLayout = RenderTextureLayout::DEPTH_WRITE;
}
else
{
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
dstLayout = RenderTextureLayout::RESOLVE_DEST;
}
}
else
{
srcLayout = RenderTextureLayout::COPY_SOURCE;
dstLayout = RenderTextureLayout::COPY_DEST;
}
g_barriers.emplace_back(surface->texture, srcLayout);
g_barriers.emplace_back(texture->texture.get(), dstLayout);
FlushBarriers();
auto& commandList = g_commandLists[g_frame];
if (multiSampling)
commandList->resolveTexture(texture->texture.get(), surface->texture);
else
{
if (isDepthStencil)
{
uint32_t pipelineIndex = 0;
switch (g_depthStencil->sampleCount)
{
case RenderSampleCount::COUNT_2:
pipelineIndex = 0;
break;
case RenderSampleCount::COUNT_4:
pipelineIndex = 1;
break;
case RenderSampleCount::COUNT_8:
pipelineIndex = 2;
break;
default:
assert(false && "Unsupported MSAA sample count");
break;
}
if (texture->framebuffer == nullptr)
{
RenderFramebufferDesc desc;
desc.depthAttachment = texture->texture.get();
texture->framebuffer = g_device->createFramebuffer(desc);
}
FlushViewport();
commandList->setFramebuffer(texture->framebuffer.get());
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t));
commandList->drawInstanced(6, 1, 0, 0);
g_dirtyStates.renderTargetAndDepthStencil = true;
g_dirtyStates.pipelineState = true;
if (g_vulkan)
g_dirtyStates.vertexShaderConstants = true;
}
else
{
commandList->resolveTexture(texture->texture.get(), surface->texture);
}
}
else
{
commandList->copyTexture(texture->texture.get(), surface->texture);
}
surface->pendingBarrier = true;
texture->pendingBarrier = true;
@ -1356,33 +1540,10 @@ static RenderBorderColor ConvertBorderColor(uint32_t value)
static void FlushRenderState(GuestDevice* device)
{
FlushFramebuffer();
FlushViewport();
auto& commandList = g_commandLists[g_frame];
bool renderingToBackBuffer = g_renderTarget == g_backBuffer &&
g_backBuffer->texture != g_backBuffer->textureHolder.get();
if (g_dirtyStates.viewport)
{
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
commandList->setViewports(RenderViewport(
g_viewport.x * width / 1280.0f,
g_viewport.y * height / 720.0f,
g_viewport.width * width / 1280.0f,
g_viewport.height * height / 720.0f,
g_viewport.minDepth,
g_viewport.maxDepth));
}
else
{
commandList->setViewports(g_viewport);
}
}
if (g_dirtyStates.pipelineState)
commandList->setPipeline(CreateGraphicsPipeline(g_pipelineState));
@ -1464,28 +1625,6 @@ static void FlushRenderState(GuestDevice* device)
setRootDescriptor(sharedConstants, 2);
}
if (g_dirtyStates.scissorRect)
{
auto scissorRect = g_scissorTestEnable ? g_scissorRect : RenderRect(
g_viewport.x,
g_viewport.y,
g_viewport.x + g_viewport.width,
g_viewport.y + g_viewport.height);
if (renderingToBackBuffer)
{
uint32_t width = g_swapChain->getWidth();
uint32_t height = g_swapChain->getHeight();
scissorRect.left = scissorRect.left * width / 1280;
scissorRect.top = scissorRect.top * height / 720;
scissorRect.right = scissorRect.right * width / 1280;
scissorRect.bottom = scissorRect.bottom * height / 720;
}
commandList->setScissors(scissorRect);
}
if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0)
{
auto vertexShaderConstants = uploadAllocator.allocate<true>(device->vertexShaderFloatConstants, 0x1000, 0x100);

View file

@ -98,6 +98,7 @@ struct GuestTexture : GuestResource
void* mappedMemory = nullptr;
uint32_t descriptorIndex = 0;
bool pendingBarrier = true;
std::unique_ptr<RenderFramebuffer> framebuffer;
};
struct GuestLockedRect
@ -144,12 +145,14 @@ struct GuestSurface : GuestResource
{
std::unique_ptr<RenderTexture> textureHolder;
RenderTexture* texture = nullptr;
std::unique_ptr<RenderTextureView> textureView;
uint32_t width = 0;
uint32_t height = 0;
RenderFormat format = RenderFormat::UNKNOWN;
ankerl::unordered_dense::map<const RenderTexture*, std::unique_ptr<RenderFramebuffer>> framebuffers;
bool pendingBarrier = true;
RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1;
uint32_t descriptorIndex = 0;
};
enum GuestDeclType

1
thirdparty/ShaderRecomp vendored Submodule

@ -0,0 +1 @@
Subproject commit 9da6b59ce51c5becc919c2f1aed7c5e5f3b86f31

View file

@ -2,6 +2,7 @@
"builtin-baseline": "e63bd09dc0b7204467705c1c7c71d0e2a3f8860b",
"dependencies": [
"d3d12-memory-allocator",
"directx-dxc",
"sdl2",
"unordered-dense",
"volk",