diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index 282d75cd..fdca431f 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -155,11 +155,11 @@ static GuestSurface* g_renderTarget; static GuestSurface* g_depthStencil; static RenderFramebuffer* g_framebuffer; static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f); -static bool g_halfPixel = true; static PipelineState g_pipelineState; static int32_t g_depthBias; static float g_slopeScaledDepthBias; static SharedConstants g_sharedConstants; +static GuestTexture* g_textures[16]; static RenderSamplerDesc g_samplerDescs[16]; static bool g_scissorTestEnable = false; static RenderRect g_scissorRect; @@ -681,6 +681,9 @@ enum class CsdFilterState static CsdFilterState g_csdFilterState; +static ankerl::unordered_dense::set<GuestSurface*> g_pendingSurfaceCopies; +static ankerl::unordered_dense::set<GuestSurface*> g_pendingMsaaResolves; + enum class RenderCommandType { SetRenderState, @@ -694,6 +697,7 @@ enum class RenderCommandType StretchRect, SetRenderTarget, SetDepthStencilSurface, + ExecutePendingStretchRectCommands, Clear, SetViewport, SetTexture, @@ -710,7 +714,7 @@ enum class RenderCommandType SetVertexShader, SetStreamSource, SetIndices, - SetPixelShader + SetPixelShader, }; struct RenderCommand @@ -1465,6 +1469,8 @@ static void BeginCommandList() g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE; } + memset(g_textures, 0, sizeof(g_textures)); + if (Config::GITextureFiltering == EGITextureFiltering::Bicubic) g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER; else @@ -2409,9 +2415,12 @@ static std::atomic g_executedCommandList; void Video::Present() { + RenderCommand cmd; + cmd.type = RenderCommandType::ExecutePendingStretchRectCommands; + g_renderQueue.enqueue(cmd); + DrawImGui(); - RenderCommand cmd; cmd.type = RenderCommandType::ExecuteCommandList; g_renderQueue.enqueue(cmd); @@ -2497,7 +2506,7 @@ static void SetRootDescriptor(const 
UploadAllocation& allocation, size_t index) } static void ProcExecuteCommandList(const RenderCommand& cmd) -{ +{ if (g_swapChainValid) { auto swapChainTexture = g_swapChain->getTexture(g_backBufferIndex); @@ -2795,16 +2804,13 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for surface->guestFormat = format; surface->sampleCount = desc.multisampling.sampleCount; - if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT) - { - RenderTextureViewDesc viewDesc; - viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; - viewDesc.format = RenderFormat::D32_FLOAT; - viewDesc.mipLevels = 1; - surface->textureView = surface->textureHolder->createTextureView(viewDesc); - surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); - g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); - } + RenderTextureViewDesc viewDesc; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; + viewDesc.format = desc.format; + viewDesc.mipLevels = 1; + surface->textureView = surface->textureHolder->createTextureView(viewDesc); + surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); + g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); #ifdef _DEBUG surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? 
"Render Target" : "Depth Stencil", g_memory.MapVirtual(surface))); @@ -2820,11 +2826,8 @@ static void FlushViewport() if (g_dirtyStates.viewport) { auto viewport = g_viewport; - if (g_halfPixel) - { - viewport.x += 0.5f; - viewport.y += 0.5f; - } + viewport.x += 0.5f; + viewport.y += 0.5f; if (viewport.minDepth > viewport.maxDepth) std::swap(viewport.minDepth, viewport.maxDepth); @@ -2848,13 +2851,6 @@ static void FlushViewport() } } -static bool SetHalfPixel(bool enable) -{ - bool oldValue = g_halfPixel; - SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable); - return oldValue; -} - static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture) { RenderCommand cmd; @@ -2864,105 +2860,43 @@ static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestText g_renderQueue.enqueue(cmd); } +static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture); +static void SetSurface(uint32_t index, GuestSurface* surface); + static void ProcStretchRect(const RenderCommand& cmd) { const auto& args = cmd.stretchRect; const bool isDepthStencil = (args.flags & 0x4) != 0; const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget; - const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; - RenderTextureLayout srcLayout; - RenderTextureLayout dstLayout; + // Erase previous pending command so it doesn't cause the texture to be overridden. + if (args.texture->sourceSurface != nullptr) + args.texture->sourceSurface->destinationTextures.erase(args.texture); - if (multiSampling) + args.texture->sourceSurface = surface; + surface->destinationTextures.emplace(args.texture); + + // If the texture is assigned to any slots, set it again. This'll also push the barrier. 
+ for (uint32_t i = 0; i < std::size(g_textures); i++) { - if (isDepthStencil) + if (g_textures[i] == args.texture) { - srcLayout = RenderTextureLayout::SHADER_READ; - dstLayout = RenderTextureLayout::DEPTH_WRITE; - } - else - { - srcLayout = RenderTextureLayout::RESOLVE_SOURCE; - dstLayout = RenderTextureLayout::RESOLVE_DEST; + // Set the original texture for MSAA textures as they always get resolved. + if (surface->sampleCount != RenderSampleCount::COUNT_1) + { + SetTextureInRenderThread(i, args.texture); + g_pendingMsaaResolves.emplace(surface); + } + else + { + SetSurface(i, surface); + } } } - else - { - srcLayout = RenderTextureLayout::COPY_SOURCE; - dstLayout = RenderTextureLayout::COPY_DEST; - } - AddBarrier(surface, srcLayout); - AddBarrier(args.texture, dstLayout); - FlushBarriers(); - - auto& commandList = g_commandLists[g_frame]; - if (multiSampling) - { - if (isDepthStencil) - { - uint32_t pipelineIndex = 0; - - switch (g_depthStencil->sampleCount) - { - case RenderSampleCount::COUNT_2: - pipelineIndex = 0; - break; - case RenderSampleCount::COUNT_4: - pipelineIndex = 1; - break; - case RenderSampleCount::COUNT_8: - pipelineIndex = 2; - break; - default: - assert(false && "Unsupported MSAA sample count"); - break; - } - - if (args.texture->framebuffer == nullptr) - { - RenderFramebufferDesc desc; - desc.depthAttachment = args.texture->texture; - args.texture->framebuffer = g_device->createFramebuffer(desc); - } - - if (g_framebuffer != args.texture->framebuffer.get()) - { - commandList->setFramebuffer(args.texture->framebuffer.get()); - g_framebuffer = args.texture->framebuffer.get(); - } - - bool oldHalfPixel = SetHalfPixel(false); - FlushViewport(); - - commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); - commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t)); - commandList->drawInstanced(6, 1, 0, 0); - - g_dirtyStates.renderTargetAndDepthStencil = true; - g_dirtyStates.pipelineState = 
true; - - if (g_vulkan) - { - g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias. - g_dirtyStates.vertexShaderConstants = true; - } - - SetHalfPixel(oldHalfPixel); - } - else - { - commandList->resolveTexture(args.texture->texture, surface->texture); - } - } - else - { - commandList->copyTexture(args.texture->texture, surface->texture); - } - - AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); + // Remember to clear later. + g_pendingSurfaceCopies.emplace(surface); } static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface) @@ -3028,6 +2962,170 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd) SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN); } +static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurface* depthStencil) +{ + bool addedAny = false; + + for (const auto surface : { renderTarget, depthStencil }) + { + if (surface != nullptr && !surface->destinationTextures.empty()) + { + const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; + + RenderTextureLayout srcLayout; + RenderTextureLayout dstLayout; + + if (multiSampling) + { + if (surface == depthStencil) + { + srcLayout = RenderTextureLayout::SHADER_READ; + dstLayout = RenderTextureLayout::DEPTH_WRITE; + } + else + { + srcLayout = RenderTextureLayout::RESOLVE_SOURCE; + dstLayout = RenderTextureLayout::RESOLVE_DEST; + } + } + else + { + srcLayout = RenderTextureLayout::COPY_SOURCE; + dstLayout = RenderTextureLayout::COPY_DEST; + } + + AddBarrier(surface, srcLayout); + + for (const auto texture : surface->destinationTextures) + AddBarrier(texture, dstLayout); + + addedAny = true; + } + } + + return addedAny; +} + +static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestSurface* depthStencil) +{ + auto& commandList = 
g_commandLists[g_frame]; + + for (const auto surface : { renderTarget, depthStencil }) + { + if (surface != nullptr && !surface->destinationTextures.empty()) + { + const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; + + for (const auto texture : surface->destinationTextures) + { + if (multiSampling) + { + if (surface == depthStencil) + { + uint32_t pipelineIndex = 0; + + switch (surface->sampleCount) + { + case RenderSampleCount::COUNT_2: + pipelineIndex = 0; + break; + case RenderSampleCount::COUNT_4: + pipelineIndex = 1; + break; + case RenderSampleCount::COUNT_8: + pipelineIndex = 2; + break; + default: + assert(false && "Unsupported MSAA sample count"); + break; + } + + if (texture->framebuffer == nullptr) + { + RenderFramebufferDesc desc; + desc.depthAttachment = texture->texture; + texture->framebuffer = g_device->createFramebuffer(desc); + } + + if (g_framebuffer != texture->framebuffer.get()) + { + commandList->setFramebuffer(texture->framebuffer.get()); + g_framebuffer = texture->framebuffer.get(); + } + + commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); + commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); + commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); + commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); + commandList->drawInstanced(6, 1, 0, 0); + + g_dirtyStates.renderTargetAndDepthStencil = true; + g_dirtyStates.viewport = true; + g_dirtyStates.pipelineState = true; + g_dirtyStates.scissorRect = true; + + if (g_vulkan) + { + g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias. 
+ g_dirtyStates.vertexShaderConstants = true; + } + } + else + { + commandList->resolveTexture(texture->texture, surface->texture); + } + } + else + { + commandList->copyTexture(texture->texture, surface->texture); + } + + texture->sourceSurface = nullptr; + + // Check if any texture slots had this texture assigned, and make it point back at the original texture. + for (uint32_t i = 0; i < std::size(g_textures); i++) + { + if (g_textures[i] == texture) + SetTextureInRenderThread(i, texture); + } + } + + surface->destinationTextures.clear(); + } + } +} + +static void ProcExecutePendingStretchRectCommands(const RenderCommand& cmd) +{ + bool foundAny = false; + + for (const auto surface : g_pendingSurfaceCopies) + { + // Depth stencil textures in this game are guaranteed to be transient. + if (surface->format != RenderFormat::D32_FLOAT) + foundAny |= PopulateBarriersForStretchRect(surface, nullptr); + } + + if (foundAny) + { + FlushBarriers(); + + for (const auto surface : g_pendingSurfaceCopies) + { + if (surface->format != RenderFormat::D32_FLOAT) + ExecutePendingStretchRectCommands(surface, nullptr); + + for (const auto texture : surface->destinationTextures) + texture->sourceSurface = nullptr; + + surface->destinationTextures.clear(); + } + } + + g_pendingSurfaceCopies.clear(); + g_pendingMsaaResolves.clear(); +} + static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear) { if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil) @@ -3106,6 +3204,12 @@ static void ProcClear(const RenderCommand& cmd) { const auto& args = cmd.clear; + if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil)) + { + FlushBarriers(); + ExecutePendingStretchRectCommands(g_renderTarget, g_depthStencil); + } + AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE); AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE); FlushBarriers(); @@ -3194,22 +3298,55 @@ static void SetTexture(GuestDevice* device, uint32_t 
index, GuestTexture* textur g_renderQueue.enqueue(cmd); } +static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture) +{ + AddBarrier(texture, RenderTextureLayout::SHADER_READ); + + auto viewDimension = texture != nullptr ? texture->viewDimension : RenderTextureViewDimension::UNKNOWN; + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], + viewDimension == RenderTextureViewDimension::TEXTURE_2D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D); + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], texture != nullptr && + viewDimension == RenderTextureViewDimension::TEXTURE_3D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D); + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], texture != nullptr && + viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE); +} + +static void SetSurface(uint32_t index, GuestSurface* surface) +{ + AddBarrier(surface, RenderTextureLayout::SHADER_READ); + + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], surface->descriptorIndex); + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D)); + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE)); +} + static void ProcSetTexture(const RenderCommand& cmd) { const auto& args = cmd.setTexture; - AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); - - auto viewDimension = args.texture != nullptr ? args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN; - - SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index], - viewDimension == RenderTextureViewDimension::TEXTURE_2D ? 
args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D); - - SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr && - viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D); + // If a pending copy operation is detected, set the source surface. The indices will be fixed later if flushing is necessary. + bool shouldSetTexture = true; + if (args.texture != nullptr && args.texture->sourceSurface != nullptr) + { + // MSAA surfaces need to be resolved and cannot be used directly. + if (args.texture->sourceSurface->sampleCount != RenderSampleCount::COUNT_1) + { + g_pendingMsaaResolves.emplace(args.texture->sourceSurface); + } + else + { + SetSurface(args.index, args.texture->sourceSurface); + shouldSetTexture = false; + } + } - SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr && - viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE); + if (shouldSetTexture) + SetTextureInRenderThread(args.index, args.texture); + + g_textures[args.index] = args.texture; } static void SetScissorRect(GuestDevice* device, GuestRect* rect) @@ -3815,9 +3952,34 @@ static void FlushRenderStateForRenderThread() auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr; auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr; + bool foundAny = PopulateBarriersForStretchRect(renderTarget, depthStencil); + + for (const auto surface : g_pendingMsaaResolves) + { + bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT); + foundAny |= PopulateBarriersForStretchRect(isDepthStencil ? nullptr : surface, isDepthStencil ? 
surface : nullptr); + } + + if (foundAny) + { + FlushBarriers(); + ExecutePendingStretchRectCommands(renderTarget, depthStencil); + + for (const auto surface : g_pendingMsaaResolves) + { + bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT); + ExecutePendingStretchRectCommands(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr); + } + } + + if (!g_pendingMsaaResolves.empty()) + g_pendingMsaaResolves.clear(); + AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE); AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE); + FlushBarriers(); + SetFramebuffer(renderTarget, depthStencil, false); FlushViewport(); @@ -4580,35 +4742,36 @@ static std::thread g_renderThread([] auto& cmd = commands[i]; switch (cmd.type) { - case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break; - case RenderCommandType::DestructResource: ProcDestructResource(cmd); break; - case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break; - case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break; - case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break; - case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break; - case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break; - case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break; - case RenderCommandType::StretchRect: ProcStretchRect(cmd); break; - case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break; - case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break; - case RenderCommandType::Clear: ProcClear(cmd); break; - case RenderCommandType::SetViewport: ProcSetViewport(cmd); break; - case RenderCommandType::SetTexture: ProcSetTexture(cmd); break; - case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break; - case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break; - case RenderCommandType::SetBooleans: 
ProcSetBooleans(cmd); break; - case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break; - case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break; - case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break; - case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break; - case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break; - case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break; - case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break; - case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break; - case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break; - case RenderCommandType::SetIndices: ProcSetIndices(cmd); break; - case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break; - default: assert(false && "Unrecognized render command type."); break; + case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break; + case RenderCommandType::DestructResource: ProcDestructResource(cmd); break; + case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break; + case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break; + case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break; + case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break; + case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break; + case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break; + case RenderCommandType::StretchRect: ProcStretchRect(cmd); break; + case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break; + case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break; + case RenderCommandType::ExecutePendingStretchRectCommands: ProcExecutePendingStretchRectCommands(cmd); break; + case RenderCommandType::Clear: ProcClear(cmd); break; + case 
RenderCommandType::SetViewport: ProcSetViewport(cmd); break; + case RenderCommandType::SetTexture: ProcSetTexture(cmd); break; + case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break; + case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break; + case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break; + case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break; + case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break; + case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break; + case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break; + case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break; + case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break; + case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break; + case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break; + case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break; + case RenderCommandType::SetIndices: ProcSetIndices(cmd); break; + case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break; + default: assert(false && "Unrecognized render command type."); break; } } @@ -6772,6 +6935,52 @@ PPC_FUNC(sub_825E2F78) __imp__sub_825E2F78(ctx, base); } +// Game shares surfaces with identical descriptions. We don't want to share shadow maps, +// so we can set its format to a depth format that still resolves to the same type in recomp, +// but manages to keep the surfaces actually separated in guest code. +void FxShadowMapInitMidAsmHook(PPCRegister& r11) +{ + uint8_t* base = g_memory.base; + + uint32_t surface = PPC_LOAD_U32(PPC_LOAD_U32(PPC_LOAD_U32(r11.u32 + 0x24) + 0x4)); + PPC_STORE_U32(surface + 0x20, D3DFMT_D24FS8); +} + +// Re-render objects in the terrain shadow map instead of copying the texture. 
+static bool g_jumpOverStretchRect; + +void FxShadowMapNoTerrainMidAsmHook(PPCRegister& r4, PPCRegister& r30) +{ + // Set the no terrain shadow map as the render target. + uint8_t* base = g_memory.base; + r4.u64 = PPC_LOAD_U32(r30.u32 + 0x58); +} + +bool FxShadowMapMidAsmHook(PPCRegister& r4, PPCRegister& r5, PPCRegister& r6, PPCRegister& r30) +{ + if (g_jumpOverStretchRect) + { + // Reset for the next time shadow maps get rendered. + g_jumpOverStretchRect = false; + + // Jump over the stretch rect call. + return false; + } + else + { + // Mark to jump over the stretch call the next time. + g_jumpOverStretchRect = true; + + // Jump to the beginning. Set registers accordingly to set the terrain shadow map as the render target. + uint8_t* base = g_memory.base; + r6.u64 = 0; + r5.u64 = 0; + r4.u64 = PPC_LOAD_U32(r30.u32 + 0x50); + + return true; + } +} + GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice); GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource); diff --git a/UnleashedRecomp/gpu/video.h b/UnleashedRecomp/gpu/video.h index 43376031..2ef1119c 100644 --- a/UnleashedRecomp/gpu/video.h +++ b/UnleashedRecomp/gpu/video.h @@ -158,6 +158,7 @@ struct GuestTexture : GuestBaseTexture void* mappedMemory = nullptr; std::unique_ptr framebuffer; std::unique_ptr patchedTexture; + struct GuestSurface* sourceSurface = nullptr; }; struct GuestLockedRect @@ -205,6 +206,7 @@ struct GuestSurface : GuestBaseTexture uint32_t guestFormat = 0; ankerl::unordered_dense::map> framebuffers; RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; + ankerl::unordered_dense::set<GuestTexture*> destinationTextures; }; enum GuestDeclType diff --git a/UnleashedRecompLib/config/SWA.toml b/UnleashedRecompLib/config/SWA.toml index 315bc450..f7aeefd0 100644 --- a/UnleashedRecompLib/config/SWA.toml +++ b/UnleashedRecompLib/config/SWA.toml @@ -918,3 +918,21 @@ jump_address = 0x822C111C [[midasm_hook]] name = "PressStartSaveLoadThreadMidAsmHook" address = 0x822C4358 + +[[midasm_hook]] +name = 
"FxShadowMapInitMidAsmHook" +address = 0x82BAD8F4 +registers = ["r11"] + +[[midasm_hook]] +name = "FxShadowMapNoTerrainMidAsmHook" +address = 0x82BAD9EC +registers = ["r4", "r30"] +after_instruction = true + +[[midasm_hook]] +name = "FxShadowMapMidAsmHook" +address = 0x82BADADC +registers = ["r4", "r5", "r6", "r30"] +jump_address_on_true = 0x82BAD9F0 +jump_address_on_false = 0x82BADAFC