diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index 5331465f4b..244e5b3cb4 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -12,10 +12,42 @@ logs::channel cellGcmSys("cellGcmSys", logs::level::notice); -extern s32 cellGcmCallback(vm::ptr context, u32 count); +extern s32 cellGcmCallback(vm::ptr context, u32 count); extern void ppu_register_function_at(u32 addr, ppu_function_t ptr); -const u32 tiled_pitches[] = { +namespace gcm +{ + struct context_t + { + vm::ps3::bptr callback; + be_t rtoc; + }; + + static vm::ptr context; + static vm::pptr current_context; + + vm::ps3::ptr allocate_callback_function() + { + u32 address = vm::alloc(sizeof(u32) * 2, vm::main); + + vm::write32(address + sizeof(u32) * 0, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback))); + vm::write32(address + sizeof(u32) * 1, ppu_instructions::BLR()); + ppu_register_function_at(address, BIND_FUNC(cellGcmCallback)); + + return vm::cast(address); + } + + void initialize(vm::pptr current_context) + { + gcm::current_context = current_context; + + context = vm::cast(vm::alloc(sizeof(context_t), vm::main)); + context->callback = allocate_callback_function(); + context->rtoc = 0xabadcafe; + } +} + +static const u32 tiled_pitches[] = { 0x00000000, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600, 0x00000700, 0x00000800, 0x00000A00, 0x00000C00, 0x00000D00, 0x00000E00, @@ -27,16 +59,10 @@ const u32 tiled_pitches[] = { 0x00010000 }; -u32 local_size = 0; -u32 local_addr = 0; -u64 system_mode = 0; +static u64 g_system_mode = 0; -CellGcmConfig current_config; -CellGcmContextData current_context; -gcmInfo gcm_info; - -u32 map_offset_addr = 0; -u32 map_offset_pos = 0; +static u32 g_map_offset_addr = 0; +static u32 g_map_offset_pos = 0; // Auxiliary functions @@ -65,6 +91,19 @@ u32 gcmGetLocalMemorySize(u32 sdk_version) return 0x0E000000; // 224MB } +s32 gcmMapLocalMemory() +{ + rsx::state.frame_buffer = 
vm::cast(0xC0000000); + rsx::state.frame_buffer_size = 0xf900000; // TODO: Get sdk_version in _cellGcmFunc15 and pass it to gcmGetLocalMemorySize + + if (!vm::falloc(rsx::state.frame_buffer.addr(), rsx::state.frame_buffer_size, vm::video)) + { + return CELL_GCM_ERROR_FAILURE; + } + + return CELL_OK; +} + CellGcmOffsetTable offsetTable; void InitOffsetTable() @@ -83,30 +122,35 @@ void InitOffsetTable() u32 cellGcmGetLabelAddress(u8 index) { cellGcmSys.trace("cellGcmGetLabelAddress(index=%d)", index); - return gcm_info.label_addr + 0x10 * index; + return rsx::state.context.ptr(&rsx::context_t::semaphores, index).addr(); } vm::ptr cellGcmGetReportDataAddressLocation(u32 index, u32 location) { cellGcmSys.warning("cellGcmGetReportDataAddressLocation(index=%d, location=%d)", index, location); - if (location == CELL_GCM_LOCATION_LOCAL) { - if (index >= 2048) { + if (location == CELL_GCM_LOCATION_LOCAL) + { + if (index >= sizeof(rsx::context_t::reports) / sizeof(*rsx::context_t::reports)) + { cellGcmSys.error("cellGcmGetReportDataAddressLocation: Wrong local index (%d)", index); return vm::null; } - return vm::ptr::make(0xC0000000 + index * 0x10); + + return rsx::state.context.ptr(&rsx::context_t::reports, index); } - if (location == CELL_GCM_LOCATION_MAIN) { - if (index >= 1024 * 1024) { + if (location == CELL_GCM_LOCATION_MAIN) + { + if (index >= sizeof(rsx::frame_buffer_t::reports) / sizeof(*rsx::frame_buffer_t::reports)) + { cellGcmSys.error("cellGcmGetReportDataAddressLocation: Wrong main index (%d)", index); return vm::null; } - return vm::ptr::make(RSXIOMem.RealAddr(index * 0x10)); + + return rsx::state.frame_buffer.ptr(&rsx::frame_buffer_t::reports, index); } - cellGcmSys.error("cellGcmGetReportDataAddressLocation: Wrong location (%d)", location); return vm::null; } @@ -114,11 +158,13 @@ u64 cellGcmGetTimeStamp(u32 index) { cellGcmSys.trace("cellGcmGetTimeStamp(index=%d)", index); - if (index >= 2048) { + if (index >= sizeof(rsx::frame_buffer_t::reports) / 
sizeof(*rsx::frame_buffer_t::reports)) + { cellGcmSys.error("cellGcmGetTimeStamp: Wrong local index (%d)", index); return 0; } - return vm::read64(0xC0000000 + index * 0x10); + + return rsx::state.frame_buffer->reports[index].timer; } s32 cellGcmGetCurrentField() @@ -145,7 +191,7 @@ u32 cellGcmGetNotifyDataAddress(u32 index) */ vm::ptr _cellGcmFunc12() { - return vm::ptr::make(0xC0000000); // TODO + return vm::cast(rsx::state.frame_buffer.addr()); } u32 cellGcmGetReport(u32 type, u32 index) @@ -173,7 +219,7 @@ u32 cellGcmGetReportDataAddress(u32 index) cellGcmSys.error("cellGcmGetReportDataAddress: Wrong local index (%d)", index); return 0; } - return 0xC0000000 + index * 0x10; + return vm::cast(rsx::state.frame_buffer.addr() + index * 0x10); } u32 cellGcmGetReportDataLocation(u32 index, u32 location) @@ -216,7 +262,7 @@ u32 cellGcmGetControlRegister() { cellGcmSys.trace("cellGcmGetControlRegister()"); - return gcm_info.control_addr; + return rsx::state.context.ptr(&rsx::context_t::control).ptr(&rsx::control_t::put).addr(); } u32 cellGcmGetDefaultCommandWordSize() @@ -257,7 +303,7 @@ s32 cellGcmBindTile(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->tiles[index].binded = true; + rsx::state.unpacked_tiles[index].binded = true; return CELL_OK; } @@ -272,7 +318,7 @@ s32 cellGcmBindZcull(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->zculls[index].binded = true; + rsx::state.unpacked_zculls[index].binded = true; return CELL_OK; } @@ -281,14 +327,18 @@ s32 cellGcmGetConfiguration(vm::ptr config) { cellGcmSys.trace("cellGcmGetConfiguration(config=*0x%x)", config); - *config = current_config; + config->localAddress = rsx::state.frame_buffer.addr(); + config->localSize = rsx::state.frame_buffer_size; + config->ioSize = rsx::state.io_size; + config->memoryFrequency = 650000000; + config->coreFrequency = 500000000; return CELL_OK; } s32 cellGcmGetFlipStatus() { - s32 status = fxm::get()->flip_status; + s32 status = rsx::state.flip_status; 
cellGcmSys.trace("cellGcmGetFlipStatus() -> %d", status); @@ -319,29 +369,40 @@ void _cellGcmFunc15(vm::ptr context) return; } -u32 g_defaultCommandBufferBegin, g_defaultCommandBufferFragmentCount; +static u32 g_defaultCommandBufferBegin, g_defaultCommandBufferFragmentCount; // Called by cellGcmInit -s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSize, u32 ioAddress) +s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSize, u32 ioAddress) { cellGcmSys.warning("_cellGcmInitBody(context=**0x%x, cmdSize=0x%x, ioSize=0x%x, ioAddress=0x%x)", context, cmdSize, ioSize, ioAddress); - current_config.ioAddress = 0; - current_config.localAddress = 0; - local_size = 0; - local_addr = 0; + InitOffsetTable(); - if (!local_size && !local_addr) + if (gcmMapEaIoAddress(ioAddress, 0, ioSize, false) != CELL_OK) { - local_size = 0xf900000; // TODO: Get sdk_version in _cellGcmFunc15 and pass it to gcmGetLocalMemorySize - local_addr = 0xC0000000; - vm::falloc(0xC0000000, local_size, vm::video); + cellGcmSys.error("cellGcmInit: CELL_GCM_ERROR_FAILURE"); + return CELL_GCM_ERROR_FAILURE; } - cellGcmSys.warning("*** local memory(addr=0x%x, size=0x%x)", local_addr, local_size); + gcm::initialize(context); - InitOffsetTable(); - if (system_mode == CELL_GCM_SYSTEM_MODE_IOMAP_512MB) + rsx::state.device = vm::cast(vm::alloc(sizeof(rsx::device_t), vm::main)); + rsx::state.context = vm::cast(vm::alloc(sizeof(rsx::context_t), vm::main)); + + //rsx::state.io = vm::cast(ioAddress); + rsx::state.io_size = cmdSize; + rsx::state.display_buffers_count = 0; + rsx::state.current_display_buffer = 0; + rsx::state.display_buffers = vm::cast(vm::alloc(sizeof(CellGcmDisplayInfo) * 8, vm::main)); + rsx::state.zculls = vm::cast(vm::alloc(sizeof(CellGcmZcullInfo) * rsx::limits::zculls_count, vm::main)); + rsx::state.tiles = vm::cast(vm::alloc(sizeof(CellGcmTileInfo) * rsx::limits::tiles_count, vm::main)); + rsx::state.flip_status = 0; + + gcmMapLocalMemory(); + + cellGcmSys.warning("*** local 
memory(addr=0x%x, size=0x%x)", rsx::state.frame_buffer, rsx::state.frame_buffer_size); + + if (g_system_mode == CELL_GCM_SYSTEM_MODE_IOMAP_512MB) { cellGcmSys.warning("cellGcmInit(): 512MB io address space used"); RSXIOMem.SetRange(0, 0x20000000 /*512MB*/); @@ -352,59 +413,28 @@ s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSi RSXIOMem.SetRange(0, 0x10000000 /*256MB*/); } - if (gcmMapEaIoAddress(ioAddress, 0, ioSize, false) != CELL_OK) - { - cellGcmSys.error("cellGcmInit: CELL_GCM_ERROR_FAILURE"); - return CELL_GCM_ERROR_FAILURE; - } + g_map_offset_addr = 0; + g_map_offset_pos = 0; - map_offset_addr = 0; - map_offset_pos = 0; - current_config.ioSize = ioSize; - current_config.ioAddress = ioAddress; - current_config.localSize = local_size; - current_config.localAddress = local_addr; - current_config.memoryFrequency = 650000000; - current_config.coreFrequency = 500000000; + u32 commandBufferPageSize = 32 * 1024; - // Create contexts + g_defaultCommandBufferBegin = ioAddress + 4096; + g_defaultCommandBufferFragmentCount = cmdSize / commandBufferPageSize; - g_defaultCommandBufferBegin = ioAddress; - g_defaultCommandBufferFragmentCount = cmdSize / (32 * 1024); + rsx::state.context->control.begin = vm::cast(g_defaultCommandBufferBegin); + rsx::state.context->control.end = vm::cast(g_defaultCommandBufferBegin + commandBufferPageSize - 4); // 4b at the end for jump + rsx::state.context->control.current = rsx::state.context->control.begin; + rsx::state.context->control.callback = vm::cast(gcm::context.ptr(&gcm::context_t::callback).addr()); + rsx::state.context->control.put = 0; + rsx::state.context->control.get = 0; + rsx::state.context->control.ref = -1; - gcm_info.context_addr = vm::alloc(0x1000, vm::main); - gcm_info.control_addr = vm::alloc(0x1000, vm::main); - gcm_info.label_addr = vm::alloc(0x1000, vm::main); // ??? 
+ //TODO: send new context + fxm::get()->init(); - current_context.begin.set(g_defaultCommandBufferBegin + 4096); // 4 kb reserved at the beginning - current_context.end.set(g_defaultCommandBufferBegin + 32 * 1024 - 4); // 4b at the end for jump - current_context.current = current_context.begin; - current_context.callback.set(gcm_info.context_addr + 0x40); + rsx::state.context->semaphores[3].value = 1; - vm::write32(gcm_info.context_addr + 0x40, gcm_info.context_addr + 0x48); - vm::write32(gcm_info.context_addr + 0x44, 0xabadcafe); - vm::write32(gcm_info.context_addr + 0x48, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback))); - vm::write32(gcm_info.context_addr + 0x4c, ppu_instructions::BLR()); - ppu_register_function_at(gcm_info.context_addr + 0x48, BIND_FUNC(cellGcmCallback)); - - vm::_ref(gcm_info.context_addr) = current_context; - context->set(gcm_info.context_addr); - - auto& ctrl = vm::_ref(gcm_info.control_addr); - ctrl.put = 0; - ctrl.get = 0; - ctrl.ref = -1; - - const auto render = fxm::get(); - render->ctxt_addr = context.addr(); - render->gcm_buffers.set(vm::alloc(sizeof(CellGcmDisplayInfo) * 8, vm::main)); - render->zculls_addr = vm::alloc(sizeof(CellGcmZcullInfo) * 8, vm::main); - render->tiles_addr = vm::alloc(sizeof(CellGcmTileInfo) * 15, vm::main); - render->gcm_buffers_count = 0; - render->gcm_current_buffer = 0; - render->main_mem_addr = 0; - render->label_addr = gcm_info.label_addr; - render->init(g_defaultCommandBufferBegin, cmdSize, gcm_info.control_addr, local_addr); + *context = rsx::state.context; return CELL_OK; } @@ -413,7 +443,7 @@ s32 cellGcmResetFlipStatus() { cellGcmSys.trace("cellGcmResetFlipStatus()"); - fxm::get()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING; + rsx::state.flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING; return CELL_OK; } @@ -427,7 +457,7 @@ s32 cellGcmSetDebugOutputLevel(s32 level) case CELL_GCM_DEBUG_LEVEL0: case CELL_GCM_DEBUG_LEVEL1: case CELL_GCM_DEBUG_LEVEL2: - fxm::get()->debug_level = level; + 
rsx::state.debug_level = level; break; default: return CELL_EINVAL; @@ -446,18 +476,14 @@ s32 cellGcmSetDisplayBuffer(u32 id, u32 offset, u32 pitch, u32 width, u32 height return CELL_EINVAL; } - const auto render = fxm::get(); + rsx::state.display_buffers[id].offset = offset; + rsx::state.display_buffers[id].pitch = pitch; + rsx::state.display_buffers[id].width = width; + rsx::state.display_buffers[id].height = height; - auto buffers = render->gcm_buffers; - - buffers[id].offset = offset; - buffers[id].pitch = pitch; - buffers[id].width = width; - buffers[id].height = height; - - if (id + 1 > render->gcm_buffers_count) + if (id + 1 > rsx::state.display_buffers_count) { - render->gcm_buffers_count = id + 1; + rsx::state.display_buffers_count = id + 1; } return CELL_OK; @@ -467,7 +493,7 @@ void cellGcmSetFlipHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetFlipHandler(handler=*0x%x)", handler); - fxm::get()->flip_handler = handler; + rsx::state.flip_handler = handler; } s32 cellGcmSetFlipMode(u32 mode) @@ -479,7 +505,7 @@ s32 cellGcmSetFlipMode(u32 mode) case CELL_GCM_DISPLAY_HSYNC: case CELL_GCM_DISPLAY_VSYNC: case CELL_GCM_DISPLAY_HSYNC_WITH_NOISE: - fxm::get()->flip_mode = mode; + rsx::state.flip_mode = mode; break; default: @@ -492,11 +518,10 @@ s32 cellGcmSetFlipMode(u32 mode) void cellGcmSetFlipStatus() { cellGcmSys.warning("cellGcmSetFlipStatus()"); - - fxm::get()->flip_status = 0; + rsx::state.flip_status = 0; } -s32 cellGcmSetPrepareFlip(PPUThread& ppu, vm::ptr ctxt, u32 id) +s32 cellGcmSetPrepareFlip(PPUThread& ppu, vm::ptr ctxt, u32 id) { cellGcmSys.trace("cellGcmSetPrepareFlip(ctx=*0x%x, id=0x%x)", ctxt, id); @@ -508,30 +533,28 @@ s32 cellGcmSetPrepareFlip(PPUThread& ppu, vm::ptr ctxt, u32 if (ctxt->current + 2 >= ctxt->end) { - if (s32 res = ctxt->callback(ppu, ctxt, 8 /* ??? */)) + if (s32 res = (*ctxt->callback)(ppu, ctxt.addr(), 8 /* ??? 
*/)) { cellGcmSys.error("cellGcmSetPrepareFlip: callback failed (0x%08x)", res); return res; } } - const u32 cmd_size = rsx::make_command(ctxt->current, GCM_FLIP_COMMAND, { id }); - - if (ctxt.addr() == gcm_info.context_addr) - { - vm::_ref(gcm_info.control_addr).put += cmd_size; - } + ctxt->put += rsx::make_command(ctxt->current, GCM_FLIP_COMMAND, { id }); return id; } -s32 cellGcmSetFlip(PPUThread& ppu, vm::ptr ctxt, u32 id) +s32 cellGcmSetFlip(PPUThread& ppu, vm::ptr ctxt, u32 id) { cellGcmSys.trace("cellGcmSetFlip(ctxt=*0x%x, id=0x%x)", ctxt, id); if (s32 res = cellGcmSetPrepareFlip(ppu, ctxt, id)) { - if (res < 0) return CELL_GCM_ERROR_FAILURE; + if (res < 0) + { + return CELL_GCM_ERROR_FAILURE; + } } return CELL_OK; @@ -591,9 +614,7 @@ s32 cellGcmSetTileInfo(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u cellGcmSys.error("cellGcmSetTileInfo: bad compression mode! (%d)", comp); } - const auto render = fxm::get(); - - auto& tile = render->tiles[index]; + auto& tile = rsx::state.unpacked_tiles[index]; tile.location = location; tile.offset = offset; tile.size = size; @@ -602,7 +623,7 @@ s32 cellGcmSetTileInfo(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u tile.base = base; tile.bank = bank; - vm::_ptr(render->tiles_addr)[index] = tile.pack(); + rsx::state.tiles[index] = tile.pack(); return CELL_OK; } @@ -610,7 +631,7 @@ void cellGcmSetUserHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetUserHandler(handler=*0x%x)", handler); - fxm::get()->user_handler = handler; + rsx::state.user_handler = handler; } s32 cellGcmSetUserCommand() @@ -622,7 +643,7 @@ void cellGcmSetVBlankHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetVBlankHandler(handler=*0x%x)", handler); - fxm::get()->vblank_handler = handler; + rsx::state.vblank_handler = handler; } s32 cellGcmSetWaitFlip(vm::ptr ctxt) @@ -650,9 +671,7 @@ s32 cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart, return CELL_GCM_ERROR_INVALID_VALUE; } - const auto 
render = fxm::get(); - - auto& zcull = render->zculls[index]; + auto& zcull = rsx::state.unpacked_zculls[index]; zcull.offset = offset; zcull.width = width; zcull.height = height; @@ -665,7 +684,7 @@ s32 cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart, zcull.sRef = sRef; zcull.sMask = sMask; - vm::_ptr(render->zculls_addr)[index] = zcull.pack(); + rsx::state.zculls[index] = zcull.pack(); return CELL_OK; } @@ -679,7 +698,7 @@ s32 cellGcmUnbindTile(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->tiles[index].binded = false; + rsx::state.unpacked_tiles[index].binded = false; return CELL_OK; } @@ -694,37 +713,34 @@ s32 cellGcmUnbindZcull(u8 index) return CELL_EINVAL; } - fxm::get()->zculls[index].binded = false; + rsx::state.unpacked_zculls[index].binded = false; return CELL_OK; } -u32 cellGcmGetTileInfo() +vm::ptr cellGcmGetTileInfo() { cellGcmSys.warning("cellGcmGetTileInfo()"); - return fxm::get()->tiles_addr; + return rsx::state.tiles; } -u32 cellGcmGetZcullInfo() +vm::ptr cellGcmGetZcullInfo() { cellGcmSys.warning("cellGcmGetZcullInfo()"); - return fxm::get()->zculls_addr; + return rsx::state.zculls; } -u32 cellGcmGetDisplayInfo() +vm::ptr cellGcmGetDisplayInfo() { cellGcmSys.warning("cellGcmGetDisplayInfo()"); - return fxm::get()->gcm_buffers.addr(); + return rsx::state.display_buffers; } s32 cellGcmGetCurrentDisplayBufferId(vm::ptr id) { cellGcmSys.warning("cellGcmGetCurrentDisplayBufferId(id=*0x%x)", id); - if ((*id = fxm::get()->gcm_current_buffer) > UINT8_MAX) - { - throw EXCEPTION("Unexpected"); - } + *id = rsx::state.current_display_buffer; return CELL_OK; } @@ -756,7 +772,7 @@ u64 cellGcmGetLastFlipTime() { cellGcmSys.trace("cellGcmGetLastFlipTime()"); - return fxm::get()->last_flip_time; + return rsx::state.last_flip_time; } u64 cellGcmGetLastSecondVTime() @@ -769,7 +785,7 @@ u64 cellGcmGetVBlankCount() { cellGcmSys.trace("cellGcmGetVBlankCount()"); - return fxm::get()->vblank_count; + return 
rsx::state.vblank_count; } s32 cellGcmSysGetLastVBlankTime() @@ -781,7 +797,7 @@ s32 cellGcmInitSystemMode(u64 mode) { cellGcmSys.trace("cellGcmInitSystemMode(mode=0x%x)", mode); - system_mode = mode; + g_system_mode = mode; return CELL_OK; } @@ -839,7 +855,7 @@ s32 cellGcmAddressToOffset(u32 address, vm::ptr offset) cellGcmSys.trace("cellGcmAddressToOffset(address=0x%x, offset=*0x%x)", address, offset); // Address not on main memory or local memory - if (address >= 0xD0000000) + if (address >= rsx::state.frame_buffer.addr() + rsx::state.frame_buffer_size) { return CELL_GCM_ERROR_FAILURE; } @@ -849,7 +865,7 @@ s32 cellGcmAddressToOffset(u32 address, vm::ptr offset) // Address in local memory if ((address >> 28) == 0xC) { - result = address - 0xC0000000; + result = address - rsx::state.frame_buffer.addr(); } // Address in main memory else check else @@ -946,18 +962,15 @@ s32 cellGcmMapLocalMemory(vm::ptr address, vm::ptr size) { cellGcmSys.warning("cellGcmMapLocalMemory(address=*0x%x, size=*0x%x)", address, size); - if (!local_addr && !local_size && vm::falloc(local_addr = 0xC0000000, local_size = 0xf900000 /* TODO */, vm::video)) + s32 result = gcmMapLocalMemory(); + + if (result == CELL_OK) { - *address = local_addr; - *size = local_size; - } - else - { - cellGcmSys.error("RSX local memory already mapped"); - return CELL_GCM_ERROR_FAILURE; + *address = rsx::state.frame_buffer.addr(); + *size = rsx::state.frame_buffer_size; } - return CELL_OK; + return result; } s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr offset) @@ -989,8 +1002,6 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr offset) return CELL_GCM_ERROR_NO_IO_PAGE_TABLE; } - render->main_mem_addr = render->ioAddress; - return CELL_OK; } @@ -1129,7 +1140,8 @@ s32 cellGcmSetCursorImageOffset(u32 offset) void cellGcmSetDefaultCommandBuffer() { cellGcmSys.warning("cellGcmSetDefaultCommandBuffer()"); - vm::write32(fxm::get()->ctxt_addr, gcm_info.context_addr); + + *gcm::current_context = 
rsx::state.context; } s32 cellGcmSetDefaultCommandBufferAndSegmentWordSize() @@ -1141,19 +1153,21 @@ s32 cellGcmSetDefaultCommandBufferAndSegmentWordSize() // Other //------------------------------------------------------------------------ -s32 _cellGcmSetFlipCommand(PPUThread& ppu, vm::ptr ctx, u32 id) +s32 _cellGcmSetFlipCommand(PPUThread& ppu, vm::ptr ctx, u32 id) { cellGcmSys.trace("cellGcmSetFlipCommand(ctx=*0x%x, id=0x%x)", ctx, id); return cellGcmSetPrepareFlip(ppu, ctx, id); } -s32 _cellGcmSetFlipCommandWithWaitLabel(PPUThread& ppu, vm::ptr ctx, u32 id, u32 label_index, u32 label_value) +s32 _cellGcmSetFlipCommandWithWaitLabel(PPUThread& ppu, vm::ptr ctx, u32 id, u32 label_index, u32 label_value) { cellGcmSys.trace("cellGcmSetFlipCommandWithWaitLabel(ctx=*0x%x, id=0x%x, label_index=0x%x, label_value=0x%x)", ctx, id, label_index, label_value); - s32 res = cellGcmSetPrepareFlip(ppu, ctx, id); - vm::write32(gcm_info.label_addr + 0x10 * label_index, label_value); + s32 res = cellGcmSetPrepareFlip(ppu, ctx.ptr(&rsx::context_t::control), id); + ctx->semaphores[label_index].value = label_value; + //TODO: time + return res < 0 ? CELL_GCM_ERROR_FAILURE : CELL_OK; } @@ -1186,9 +1200,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co cellGcmSys.error("cellGcmSetTile: bad compression mode! 
(%d)", comp); } - const auto render = fxm::get(); - - auto& tile = render->tiles[index]; + auto& tile = rsx::state.unpacked_tiles[index]; tile.location = location; tile.offset = offset; tile.size = size; @@ -1197,7 +1209,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co tile.base = base; tile.bank = bank; - vm::_ptr(render->tiles_addr)[index] = tile.pack(); + rsx::state.tiles[index] = tile.pack(); return CELL_OK; } @@ -1283,23 +1295,23 @@ static bool isInCommandBufferExcept(u32 getPos, u32 bufferBegin, u32 bufferEnd) return true; } -s32 cellGcmCallback(vm::ptr context, u32 count) +s32 cellGcmCallback(vm::ptr context, u32 count) { cellGcmSys.trace("cellGcmCallback(context=*0x%x, count=0x%x)", context, count); - auto& ctrl = vm::_ref(gcm_info.control_addr); + auto& ctrl = *context; const std::chrono::time_point enterWait = std::chrono::system_clock::now(); // Flush command buffer (ie allow RSX to read up to context->current) - ctrl.put.exchange(getOffsetFromAddress(context->current.addr())); + ctrl.put.exchange(getOffsetFromAddress(ctrl.current.addr())); - std::pair newCommandBuffer = getNextCommandBufferBeginEnd(context->current.addr()); + std::pair newCommandBuffer = getNextCommandBufferBeginEnd(ctrl.current.addr()); u32 offset = getOffsetFromAddress(newCommandBuffer.first); // Write jump instruction - *context->current = CELL_GCM_METHOD_FLAG_JUMP | offset; + *ctrl.current = CELL_GCM_METHOD_FLAG_JUMP | offset; // Update current command buffer - context->begin.set(newCommandBuffer.first); - context->current.set(newCommandBuffer.first); - context->end.set(newCommandBuffer.second); + ctrl.begin.set(newCommandBuffer.first); + ctrl.current.set(newCommandBuffer.first); + ctrl.end.set(newCommandBuffer.second); // Wait for rsx to "release" the new command buffer while (!Emu.IsStopped()) @@ -1319,8 +1331,10 @@ s32 cellGcmCallback(vm::ptr context, u32 count) //---------------------------------------------------------------------------- 
-DECLARE(ppu_module_manager::cellGcmSys)("cellGcmSys", []() +DECLARE(ppu_module_manager::cellGcmSys)("cellGcmSys", [](ppu_static_module *_this) { + //_this->on_load.push(&gcm::initialize); + // Data Retrieval REG_FUNC(cellGcmSys, cellGcmGetCurrentField); REG_FUNC(cellGcmSys, cellGcmGetLabelAddress); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 47bc395ad5..9b1ddde7fa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -496,9 +496,9 @@ void D3D12GSRender::flip(int buffer) if (false) { CellGcmDisplayInfo* buffers = nullptr;// = vm::ps3::_ptr(m_gcm_buffers_addr); - u32 addr = rsx::get_address(gcm_buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); - w = gcm_buffers[gcm_current_buffer].width; - h = gcm_buffers[gcm_current_buffer].height; + u32 addr = rsx::get_address(rsx::state.display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + w = rsx::state.display_buffers[buffer].width; + h = rsx::state.display_buffers[buffer].height; u8 *src_buffer = vm::ps3::_ptr(addr); row_pitch = align(w * 4, 256); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index e77875d69a..1d187b62b2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -326,12 +326,12 @@ void D3D12GSRender::copy_render_target_to_dma_location() u32 address_color[] = { - rsx::get_address(offset_color[0], context_dma_color[0]), - rsx::get_address(offset_color[1], context_dma_color[1]), - rsx::get_address(offset_color[2], context_dma_color[2]), - rsx::get_address(offset_color[3], context_dma_color[3]), + rsx::get_address_dma(offset_color[0], context_dma_color[0]), + rsx::get_address_dma(offset_color[1], context_dma_color[1]), + rsx::get_address_dma(offset_color[2], context_dma_color[2]), + rsx::get_address_dma(offset_color[3], context_dma_color[3]), }; - u32 address_z = 
rsx::get_address(offset_zeta, m_context_dma_z); + u32 address_z = rsx::get_address_dma(offset_zeta, m_context_dma_z); bool need_transfer = false; diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 82c583c876..3fa49c3919 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -590,15 +590,24 @@ enum }; // GPU Class Handles +enum +{ + CELL_GCM_CONTEXT_SURFACE2D = 0x313371C3, + CELL_GCM_CONTEXT_SWIZZLE2D = 0x31337A73, +}; + enum { CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER = 0xFEED0000, // Local memory CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER = 0xFEED0001, // Main memory - CELL_GCM_CONTEXT_SURFACE2D = 0x313371C3, - CELL_GCM_CONTEXT_SWIZZLE2D = 0x31337A73, CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT = 0x66626660, CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN = 0xBAD68000, + CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY0 = 0x66604200, CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0 = 0x6660420F, + CELL_GCM_CONTEXT_DMA_SEMAPHORE_RW = 0x66606660, + CELL_GCM_CONTEXT_DMA_SEMAPHORE_R = 0x66616661, + CELL_GCM_CONTEXT_DMA_DEVICE_RW = 0x56616660, + CELL_GCM_CONTEXT_DMA_DEVICE_R = 0x56616661 }; struct CellGcmControl @@ -627,15 +636,7 @@ struct CellGcmContextData vm::ps3::bptr begin; vm::ps3::bptr end; vm::ps3::bptr current; - vm::ps3::bptr callback; -}; - -struct gcmInfo -{ - u32 config_addr; - u32 context_addr; - u32 control_addr; - u32 label_addr; + vm::ps3::bpptr callback; }; struct CellGcmSurface diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3fefc33933..a50d849390 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -26,13 +26,19 @@ namespace u32 get_front_face_ccw(u32 ffv) { + rsx::window_origin shader_window_origin = rsx::to_window_origin((rsx::method_registers[NV4097_SET_SHADER_WINDOW] >> 12) & 0xf); + + if (shader_window_origin == rsx::window_origin::bottom) + { + ffv ^= 1; + } + switch (ffv) { default: // Disgaea 3 pass some garbage value at startup, this is needed to survive. 
case CELL_GCM_CW: return GL_CW; case CELL_GCM_CCW: return GL_CCW; } - throw EXCEPTION("Unknown front face value: 0x%X", ffv); } } @@ -163,6 +169,7 @@ void GLGSRender::begin() if (u32 blend_mrt = rsx::method_registers[NV4097_SET_BLEND_ENABLE_MRT]) { + __glcheck enable(blend_mrt & 1, GL_BLEND, 0); __glcheck enable(blend_mrt & 2, GL_BLEND, 1); __glcheck enable(blend_mrt & 4, GL_BLEND, 2); __glcheck enable(blend_mrt & 8, GL_BLEND, 3); @@ -197,7 +204,7 @@ void GLGSRender::begin() __glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]); } - __glcheck glFrontFace(get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1)); + __glcheck glFrontFace(get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE])); __glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH); @@ -697,7 +704,9 @@ bool GLGSRender::load_program() RSXFragmentProgram fragment_program = get_current_fragment_program(); GLProgramBuffer prog_buffer; - __glcheck prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); + m_program = &prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); + + return true; } rsx::program_info info = programs_cache.get(get_raw_program(), rsx::decompile_language::glsl); @@ -792,21 +801,21 @@ bool GLGSRender::load_program() void GLGSRender::flip(int buffer) { - u32 buffer_width = gcm_buffers[buffer].width; - u32 buffer_height = gcm_buffers[buffer].height; - u32 buffer_pitch = gcm_buffers[buffer].pitch; + u32 buffer_width = rsx::state.display_buffers[buffer].width; + u32 buffer_height = rsx::state.display_buffers[buffer].height; + u32 buffer_pitch = rsx::state.display_buffers[buffer].pitch; glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDisable(GL_SCISSOR_TEST); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); - rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + rsx::tiled_region buffer_region = 
get_tiled_address(rsx::state.display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); u32 absolute_address = buffer_region.address + buffer_region.base; if (0) { - LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL)); + LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(rsx::state.display_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL)); } gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address); diff --git a/rpcs3/Emu/RSX/GL/gl_render_targets.cpp b/rpcs3/Emu/RSX/GL/gl_render_targets.cpp index 8d568f0b17..249228440c 100644 --- a/rpcs3/Emu/RSX/GL/gl_render_targets.cpp +++ b/rpcs3/Emu/RSX/GL/gl_render_targets.cpp @@ -187,13 +187,13 @@ void GLGSRender::read_buffers() for (int i = index; i < index + count; ++i) { u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]]; - u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]]; + u32 dma = rsx::method_registers[rsx::internals::mr_color_dma[i]]; u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]]; if (pitch <= 64) continue; - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + rsx::tiled_region color_buffer = get_tiled_address_dma(offset, dma); u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch); @@ -254,7 +254,7 @@ void GLGSRender::read_buffers() if (pitch <= 64) return; - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + u32 depth_address = rsx::get_address_dma(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); bool in_cache = 
m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch); if (in_cache) @@ -269,7 +269,7 @@ void GLGSRender::read_buffers() __glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size); __glcheck pbo_depth.map([&](GLubyte* pixels) { - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + u32 depth_address = rsx::get_address_dma(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); if (m_surface.depth_format == rsx::surface_depth_format::z16) { @@ -315,13 +315,13 @@ void GLGSRender::write_buffers() for (int i = index; i < index + count; ++i) { u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]]; - u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]]; + u32 dma = rsx::method_registers[rsx::internals::mr_color_dma[i]]; u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]]; if (pitch <= 64) continue; - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + rsx::tiled_region color_buffer = get_tiled_address_dma(offset, dma); u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); u32 range = pitch * height; @@ -370,7 +370,7 @@ void GLGSRender::write_buffers() return; auto depth_format = rsx::internals::surface_depth_format_to_gl(m_surface.depth_format); - u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); + u32 depth_address = rsx::get_address_dma(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]); u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2; if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp 
b/rpcs3/Emu/RSX/RSXThread.cpp index 6393cea19d..aec792204f 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -34,6 +34,7 @@ namespace vm { using namespace ps3; } namespace rsx { std::function g_access_violation_handler; + state_t state; std::string old_shaders_cache::shaders_cache::path_to_root() { @@ -97,27 +98,41 @@ namespace rsx } } - u32 get_address(u32 offset, u32 location) + u32 get_address(u32 offset, u8 location) { - u32 res = 0; - switch (location) { - case CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER: case CELL_GCM_LOCATION_LOCAL: - { - //TODO: don't use not named constants like 0xC0000000 - res = 0xC0000000 + offset; - break; + return rsx::state.frame_buffer.addr() + offset; + + case CELL_GCM_LOCATION_MAIN: + //if (fxm::get()->strict_ordering[offset >> 20]) + //{ + // _mm_mfence(); // probably doesn't have any effect on current implementation + //} + + if (u32 result = RSXIOMem.RealAddr(offset)) + { + return result; + } + + throw EXCEPTION("%s(offset=0x%x, location=0x%x): RSXIO memory not mapped", __FUNCTION__, offset, location); } - case CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER: - case CELL_GCM_LOCATION_MAIN: + throw EXCEPTION("%s(offset=0x%x, location=0x%x): Invalid location", __FUNCTION__, offset, location); + } + + u32 get_address_dma(u32 offset, u32 dma) + { + switch (dma) { - res = (u32)RSXIOMem.RealAddr(offset); // TODO: Error Check? 
- if (res == 0) + case CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER: + return rsx::state.frame_buffer.addr() + offset; + + case CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER: + if (u32 result = RSXIOMem.RealAddr(offset)) { - throw EXCEPTION("GetAddress(offset=0x%x, location=0x%x): RSXIO memory not mapped", offset, location); + return result; } //if (fxm::get()->strict_ordering[offset >> 20]) @@ -125,15 +140,31 @@ namespace rsx // _mm_mfence(); // probably doesn't have any effect on current implementation //} + throw EXCEPTION("%s(offset=0x%x, dma=0x%x): RSXIO memory not mapped", __FUNCTION__, offset, dma); + + case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT: + return rsx::state.frame_buffer.ptr(&rsx::frame_buffer_t::reports).addr() + offset; + + case CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN: + return rsx::state.context.ptr(&rsx::context_t::reports).addr() + offset; + + case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY0: + return rsx::state.context.ptr(&rsx::context_t::notifies).addr() + offset; + + case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0: + //TODO break; - } - default: - { - throw EXCEPTION("Invalid location (offset=0x%x, location=0x%x)", offset, location); - } + + case CELL_GCM_CONTEXT_DMA_SEMAPHORE_RW: + case CELL_GCM_CONTEXT_DMA_SEMAPHORE_R: + return rsx::state.context.ptr(&rsx::context_t::semaphores).addr() + offset; + + case CELL_GCM_CONTEXT_DMA_DEVICE_RW: + case CELL_GCM_CONTEXT_DMA_DEVICE_R: + return rsx::state.device.addr() + offset; } - return res; + throw EXCEPTION("%s(offset=0x%x, dma=0x%x): Invalid dma", __FUNCTION__, offset, dma); } u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size) @@ -377,24 +408,24 @@ namespace rsx reset(); - last_flip_time = get_system_time() - 1000000; + rsx::state.last_flip_time = get_system_time() - 1000000; scope_thread vblank("VBlank Thread", [this]() { const u64 start_time = get_system_time(); - vblank_count = 0; + rsx::state.vblank_count = 0; // TODO: exit condition while (!Emu.IsStopped()) { - if (get_system_time() - 
start_time > vblank_count * 1000000 / 60) + if (get_system_time() - start_time > rsx::state.vblank_count * 1000000 / 60) { - vblank_count++; + rsx::state.vblank_count++; - if (vblank_handler) + if (rsx::state.vblank_handler) { - Emu.GetCallbackManager().Async([func = vblank_handler](PPUThread& ppu) + Emu.GetCallbackManager().Async([func = rsx::state.vblank_handler](PPUThread& ppu) { func(ppu, 1); }); @@ -403,17 +434,19 @@ namespace rsx continue; } - std::this_thread::sleep_for(1ms); // hack + std::this_thread::yield(); } }); + auto &ctrl = state.context->control; + // TODO: exit condition while (true) { CHECK_EMU_STATUS; - const u32 get = ctrl->get; - const u32 put = ctrl->put; + const u32 get = ctrl.get; + const u32 put = ctrl.put; if (put == get || !Emu.IsRunning()) { @@ -428,7 +461,7 @@ namespace rsx { u32 offs = cmd & 0x1fffffff; //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); - ctrl->get = offs; + ctrl.get = offs; continue; } if (cmd & CELL_GCM_METHOD_FLAG_CALL) @@ -436,7 +469,7 @@ namespace rsx m_call_stack.push(get + 4); u32 offs = cmd & ~3; //LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get); - ctrl->get = offs; + ctrl.get = offs; continue; } if (cmd == CELL_GCM_METHOD_FLAG_RETURN) @@ -444,13 +477,13 @@ namespace rsx u32 get = m_call_stack.top(); m_call_stack.pop(); //LOG_WARNING(RSX, "rsx return(0x%x)", get); - ctrl->get = get; + ctrl.get = get; continue; } if (cmd == 0) //nop { - ctrl->get = get + 4; + ctrl.get = get + 4; continue; } @@ -483,7 +516,7 @@ namespace rsx } } - ctrl->get = get + (count + 1) * 4; + ctrl.get = get + (count + 1) * 4; } } @@ -576,7 +609,7 @@ namespace rsx { if (m_internal_tasks.empty()) { - std::this_thread::sleep_for(1ms); + std::this_thread::yield(); } else { @@ -656,10 +689,10 @@ namespace rsx }; return { - rsx::get_address(offset_color[0], context_dma_color[0]), - rsx::get_address(offset_color[1], context_dma_color[1]), - 
rsx::get_address(offset_color[2], context_dma_color[2]), - rsx::get_address(offset_color[3], context_dma_color[3]), + context_dma_color[0] ? rsx::get_address_dma(offset_color[0], context_dma_color[0]) : 0, + context_dma_color[1] ? rsx::get_address_dma(offset_color[1], context_dma_color[1]) : 0, + context_dma_color[2] ? rsx::get_address_dma(offset_color[2], context_dma_color[2]) : 0, + context_dma_color[3] ? rsx::get_address_dma(offset_color[3], context_dma_color[3]) : 0, }; } @@ -667,7 +700,7 @@ namespace rsx { u32 m_context_dma_z = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]; u32 offset_zeta = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET]; - return rsx::get_address(offset_zeta, m_context_dma_z); + return offset_zeta ? rsx::get_address_dma(offset_zeta, m_context_dma_z) : 0; } RSXVertexProgram thread::get_current_vertex_program() const @@ -923,10 +956,23 @@ namespace rsx method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] = 0xffffffff; - method_registers[NV4097_SET_CONTEXT_DMA_REPORT] = CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT; rsx::method_registers[NV4097_SET_TWO_SIDE_LIGHT_EN] = true; rsx::method_registers[NV4097_SET_ALPHA_FUNC] = CELL_GCM_ALWAYS; + method_registers[NV4097_SET_CONTEXT_DMA_REPORT] = CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT; + method_registers[NV406E_SET_CONTEXT_DMA_SEMAPHORE] = CELL_GCM_CONTEXT_DMA_SEMAPHORE_RW; + + method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV309E_SET_CONTEXT_DMA_IMAGE] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + + method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV4097_SET_CONTEXT_DMA_COLOR_B] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV4097_SET_CONTEXT_DMA_COLOR_C] 
= CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV4097_SET_CONTEXT_DMA_COLOR_D] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + method_registers[NV4097_SET_CONTEXT_DMA_ZETA] = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER; + // Reset vertex attrib array for (int i = 0; i < limits::vertex_count; i++) { @@ -945,14 +991,8 @@ namespace rsx } } - void thread::init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress) + void thread::init() { - ctrl = vm::_ptr(ctrlAddress); - this->ioAddress = ioAddress; - this->ioSize = ioSize; - local_mem_addr = localAddress; - flip_status = 0; - m_used_gcm_commands.clear(); on_init_rsx(); @@ -961,7 +1001,7 @@ namespace rsx GcmTileInfo *thread::find_tile(u32 offset, u32 location) { - for (GcmTileInfo &tile : tiles) + for (GcmTileInfo &tile : state.unpacked_tiles) { if (!tile.binded || tile.location != location) { @@ -977,7 +1017,23 @@ namespace rsx return nullptr; } - tiled_region thread::get_tiled_address(u32 offset, u32 location) + tiled_region thread::get_tiled_address_dma(u32 offset, u32 dma) + { + u32 address = get_address_dma(offset, dma); + + GcmTileInfo *tile = find_tile(offset, dma & 1); + u32 base = 0; + + if (tile) + { + base = offset - tile->offset; + address = get_address_dma(tile->offset, dma); + } + + return{ address, base, tile, (u8*)vm::base(address) }; + } + + tiled_region thread::get_tiled_address(u32 offset, u8 location) { u32 address = get_address(offset, location); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 10488e37c8..930627b057 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -87,6 +87,113 @@ namespace rsx }; } + struct semaphore_t + { + be_t value; + be_t padding; + be_t timestamp; + }; + + struct notify_t + { + be_t timestamp; + be_t zero; + }; + + union device_t + { + u8 raw[0x1000]; + }; + + union control_t; + + using callback_t = s32(u32, u32); + + union control_t + { + struct + { + vm::ps3::bptr begin; + 
vm::ps3::bptr end; + vm::ps3::bptr current; + vm::ps3::bpptr callback; + + atomic_be_t put; + atomic_be_t get; + atomic_be_t ref; + }; + + //for gcm capability + struct + { + CellGcmContextData data; + CellGcmControl control; + }; + + u8 raw[0x1000]; + }; + + union driver_info_t + { + struct + { + be_t version_driver; + be_t version_gpu; + be_t memory_size; + be_t hardware_channel; + be_t nvcore_frequency; + be_t memory_frequency; + }; + + u8 raw[0x4000]; + }; + + struct context_t + { + control_t control; + driver_info_t driver_info; + semaphore_t semaphores[0x100]; + notify_t notifies[0x40]; + CellGcmReportData reports[0x800]; + }; + + struct frame_buffer_t + { + CellGcmReportData reports[0x100000]; + }; + + //TODO: separate gcm and rsx data + struct state_t + { + vm::ps3::ptr device; + vm::ps3::ptr context; //TODO: support for multiply contexts + + vm::ps3::ptr flip_handler = vm::null; + vm::ps3::ptr user_handler = vm::null; + vm::ps3::ptr vblank_handler = vm::null; + + u64 last_flip_time; + u64 vblank_count; + u32 flip_status; + u32 flip_mode; + u32 debug_level; + u32 io_size; + + vm::ps3::ptr frame_buffer; + u32 frame_buffer_size; + + vm::ps3::ptr tiles; + vm::ps3::ptr zculls; + vm::ps3::ptr display_buffers; + u32 display_buffers_count; + u32 current_display_buffer; + + GcmTileInfo unpacked_tiles[limits::tiles_count]; + GcmZcullInfo unpacked_zculls[limits::zculls_count]; + }; + + extern state_t state; + namespace old_shaders_cache { struct decompiled_shader @@ -139,7 +246,8 @@ namespace rsx u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size); - u32 get_address(u32 offset, u32 location); + u32 get_address(u32 offset, u8 location); + u32 get_address_dma(u32 offset, u32 dma); struct tiled_region { @@ -212,13 +320,8 @@ namespace rsx old_shaders_cache::shaders_cache shaders_cache; rsx::programs_cache programs_cache; - CellGcmControl* ctrl = nullptr; - Timer timer_sync; - GcmTileInfo tiles[limits::tiles_count]; - GcmZcullInfo 
zculls[limits::zculls_count]; - rsx::texture textures[limits::textures_count]; rsx::vertex_texture vertex_textures[limits::vertex_textures_count]; @@ -260,23 +363,11 @@ namespace rsx void capture_frame(const std::string &name); public: - u32 ioAddress, ioSize; - int flip_status; - int flip_mode; - int debug_level; int frequency_mode; - u32 tiles_addr; - u32 zculls_addr; - vm::ps3::ptr gcm_buffers = vm::null; - u32 gcm_buffers_count; - u32 gcm_current_buffer; - u32 ctxt_addr; - u32 label_addr; rsx::draw_command draw_command; primitive_type draw_mode; - u32 local_mem_addr, main_mem_addr; bool strict_ordering[0x1000]; bool draw_inline_vertex_array; @@ -295,13 +386,6 @@ namespace rsx u32 draw_array_first; double fps_limit = 59.94; - public: - u64 last_flip_time; - vm::ps3::ptr flip_handler = vm::null; - vm::ps3::ptr user_handler = vm::null; - vm::ps3::ptr vblank_handler = vm::null; - u64 vblank_count; - public: std::set m_used_gcm_commands; @@ -388,9 +472,10 @@ namespace rsx struct raw_program get_raw_program() const; public: void reset(); - void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress); + void init(); - tiled_region get_tiled_address(u32 offset, u32 location); + tiled_region get_tiled_address(u32 offset, u8 location); + tiled_region get_tiled_address_dma(u32 offset, u32 dma); GcmTileInfo *find_tile(u32 offset, u32 location); u32 ReadIO32(u32 addr); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ba930606d3..579ad8d3c1 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,275 +1,275 @@ -#include "stdafx.h" -#include "Emu/Memory/Memory.h" -#include "Emu/System.h" -#include "VKGSRender.h" -#include "../rsx_methods.h" -#include "../Common/BufferUtils.h" -#include "VKFormats.h" - -namespace -{ - u32 get_max_depth_value(rsx::surface_depth_format format) - { - switch (format) - { - case rsx::surface_depth_format::z16: return 0xFFFF; - case 
rsx::surface_depth_format::z24s8: return 0xFFFFFF; - } - throw EXCEPTION("Unknow depth format"); - } - - u8 get_pixel_size(rsx::surface_depth_format format) - { - switch (format) - { - case rsx::surface_depth_format::z16: return 2; - case rsx::surface_depth_format::z24s8: return 4; - } - throw EXCEPTION("Unknow depth format"); - } -} - -namespace vk -{ - VkCompareOp compare_op(u32 gl_name) - { - switch (gl_name) - { - case CELL_GCM_NEVER: - return VK_COMPARE_OP_NEVER; - case CELL_GCM_GREATER: - return VK_COMPARE_OP_GREATER; - case CELL_GCM_LESS: - return VK_COMPARE_OP_LESS; - case CELL_GCM_LEQUAL: - return VK_COMPARE_OP_LESS_OR_EQUAL; - case CELL_GCM_GEQUAL: - return VK_COMPARE_OP_GREATER_OR_EQUAL; - case CELL_GCM_EQUAL: - return VK_COMPARE_OP_EQUAL; - case CELL_GCM_ALWAYS: - return VK_COMPARE_OP_ALWAYS; - default: - throw EXCEPTION("Unsupported compare op: 0x%X", gl_name); - } - } - - std::pair get_compatible_surface_format(rsx::surface_color_format color_format) - { - switch (color_format) - { - case rsx::surface_color_format::r5g6b5: - return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map()); - - case rsx::surface_color_format::a8r8g8b8: - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map()); - - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - case rsx::surface_color_format::a8b8g8r8: - { - VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }; - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, no_alpha); - } - - case rsx::surface_color_format::w16z16y16x16: - return std::make_pair(VK_FORMAT_R16G16B16A16_SFLOAT, vk::default_component_map()); - - case rsx::surface_color_format::w32z32y32x32: - return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map()); - - 
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - { - VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }; - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, no_alpha); - } - - case rsx::surface_color_format::b8: - return std::make_pair(VK_FORMAT_R8_UNORM, vk::default_component_map()); - - case rsx::surface_color_format::g8b8: - return std::make_pair(VK_FORMAT_R8G8_UNORM, vk::default_component_map()); - - case rsx::surface_color_format::x32: - return std::make_pair(VK_FORMAT_R32_SFLOAT, vk::default_component_map()); - - default: - LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map()); - } - } - - /** Maps color_format, depth_stencil_format and color count to an int as below : - * idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx - * This should perform a 1:1 mapping - */ - - size_t get_render_pass_location(VkFormat color_format, VkFormat depth_stencil_format, u8 color_count) - { - size_t color_format_idx = 0; - size_t depth_format_idx = 0; - - EXPECTS(color_count < 5); - - switch (color_format) - { - case VK_FORMAT_R5G6B5_UNORM_PACK16: - color_format_idx = 0; - break; - case VK_FORMAT_B8G8R8A8_UNORM: - color_format_idx = 1; - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - color_format_idx = 2; - break; - case VK_FORMAT_R32G32B32A32_SFLOAT: - color_format_idx = 3; - break; - case VK_FORMAT_R8_UINT: - color_format_idx = 4; - break; - case VK_FORMAT_R8G8_UINT: - color_format_idx = 5; - break; - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - color_format_idx = 6; - break; - case VK_FORMAT_R32_SFLOAT: - color_format_idx = 7; - break; - } - - switch (depth_stencil_format) - { - case VK_FORMAT_D16_UNORM: - depth_format_idx = 0; - break; - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - 
depth_format_idx = 1; - break; - case VK_FORMAT_UNDEFINED: - depth_format_idx = 2; - break; - } - - return color_count + 5 * depth_format_idx + 5 * 3 * color_format_idx; - } - - std::vector get_draw_buffers(rsx::surface_target fmt) - { - switch (fmt) - { - case rsx::surface_target::none: - return{}; - case rsx::surface_target::surface_a: - return{ 0 }; - case rsx::surface_target::surface_b: - return{ 1 }; - case rsx::surface_target::surfaces_a_b: - return{ 0, 1 }; - case rsx::surface_target::surfaces_a_b_c: - return{ 0, 1, 2 }; - case rsx::surface_target::surfaces_a_b_c_d: - return{ 0, 1, 2, 3 }; - default: - LOG_ERROR(RSX, "Bad surface color target: %d", fmt); - return{}; - } - } - - VkLogicOp get_logic_op(u32 op) - { - switch (op) - { - case CELL_GCM_CLEAR: return VK_LOGIC_OP_CLEAR; - case CELL_GCM_AND: return VK_LOGIC_OP_AND; - case CELL_GCM_AND_REVERSE: return VK_LOGIC_OP_AND_REVERSE; - case CELL_GCM_COPY: return VK_LOGIC_OP_COPY; - case CELL_GCM_AND_INVERTED: return VK_LOGIC_OP_AND_INVERTED; - case CELL_GCM_NOOP: return VK_LOGIC_OP_NO_OP; - case CELL_GCM_XOR: return VK_LOGIC_OP_XOR; - case CELL_GCM_OR: return VK_LOGIC_OP_OR; - case CELL_GCM_NOR: return VK_LOGIC_OP_NOR; - case CELL_GCM_EQUIV: return VK_LOGIC_OP_EQUIVALENT; - case CELL_GCM_INVERT: return VK_LOGIC_OP_INVERT; - case CELL_GCM_OR_REVERSE: return VK_LOGIC_OP_OR_REVERSE; - case CELL_GCM_COPY_INVERTED: return VK_LOGIC_OP_COPY_INVERTED; - case CELL_GCM_OR_INVERTED: return VK_LOGIC_OP_OR_INVERTED; - case CELL_GCM_NAND: return VK_LOGIC_OP_NAND; - default: - throw EXCEPTION("Unknown logic op 0x%X", op); - } - } - - VkBlendFactor get_blend_factor(u16 factor) - { - switch (factor) - { - case CELL_GCM_ONE: return VK_BLEND_FACTOR_ONE; - case CELL_GCM_ZERO: return VK_BLEND_FACTOR_ZERO; - case CELL_GCM_SRC_ALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; - case CELL_GCM_DST_ALPHA: return VK_BLEND_FACTOR_DST_ALPHA; - case CELL_GCM_SRC_COLOR: return VK_BLEND_FACTOR_SRC_COLOR; - case CELL_GCM_DST_COLOR: return 
VK_BLEND_FACTOR_DST_COLOR; - case CELL_GCM_CONSTANT_COLOR: return VK_BLEND_FACTOR_CONSTANT_COLOR; - case CELL_GCM_CONSTANT_ALPHA: return VK_BLEND_FACTOR_CONSTANT_ALPHA; - case CELL_GCM_ONE_MINUS_SRC_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; - case CELL_GCM_ONE_MINUS_DST_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; - case CELL_GCM_ONE_MINUS_SRC_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - case CELL_GCM_ONE_MINUS_DST_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; - case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; - case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; - default: - throw EXCEPTION("Unknown blend factor 0x%X", factor); - } - }; - - VkBlendOp get_blend_op(u16 op) - { - switch (op) - { - case CELL_GCM_FUNC_ADD: return VK_BLEND_OP_ADD; - case CELL_GCM_FUNC_SUBTRACT: return VK_BLEND_OP_SUBTRACT; - case CELL_GCM_FUNC_REVERSE_SUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; - case CELL_GCM_MIN: return VK_BLEND_OP_MIN; - case CELL_GCM_MAX: return VK_BLEND_OP_MAX; - default: - throw EXCEPTION("Unknown blend op: 0x%X", op); - } - } - - VkStencilOp get_stencil_op(u32 op) - { - switch (op) - { - case CELL_GCM_KEEP: return VK_STENCIL_OP_KEEP; - case CELL_GCM_ZERO: return VK_STENCIL_OP_ZERO; - case CELL_GCM_REPLACE: return VK_STENCIL_OP_REPLACE; - case CELL_GCM_INCR: return VK_STENCIL_OP_INCREMENT_AND_CLAMP; - case CELL_GCM_DECR: return VK_STENCIL_OP_DECREMENT_AND_CLAMP; - case CELL_GCM_INVERT: return VK_STENCIL_OP_INVERT; - case CELL_GCM_INCR_WRAP: return VK_STENCIL_OP_INCREMENT_AND_WRAP; - case CELL_GCM_DECR_WRAP: return VK_STENCIL_OP_DECREMENT_AND_WRAP; - default: - throw EXCEPTION("Unknown stencil op: 0x%X", op); - } - } - - VkFrontFace get_front_face_ccw(u32 ffv) - { - switch (ffv) - { - default: // Disgaea 3 pass some garbage value at startup, this is needed to survive. 
- case CELL_GCM_CW: return VK_FRONT_FACE_CLOCKWISE; - case CELL_GCM_CCW: return VK_FRONT_FACE_COUNTER_CLOCKWISE; - } - throw EXCEPTION("Unknown front face value: 0x%X", ffv); - } - +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "VKGSRender.h" +#include "../rsx_methods.h" +#include "../Common/BufferUtils.h" +#include "VKFormats.h" + +namespace +{ + u32 get_max_depth_value(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 0xFFFF; + case rsx::surface_depth_format::z24s8: return 0xFFFFFF; + } + throw EXCEPTION("Unknow depth format"); + } + + u8 get_pixel_size(rsx::surface_depth_format format) + { + switch (format) + { + case rsx::surface_depth_format::z16: return 2; + case rsx::surface_depth_format::z24s8: return 4; + } + throw EXCEPTION("Unknow depth format"); + } +} + +namespace vk +{ + VkCompareOp compare_op(u32 gl_name) + { + switch (gl_name) + { + case CELL_GCM_NEVER: + return VK_COMPARE_OP_NEVER; + case CELL_GCM_GREATER: + return VK_COMPARE_OP_GREATER; + case CELL_GCM_LESS: + return VK_COMPARE_OP_LESS; + case CELL_GCM_LEQUAL: + return VK_COMPARE_OP_LESS_OR_EQUAL; + case CELL_GCM_GEQUAL: + return VK_COMPARE_OP_GREATER_OR_EQUAL; + case CELL_GCM_EQUAL: + return VK_COMPARE_OP_EQUAL; + case CELL_GCM_ALWAYS: + return VK_COMPARE_OP_ALWAYS; + default: + throw EXCEPTION("Unsupported compare op: 0x%X", gl_name); + } + } + + std::pair get_compatible_surface_format(rsx::surface_color_format color_format) + { + switch (color_format) + { + case rsx::surface_color_format::r5g6b5: + return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map()); + + case rsx::surface_color_format::a8r8g8b8: + return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map()); + + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case 
rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::a8b8g8r8: + { + VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }; + return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, no_alpha); + } + + case rsx::surface_color_format::w16z16y16x16: + return std::make_pair(VK_FORMAT_R16G16B16A16_SFLOAT, vk::default_component_map()); + + case rsx::surface_color_format::w32z32y32x32: + return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map()); + + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + { + VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }; + return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, no_alpha); + } + + case rsx::surface_color_format::b8: + return std::make_pair(VK_FORMAT_R8_UNORM, vk::default_component_map()); + + case rsx::surface_color_format::g8b8: + return std::make_pair(VK_FORMAT_R8G8_UNORM, vk::default_component_map()); + + case rsx::surface_color_format::x32: + return std::make_pair(VK_FORMAT_R32_SFLOAT, vk::default_component_map()); + + default: + LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format); + return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map()); + } + } + + /** Maps color_format, depth_stencil_format and color count to an int as below : + * idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx + * This should perform a 1:1 mapping + */ + + size_t get_render_pass_location(VkFormat color_format, VkFormat depth_stencil_format, u8 color_count) + { + size_t color_format_idx = 0; + size_t depth_format_idx = 0; + + EXPECTS(color_count < 5); + + switch (color_format) + { + case VK_FORMAT_R5G6B5_UNORM_PACK16: + color_format_idx = 0; + break; + case VK_FORMAT_B8G8R8A8_UNORM: + color_format_idx = 1; + break; + case 
VK_FORMAT_R16G16B16A16_SFLOAT: + color_format_idx = 2; + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + color_format_idx = 3; + break; + case VK_FORMAT_R8_UINT: + color_format_idx = 4; + break; + case VK_FORMAT_R8G8_UINT: + color_format_idx = 5; + break; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + color_format_idx = 6; + break; + case VK_FORMAT_R32_SFLOAT: + color_format_idx = 7; + break; + } + + switch (depth_stencil_format) + { + case VK_FORMAT_D16_UNORM: + depth_format_idx = 0; + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + depth_format_idx = 1; + break; + case VK_FORMAT_UNDEFINED: + depth_format_idx = 2; + break; + } + + return color_count + 5 * depth_format_idx + 5 * 3 * color_format_idx; + } + + std::vector get_draw_buffers(rsx::surface_target fmt) + { + switch (fmt) + { + case rsx::surface_target::none: + return{}; + case rsx::surface_target::surface_a: + return{ 0 }; + case rsx::surface_target::surface_b: + return{ 1 }; + case rsx::surface_target::surfaces_a_b: + return{ 0, 1 }; + case rsx::surface_target::surfaces_a_b_c: + return{ 0, 1, 2 }; + case rsx::surface_target::surfaces_a_b_c_d: + return{ 0, 1, 2, 3 }; + default: + LOG_ERROR(RSX, "Bad surface color target: %d", fmt); + return{}; + } + } + + VkLogicOp get_logic_op(u32 op) + { + switch (op) + { + case CELL_GCM_CLEAR: return VK_LOGIC_OP_CLEAR; + case CELL_GCM_AND: return VK_LOGIC_OP_AND; + case CELL_GCM_AND_REVERSE: return VK_LOGIC_OP_AND_REVERSE; + case CELL_GCM_COPY: return VK_LOGIC_OP_COPY; + case CELL_GCM_AND_INVERTED: return VK_LOGIC_OP_AND_INVERTED; + case CELL_GCM_NOOP: return VK_LOGIC_OP_NO_OP; + case CELL_GCM_XOR: return VK_LOGIC_OP_XOR; + case CELL_GCM_OR: return VK_LOGIC_OP_OR; + case CELL_GCM_NOR: return VK_LOGIC_OP_NOR; + case CELL_GCM_EQUIV: return VK_LOGIC_OP_EQUIVALENT; + case CELL_GCM_INVERT: return VK_LOGIC_OP_INVERT; + case CELL_GCM_OR_REVERSE: return VK_LOGIC_OP_OR_REVERSE; + case CELL_GCM_COPY_INVERTED: return VK_LOGIC_OP_COPY_INVERTED; + case 
CELL_GCM_OR_INVERTED: return VK_LOGIC_OP_OR_INVERTED; + case CELL_GCM_NAND: return VK_LOGIC_OP_NAND; + default: + throw EXCEPTION("Unknown logic op 0x%X", op); + } + } + + VkBlendFactor get_blend_factor(u16 factor) + { + switch (factor) + { + case CELL_GCM_ONE: return VK_BLEND_FACTOR_ONE; + case CELL_GCM_ZERO: return VK_BLEND_FACTOR_ZERO; + case CELL_GCM_SRC_ALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case CELL_GCM_DST_ALPHA: return VK_BLEND_FACTOR_DST_ALPHA; + case CELL_GCM_SRC_COLOR: return VK_BLEND_FACTOR_SRC_COLOR; + case CELL_GCM_DST_COLOR: return VK_BLEND_FACTOR_DST_COLOR; + case CELL_GCM_CONSTANT_COLOR: return VK_BLEND_FACTOR_CONSTANT_COLOR; + case CELL_GCM_CONSTANT_ALPHA: return VK_BLEND_FACTOR_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_SRC_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case CELL_GCM_ONE_MINUS_DST_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case CELL_GCM_ONE_MINUS_SRC_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case CELL_GCM_ONE_MINUS_DST_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_ALPHA: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; + case CELL_GCM_ONE_MINUS_CONSTANT_COLOR: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + default: + throw EXCEPTION("Unknown blend factor 0x%X", factor); + } + }; + + VkBlendOp get_blend_op(u16 op) + { + switch (op) + { + case CELL_GCM_FUNC_ADD: return VK_BLEND_OP_ADD; + case CELL_GCM_FUNC_SUBTRACT: return VK_BLEND_OP_SUBTRACT; + case CELL_GCM_FUNC_REVERSE_SUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; + case CELL_GCM_MIN: return VK_BLEND_OP_MIN; + case CELL_GCM_MAX: return VK_BLEND_OP_MAX; + default: + throw EXCEPTION("Unknown blend op: 0x%X", op); + } + } + + VkStencilOp get_stencil_op(u32 op) + { + switch (op) + { + case CELL_GCM_KEEP: return VK_STENCIL_OP_KEEP; + case CELL_GCM_ZERO: return VK_STENCIL_OP_ZERO; + case CELL_GCM_REPLACE: return VK_STENCIL_OP_REPLACE; + case CELL_GCM_INCR: return VK_STENCIL_OP_INCREMENT_AND_CLAMP; + case 
CELL_GCM_DECR: return VK_STENCIL_OP_DECREMENT_AND_CLAMP; + case CELL_GCM_INVERT: return VK_STENCIL_OP_INVERT; + case CELL_GCM_INCR_WRAP: return VK_STENCIL_OP_INCREMENT_AND_WRAP; + case CELL_GCM_DECR_WRAP: return VK_STENCIL_OP_DECREMENT_AND_WRAP; + default: + throw EXCEPTION("Unknown stencil op: 0x%X", op); + } + } + + VkFrontFace get_front_face_ccw(u32 ffv) + { + switch (ffv) + { + default: // Disgaea 3 pass some garbage value at startup, this is needed to survive. + case CELL_GCM_CW: return VK_FRONT_FACE_CLOCKWISE; + case CELL_GCM_CCW: return VK_FRONT_FACE_COUNTER_CLOCKWISE; + } + throw EXCEPTION("Unknown front face value: 0x%X", ffv); + } + VkCullModeFlags get_cull_face(u32 cfv) { switch (cfv) @@ -280,1122 +280,1122 @@ namespace vk default: return VK_CULL_MODE_NONE; } throw EXCEPTION("Unknown cull face value: 0x%X", cfv); - } -} - - -namespace -{ - VkRenderPass precompute_render_pass(VkDevice dev, VkFormat color_format, u8 number_of_color_surface, VkFormat depth_format) - { - // Some driver crashes when using empty render pass - if (number_of_color_surface == 0 && depth_format == VK_FORMAT_UNDEFINED) - return nullptr; - /* Describe a render pass and framebuffer attachments */ - std::vector attachments = {}; - std::vector attachment_references; - - VkAttachmentDescription color_attachement_description = {}; - color_attachement_description.format = color_format; - color_attachement_description.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachement_description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - color_attachement_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - color_attachement_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - color_attachement_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - color_attachement_description.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_attachement_description.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - for (u32 i = 0; i < number_of_color_surface; ++i) - { - 
attachments.push_back(color_attachement_description); - attachment_references.push_back({ i, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }); - } - - if (depth_format != VK_FORMAT_UNDEFINED) - { - VkAttachmentDescription depth_attachement_description = {}; - depth_attachement_description.format = depth_format; - depth_attachement_description.samples = VK_SAMPLE_COUNT_1_BIT; - depth_attachement_description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_attachement_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_attachement_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_attachement_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_attachement_description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - depth_attachement_description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachments.push_back(depth_attachement_description); - - attachment_references.push_back({ number_of_color_surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL }); - } - - VkSubpassDescription subpass = {}; - subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass.colorAttachmentCount = number_of_color_surface; - subpass.pColorAttachments = number_of_color_surface > 0 ? attachment_references.data() : nullptr; - subpass.pDepthStencilAttachment = depth_format != VK_FORMAT_UNDEFINED ? 
&attachment_references.back() : nullptr; - - VkRenderPassCreateInfo rp_info = {}; - rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - rp_info.attachmentCount = static_cast(attachments.size()); - rp_info.pAttachments = attachments.data(); - rp_info.subpassCount = 1; - rp_info.pSubpasses = &subpass; - - VkRenderPass result; - CHECK_RESULT(vkCreateRenderPass(dev, &rp_info, NULL, &result)); - return result; - } - - std::array get_precomputed_render_passes(VkDevice dev, const vk::gpu_formats_support &gpu_format_support) - { - std::array result = {}; - - const std::array depth_format_list = { VK_FORMAT_UNDEFINED, VK_FORMAT_D16_UNORM, gpu_format_support.d24_unorm_s8 ? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT }; - const std::array color_format_list = { VK_FORMAT_R5G6B5_UNORM_PACK16, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32_SFLOAT }; - - - for (const VkFormat &color_format : color_format_list) - { - for (const VkFormat &depth_stencil_format : depth_format_list) - { - for (u8 number_of_draw_buffer = 0; number_of_draw_buffer <= 4; number_of_draw_buffer++) - { - size_t idx = vk::get_render_pass_location(color_format, depth_stencil_format, number_of_draw_buffer); - result[idx] = precompute_render_pass(dev, color_format, number_of_draw_buffer, depth_stencil_format); - } - } - } - return result; - } - - std::tuple get_shared_pipeline_layout(VkDevice dev) - { - std::array bindings = {}; - - size_t idx = 0; - // Vertex buffer - for (int i = 0; i < 16; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = VERTEX_BUFFERS_FIRST_BIND_SLOT + i; - idx++; - } - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - 
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; - - idx++; - - for (int i = 0; i < 16; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = TEXTURES_FIRST_BIND_SLOT + i; - idx++; - } - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; - bindings[idx].binding = SCALE_OFFSET_BIND_SLOT; - - VkDescriptorSetLayoutCreateInfo infos = {}; - infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - infos.pBindings = bindings.data(); - infos.bindingCount = static_cast(bindings.size()); - - VkDescriptorSetLayout set_layout; - CHECK_RESULT(vkCreateDescriptorSetLayout(dev, &infos, nullptr, &set_layout)); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &set_layout; - - VkPipelineLayout result; - CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); - return std::make_tuple(result, set_layout); - } -} - -VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) -{ - shaders_cache.load(rsx::old_shaders_cache::shader_language::glsl); - - m_thread_context.createInstance("RPCS3"); - m_thread_context.makeCurrentInstance(1); - m_thread_context.enable_debugging(); - -#ifdef _WIN32 - HINSTANCE hInstance = NULL; - HWND hWnd = (HWND)m_frame->handle(); - - std::vector& gpus = m_thread_context.enumerateDevices(); - m_swap_chain = 
m_thread_context.createSwapChain(hInstance, hWnd, gpus[0]); -#endif - - m_device = (vk::render_device *)(&m_swap_chain->get_device()); - - m_memory_type_mapping = get_memory_mapping(m_device->gpu()); - - m_optimal_tiling_supported_formats = vk::get_optimal_tiling_supported_formats(m_device->gpu()); - - vk::set_current_thread_ctx(m_thread_context); - vk::set_current_renderer(m_swap_chain->get_device()); - - m_client_width = m_frame->client_width(); - m_client_height = m_frame->client_height(); - m_swap_chain->init_swapchain(m_client_width, m_client_height); - - //create command buffer... - m_command_buffer_pool.create((*m_device)); - m_command_buffer.create(m_command_buffer_pool); - open_command_buffer(); - - for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) - { - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - - VkClearColorValue clear_color{}; - auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - - } - - -#define RING_BUFFER_SIZE 16 * 1024 * DESCRIPTOR_MAX_DRAW_CALLS - m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); - m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); - m_index_buffer_ring_info.init(RING_BUFFER_SIZE); - m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); - 
m_texture_upload_buffer_ring_info.init(8 * RING_BUFFER_SIZE); - m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); - - m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); - - std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); - - VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; - VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; - VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; - - std::vector sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool }; - - descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size())); - - - null_buffer = std::make_unique(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); - null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32); - - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - - CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence)); - - VkSemaphoreCreateInfo semaphore_info = {}; - semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - - vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore); -} - -VKGSRender::~VKGSRender() -{ - vkQueueWaitIdle(m_swap_chain->get_present_queue()); - - if (m_present_semaphore) - { - vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); - m_present_semaphore = nullptr; - } - - vk::destroy_global_resources(); - - //TODO: Properly destroy shader modules instead of calling clear... 
- m_prog_buffer.clear(); - - m_index_buffer_ring_info.heap.release(); - m_uniform_buffer_ring_info.heap.release(); - m_attrib_ring_info.heap.release(); - m_texture_upload_buffer_ring_info.heap.release(); - null_buffer.release(); - null_buffer_view.release(); - m_buffer_view_to_clean.clear(); - m_sampler_to_clean.clear(); - m_framebuffer_to_clean.clear(); - - for (auto &render_pass : m_render_passes) - if (render_pass) - vkDestroyRenderPass(*m_device, render_pass, nullptr); - - m_rtts.destroy(); - - vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); - vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); - - descriptor_pool.destroy(); - - m_command_buffer.destroy(); - m_command_buffer_pool.destroy(); - - m_swap_chain->destroy(); - - m_thread_context.close(); - delete m_swap_chain; -} - -bool VKGSRender::on_access_violation(u32 address, bool is_writing) -{ - if (is_writing) - return m_texture_cache.invalidate_address(address); - - return false; -} - -void VKGSRender::begin() -{ - rsx::thread::begin(); - - //TODO: Fence sync, ring-buffers, etc - //CHECK_RESULT(vkDeviceWaitIdle((*m_device))); - - //Ease resource pressure if the number of draw calls becomes too high - if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS) - { - close_and_submit_command_buffer({}, m_submit_fence); - CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); - - vkResetDescriptorPool(*m_device, descriptor_pool, 0); - CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence)); - CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); - open_command_buffer(); - - m_used_descriptors = 0; - m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); - m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); - m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); - m_texture_upload_buffer_ring_info.m_get_pos = 
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); - } - - VkDescriptorSetAllocateInfo alloc_info = {}; - alloc_info.descriptorPool = descriptor_pool; - alloc_info.descriptorSetCount = 1; - alloc_info.pSetLayouts = &descriptor_layouts; - alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - - VkDescriptorSet new_descriptor_set; - CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set)); - - descriptor_sets = new_descriptor_set; - - init_buffers(); - - if (!load_program()) - return; - - u32 line_width = rsx::method_registers[NV4097_SET_LINE_WIDTH]; - float actual_line_width = (line_width >> 3) + (line_width & 7) / 8.f; - - vkCmdSetLineWidth(m_command_buffer, actual_line_width); - - //TODO: Set up other render-state parameters into the program pipeline - - m_draw_calls++; - m_used_descriptors++; -} - -namespace -{ - bool normalize(rsx::vertex_base_type type) - { - switch (type) - { - case rsx::vertex_base_type::s1: - case rsx::vertex_base_type::ub: - case rsx::vertex_base_type::cmp: - return true; - case rsx::vertex_base_type::f: - case rsx::vertex_base_type::sf: - case rsx::vertex_base_type::ub256: - case rsx::vertex_base_type::s32k: - return false; - } - throw EXCEPTION("unknown vertex type"); - } -} - - - -void VKGSRender::end() -{ - size_t idx = vk::get_render_pass_location( - vk::get_compatible_surface_format(m_surface.color_format).first, - vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), - (u8)vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])).size()); - VkRenderPass current_render_pass = m_render_passes[idx]; - - for (int i = 0; i < rsx::limits::textures_count; ++i) - { - if (m_program->has_uniform("tex" + std::to_string(i))) - { - if (!textures[i].enabled()) - { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), 
descriptor_sets); - continue; - } - vk::image_view *texture0 = m_texture_cache.upload_texture(m_command_buffer, textures[i], m_rtts, m_memory_type_mapping, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); - - VkFilter min_filter; - VkSamplerMipmapMode mip_mode; - std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(textures[i].min_filter()); - m_sampler_to_clean.push_back(std::make_unique( - *m_device, - vk::vk_wrap_mode(textures[i].wrap_s()), vk::vk_wrap_mode(textures[i].wrap_t()), vk::vk_wrap_mode(textures[i].wrap_r()), - !!(textures[i].format() & CELL_GCM_TEXTURE_UN), - textures[i].bias(), vk::max_aniso(textures[i].max_aniso()), textures[i].min_lod(), textures[i].max_lod(), - min_filter, vk::get_mag_filter(textures[i].mag_filter()), mip_mode, vk::get_border_color(textures[i].border_color()) - )); - m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets); - } - } - - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.renderPass = current_render_pass; - rp_begin.framebuffer = m_framebuffer_to_clean.back()->value; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_frame->client_width(); - rp_begin.renderArea.extent.height = m_frame->client_height(); - - vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - - auto upload_info = upload_vertex_data(); - - vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); - - if (!std::get<1>(upload_info)) - vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0); - else - { - VkIndexType index_type; - u32 index_count; - VkDeviceSize offset; - std::tie(std::ignore, std::ignore, 
index_count, offset, index_type) = upload_info; - - vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); - vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); - } - - vkCmdEndRenderPass(m_command_buffer); - - - rsx::thread::end(); -} - -void VKGSRender::set_viewport() -{ - u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; - u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; - - u16 viewport_x = viewport_horizontal & 0xffff; - u16 viewport_y = viewport_vertical & 0xffff; - u16 viewport_w = viewport_horizontal >> 16; - u16 viewport_h = viewport_vertical >> 16; - - u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; - u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; - u16 scissor_x = scissor_horizontal; - u16 scissor_w = scissor_horizontal >> 16; - u16 scissor_y = scissor_vertical; - u16 scissor_h = scissor_vertical >> 16; - -// u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; -// rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); - - VkViewport viewport = {}; - viewport.x = viewport_x; - viewport.y = viewport_y; - viewport.width = viewport_w; - viewport.height = viewport_h; - viewport.minDepth = 0.f; - viewport.maxDepth = 1.f; - - vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); - - VkRect2D scissor = {}; - scissor.extent.height = scissor_h; - scissor.extent.width = scissor_w; - scissor.offset.x = scissor_x; - scissor.offset.y = scissor_y; - - vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); -} - -void VKGSRender::on_init_thread() -{ - GSRender::on_init_thread(); - m_attrib_ring_info.init(8 * RING_BUFFER_SIZE); - m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); -} - -void VKGSRender::on_exit() -{ - 
m_texture_cache.destroy(); -} - -void VKGSRender::clear_surface(u32 mask) -{ - //TODO: Build clear commands into current renderpass descriptor set - if (!(mask & 0xF3)) return; - if (m_current_present_image== 0xFFFF) return; - if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) return; - - init_buffers(); - - float depth_clear = 1.f; - u32 stencil_clear = 0; - - VkClearValue depth_stencil_clear_values, color_clear_values; - VkImageSubresourceRange depth_range = vk::get_image_subresource_range(0, 0, 1, 1, 0); - - rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); - - if (mask & 0x1) - { - u32 max_depth_value = get_max_depth_value(surface_depth_format); - - u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; - float depth_clear = (float)clear_depth / max_depth_value; - - depth_range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; - depth_stencil_clear_values.depthStencil.depth = depth_clear; - depth_stencil_clear_values.depthStencil.stencil = stencil_clear; - } - - if (mask & 0x2) - { - u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; - u32 stencil_mask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; - - //TODO set stencil mask - depth_range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; - depth_stencil_clear_values.depthStencil.stencil = stencil_mask; - } - - if (mask & 0xF0) - { - u32 clear_color = rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]; - u8 clear_a = clear_color >> 24; - u8 clear_r = clear_color >> 16; - u8 clear_g = clear_color >> 8; - u8 clear_b = clear_color; - - //TODO set color mask - /*VkBool32 clear_red = (VkBool32)!!(mask & 0x20); - VkBool32 clear_green = (VkBool32)!!(mask & 0x40); - VkBool32 clear_blue = (VkBool32)!!(mask & 0x80); - VkBool32 clear_alpha = (VkBool32)!!(mask & 0x10);*/ - - color_clear_values.color.float32[0] = (float)clear_r / 255; - color_clear_values.color.float32[1] = 
(float)clear_g / 255; - color_clear_values.color.float32[2] = (float)clear_b / 255; - color_clear_values.color.float32[3] = (float)clear_a / 255; - - for (u32 i = 0; i < m_rtts.m_bound_render_targets.size(); ++i) - { - VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; - - VkImage color_image = std::get<1>(m_rtts.m_bound_render_targets[i])->value; - change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, range); - vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, &color_clear_values.color, 1, &range); - change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); - } - } - - if (mask & 0x3) - { - VkImageAspectFlags depth_stencil_aspect = (surface_depth_format == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; - VkImage depth_stencil_image = std::get<1>(m_rtts.m_bound_depth_stencil)->value; - change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); - vkCmdClearDepthStencilImage(m_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil)->value, VK_IMAGE_LAYOUT_GENERAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); - change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); - } - -} - -void VKGSRender::sync_at_semaphore_release() -{ - close_and_submit_command_buffer({}, m_submit_fence); - CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); - - CHECK_RESULT(vkResetFences(*m_device, 1, 
&m_submit_fence)); - CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); - open_command_buffer(); -} - -bool VKGSRender::do_method(u32 cmd, u32 arg) -{ - switch (cmd) - { - case NV4097_CLEAR_SURFACE: - clear_surface(arg); - return true; - case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - sync_at_semaphore_release(); - return false; //call rsx::thread method implementation - default: - return false; - } -} - -bool VKGSRender::load_program() -{ - RSXVertexProgram vertex_program = get_current_vertex_program(); - RSXFragmentProgram fragment_program = get_current_fragment_program(); - - vk::pipeline_props properties = {}; - - - properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - bool unused; - properties.ia.topology = vk::get_appropriate_topology(draw_mode, unused); - - if (rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) - { - if (rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFF && - rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFFFFFF) - { - LOG_ERROR(RSX, "Custom primitive restart index 0x%X. 
Should rewrite index buffer with proper value!", rsx::method_registers[NV4097_SET_RESTART_INDEX]); - } - properties.ia.primitiveRestartEnable = VK_TRUE; - } - else - properties.ia.primitiveRestartEnable = VK_FALSE; - - - for (int i = 0; i < 4; ++i) - { - properties.att_state[i].colorWriteMask = 0xf; - properties.att_state[i].blendEnable = VK_FALSE; - } - - u32 color_mask = rsx::method_registers[NV4097_SET_COLOR_MASK]; - bool color_mask_b = !!(color_mask & 0xff); - bool color_mask_g = !!((color_mask >> 8) & 0xff); - bool color_mask_r = !!((color_mask >> 16) & 0xff); - bool color_mask_a = !!((color_mask >> 24) & 0xff); - - VkColorComponentFlags mask = 0; - if (color_mask_a) mask |= VK_COLOR_COMPONENT_A_BIT; - if (color_mask_b) mask |= VK_COLOR_COMPONENT_B_BIT; - if (color_mask_g) mask |= VK_COLOR_COMPONENT_G_BIT; - if (color_mask_r) mask |= VK_COLOR_COMPONENT_R_BIT; - - VkColorComponentFlags color_masks[4] = { mask }; - - u8 render_targets[] = { 0, 1, 2, 3 }; - - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[render_targets[idx]].colorWriteMask = mask; - } - - if (rsx::method_registers[NV4097_SET_BLEND_ENABLE]) - { - u32 sfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR]; - u32 dfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR]; - - VkBlendFactor sfactor_rgb = vk::get_blend_factor(sfactor); - VkBlendFactor sfactor_a = vk::get_blend_factor(sfactor >> 16); - VkBlendFactor dfactor_rgb = vk::get_blend_factor(dfactor); - VkBlendFactor dfactor_a = vk::get_blend_factor(dfactor >> 16); - - u32 equation = rsx::method_registers[NV4097_SET_BLEND_EQUATION]; - VkBlendOp equation_rgb = vk::get_blend_op(equation); - VkBlendOp equation_a = vk::get_blend_op(equation >> 16); - - //TODO: Separate target blending - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[render_targets[idx]].blendEnable = VK_TRUE; - properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb; - 
properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb; - properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a; - properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a; - properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb; - properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; - } - } - else - { - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[render_targets[idx]].blendEnable = VK_FALSE; - } - } - - if (rsx::method_registers[NV4097_SET_LOGIC_OP_ENABLE]) - { - properties.cs.logicOpEnable = true; - properties.cs.logicOp = vk::get_logic_op(rsx::method_registers[NV4097_SET_LOGIC_OP]); - } - - properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - properties.ds.depthWriteEnable = (!!rsx::method_registers[NV4097_SET_DEPTH_MASK]) ? VK_TRUE : VK_FALSE; - - if (rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_TEST_ENABLE]) - { - properties.ds.depthBoundsTestEnable = VK_TRUE; - properties.ds.minDepthBounds = (f32&)rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_MIN]; - properties.ds.maxDepthBounds = (f32&)rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_MAX]; - } - else - properties.ds.depthBoundsTestEnable = VK_FALSE; - - if (rsx::method_registers[NV4097_SET_STENCIL_TEST_ENABLE]) - { - properties.ds.stencilTestEnable = VK_TRUE; - properties.ds.front.writeMask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; - properties.ds.front.compareMask = rsx::method_registers[NV4097_SET_STENCIL_FUNC_MASK]; - properties.ds.front.reference = rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]; - properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); - properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); - properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); - properties.ds.front.compareOp = 
vk::compare_op(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); - - if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) - { - properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]); - properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]); - properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]); - properties.ds.back.compareOp = vk::compare_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]); - } - else - properties.ds.back = properties.ds.front; - } - else - properties.ds.stencilTestEnable = VK_FALSE; - - if (!!rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]) - { - properties.ds.depthTestEnable = VK_TRUE; - properties.ds.depthCompareOp = vk::compare_op(rsx::method_registers[NV4097_SET_DEPTH_FUNC]); - } - else - properties.ds.depthTestEnable = VK_FALSE; - + } +} + + +namespace +{ + VkRenderPass precompute_render_pass(VkDevice dev, VkFormat color_format, u8 number_of_color_surface, VkFormat depth_format) + { + // Some driver crashes when using empty render pass + if (number_of_color_surface == 0 && depth_format == VK_FORMAT_UNDEFINED) + return nullptr; + /* Describe a render pass and framebuffer attachments */ + std::vector attachments = {}; + std::vector attachment_references; + + VkAttachmentDescription color_attachement_description = {}; + color_attachement_description.format = color_format; + color_attachement_description.samples = VK_SAMPLE_COUNT_1_BIT; + color_attachement_description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + color_attachement_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + color_attachement_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + color_attachement_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + color_attachement_description.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_attachement_description.finalLayout = 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + for (u32 i = 0; i < number_of_color_surface; ++i) + { + attachments.push_back(color_attachement_description); + attachment_references.push_back({ i, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }); + } + + if (depth_format != VK_FORMAT_UNDEFINED) + { + VkAttachmentDescription depth_attachement_description = {}; + depth_attachement_description.format = depth_format; + depth_attachement_description.samples = VK_SAMPLE_COUNT_1_BIT; + depth_attachement_description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_attachement_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_attachement_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_attachement_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_attachement_description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + depth_attachement_description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments.push_back(depth_attachement_description); + + attachment_references.push_back({ number_of_color_surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL }); + } + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = number_of_color_surface; + subpass.pColorAttachments = number_of_color_surface > 0 ? attachment_references.data() : nullptr; + subpass.pDepthStencilAttachment = depth_format != VK_FORMAT_UNDEFINED ? 
&attachment_references.back() : nullptr; + + VkRenderPassCreateInfo rp_info = {}; + rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + rp_info.attachmentCount = static_cast(attachments.size()); + rp_info.pAttachments = attachments.data(); + rp_info.subpassCount = 1; + rp_info.pSubpasses = &subpass; + + VkRenderPass result; + CHECK_RESULT(vkCreateRenderPass(dev, &rp_info, NULL, &result)); + return result; + } + + std::array get_precomputed_render_passes(VkDevice dev, const vk::gpu_formats_support &gpu_format_support) + { + std::array result = {}; + + const std::array depth_format_list = { VK_FORMAT_UNDEFINED, VK_FORMAT_D16_UNORM, gpu_format_support.d24_unorm_s8 ? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT }; + const std::array color_format_list = { VK_FORMAT_R5G6B5_UNORM_PACK16, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32_SFLOAT }; + + + for (const VkFormat &color_format : color_format_list) + { + for (const VkFormat &depth_stencil_format : depth_format_list) + { + for (u8 number_of_draw_buffer = 0; number_of_draw_buffer <= 4; number_of_draw_buffer++) + { + size_t idx = vk::get_render_pass_location(color_format, depth_stencil_format, number_of_draw_buffer); + result[idx] = precompute_render_pass(dev, color_format, number_of_draw_buffer, depth_stencil_format); + } + } + } + return result; + } + + std::tuple get_shared_pipeline_layout(VkDevice dev) + { + std::array bindings = {}; + + size_t idx = 0; + // Vertex buffer + for (int i = 0; i < 16; i++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = VERTEX_BUFFERS_FIRST_BIND_SLOT + i; + idx++; + } + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + 
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; + + idx++; + + for (int i = 0; i < 16; i++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = TEXTURES_FIRST_BIND_SLOT + i; + idx++; + } + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + bindings[idx].binding = SCALE_OFFSET_BIND_SLOT; + + VkDescriptorSetLayoutCreateInfo infos = {}; + infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + infos.pBindings = bindings.data(); + infos.bindingCount = static_cast(bindings.size()); + + VkDescriptorSetLayout set_layout; + CHECK_RESULT(vkCreateDescriptorSetLayout(dev, &infos, nullptr, &set_layout)); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &set_layout; + + VkPipelineLayout result; + CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); + return std::make_tuple(result, set_layout); + } +} + +VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan) +{ + shaders_cache.load(rsx::old_shaders_cache::shader_language::glsl); + + m_thread_context.createInstance("RPCS3"); + m_thread_context.makeCurrentInstance(1); + m_thread_context.enable_debugging(); + +#ifdef _WIN32 + HINSTANCE hInstance = NULL; + HWND hWnd = (HWND)m_frame->handle(); + + std::vector& gpus = m_thread_context.enumerateDevices(); + m_swap_chain = 
m_thread_context.createSwapChain(hInstance, hWnd, gpus[0]); +#endif + + m_device = (vk::render_device *)(&m_swap_chain->get_device()); + + m_memory_type_mapping = get_memory_mapping(m_device->gpu()); + + m_optimal_tiling_supported_formats = vk::get_optimal_tiling_supported_formats(m_device->gpu()); + + vk::set_current_thread_ctx(m_thread_context); + vk::set_current_renderer(m_swap_chain->get_device()); + + m_client_width = m_frame->client_width(); + m_client_height = m_frame->client_height(); + m_swap_chain->init_swapchain(m_client_width, m_client_height); + + //create command buffer... + m_command_buffer_pool.create((*m_device)); + m_command_buffer.create(m_command_buffer_pool); + open_command_buffer(); + + for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) + { + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + + VkClearColorValue clear_color{}; + auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + + } + + +#define RING_BUFFER_SIZE 16 * 1024 * DESCRIPTOR_MAX_DRAW_CALLS + m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); + m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_index_buffer_ring_info.init(RING_BUFFER_SIZE); + m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); + 
m_texture_upload_buffer_ring_info.init(8 * RING_BUFFER_SIZE); + m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); + + m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); + + std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); + + VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; + VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; + VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 16 * DESCRIPTOR_MAX_DRAW_CALLS }; + + std::vector sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool }; + + descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size())); + + + null_buffer = std::make_unique(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); + null_buffer_view = std::make_unique(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32); + + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + + CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence)); + + VkSemaphoreCreateInfo semaphore_info = {}; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore); +} + +VKGSRender::~VKGSRender() +{ + vkQueueWaitIdle(m_swap_chain->get_present_queue()); + + if (m_present_semaphore) + { + vkDestroySemaphore((*m_device), m_present_semaphore, nullptr); + m_present_semaphore = nullptr; + } + + vk::destroy_global_resources(); + + //TODO: Properly destroy shader modules instead of calling clear... 
+ m_prog_buffer.clear(); + + m_index_buffer_ring_info.heap.release(); + m_uniform_buffer_ring_info.heap.release(); + m_attrib_ring_info.heap.release(); + m_texture_upload_buffer_ring_info.heap.release(); + null_buffer.release(); + null_buffer_view.release(); + m_buffer_view_to_clean.clear(); + m_sampler_to_clean.clear(); + m_framebuffer_to_clean.clear(); + + for (auto &render_pass : m_render_passes) + if (render_pass) + vkDestroyRenderPass(*m_device, render_pass, nullptr); + + m_rtts.destroy(); + + vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); + vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); + + descriptor_pool.destroy(); + + m_command_buffer.destroy(); + m_command_buffer_pool.destroy(); + + m_swap_chain->destroy(); + + m_thread_context.close(); + delete m_swap_chain; +} + +bool VKGSRender::on_access_violation(u32 address, bool is_writing) +{ + if (is_writing) + return m_texture_cache.invalidate_address(address); + + return false; +} + +void VKGSRender::begin() +{ + rsx::thread::begin(); + + //TODO: Fence sync, ring-buffers, etc + //CHECK_RESULT(vkDeviceWaitIdle((*m_device))); + + //Ease resource pressure if the number of draw calls becomes too high + if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS) + { + close_and_submit_command_buffer({}, m_submit_fence); + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); + + vkResetDescriptorPool(*m_device, descriptor_pool, 0); + CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence)); + CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); + open_command_buffer(); + + m_used_descriptors = 0; + m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); + m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); + m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); + m_texture_upload_buffer_ring_info.m_get_pos = 
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); + } + + VkDescriptorSetAllocateInfo alloc_info = {}; + alloc_info.descriptorPool = descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &descriptor_layouts; + alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + + VkDescriptorSet new_descriptor_set; + CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set)); + + descriptor_sets = new_descriptor_set; + + init_buffers(); + + if (!load_program()) + return; + + u32 line_width = rsx::method_registers[NV4097_SET_LINE_WIDTH]; + float actual_line_width = (line_width >> 3) + (line_width & 7) / 8.f; + + vkCmdSetLineWidth(m_command_buffer, actual_line_width); + + //TODO: Set up other render-state parameters into the program pipeline + + m_draw_calls++; + m_used_descriptors++; +} + +namespace +{ + bool normalize(rsx::vertex_base_type type) + { + switch (type) + { + case rsx::vertex_base_type::s1: + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::cmp: + return true; + case rsx::vertex_base_type::f: + case rsx::vertex_base_type::sf: + case rsx::vertex_base_type::ub256: + case rsx::vertex_base_type::s32k: + return false; + } + throw EXCEPTION("unknown vertex type"); + } +} + + + +void VKGSRender::end() +{ + size_t idx = vk::get_render_pass_location( + vk::get_compatible_surface_format(m_surface.color_format).first, + vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), + (u8)vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])).size()); + VkRenderPass current_render_pass = m_render_passes[idx]; + + for (int i = 0; i < rsx::limits::textures_count; ++i) + { + if (m_program->has_uniform("tex" + std::to_string(i))) + { + if (!textures[i].enabled()) + { + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), 
descriptor_sets); + continue; + } + vk::image_view *texture0 = m_texture_cache.upload_texture(m_command_buffer, textures[i], m_rtts, m_memory_type_mapping, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); + + VkFilter min_filter; + VkSamplerMipmapMode mip_mode; + std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(textures[i].min_filter()); + m_sampler_to_clean.push_back(std::make_unique( + *m_device, + vk::vk_wrap_mode(textures[i].wrap_s()), vk::vk_wrap_mode(textures[i].wrap_t()), vk::vk_wrap_mode(textures[i].wrap_r()), + !!(textures[i].format() & CELL_GCM_TEXTURE_UN), + textures[i].bias(), vk::max_aniso(textures[i].max_aniso()), textures[i].min_lod(), textures[i].max_lod(), + min_filter, vk::get_mag_filter(textures[i].mag_filter()), mip_mode, vk::get_border_color(textures[i].border_color()) + )); + m_program->bind_uniform({ m_sampler_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), descriptor_sets); + } + } + + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.renderPass = current_render_pass; + rp_begin.framebuffer = m_framebuffer_to_clean.back()->value; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_frame->client_width(); + rp_begin.renderArea.extent.height = m_frame->client_height(); + + vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + + auto upload_info = upload_vertex_data(); + + vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); + vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); + + if (!std::get<1>(upload_info)) + vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0); + else + { + VkIndexType index_type; + u32 index_count; + VkDeviceSize offset; + std::tie(std::ignore, std::ignore, 
index_count, offset, index_type) = upload_info; + + vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); + vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); + } + + vkCmdEndRenderPass(m_command_buffer); + + + rsx::thread::end(); +} + +void VKGSRender::set_viewport() +{ + u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; + u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; + + u16 viewport_x = viewport_horizontal & 0xffff; + u16 viewport_y = viewport_vertical & 0xffff; + u16 viewport_w = viewport_horizontal >> 16; + u16 viewport_h = viewport_vertical >> 16; + + u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; + u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; + u16 scissor_x = scissor_horizontal; + u16 scissor_w = scissor_horizontal >> 16; + u16 scissor_y = scissor_vertical; + u16 scissor_h = scissor_vertical >> 16; + +// u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; +// rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); + + VkViewport viewport = {}; + viewport.x = viewport_x; + viewport.y = viewport_y; + viewport.width = viewport_w; + viewport.height = viewport_h; + viewport.minDepth = 0.f; + viewport.maxDepth = 1.f; + + vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); + + VkRect2D scissor = {}; + scissor.extent.height = scissor_h; + scissor.extent.width = scissor_w; + scissor.offset.x = scissor_x; + scissor.offset.y = scissor_y; + + vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); +} + +void VKGSRender::on_init_thread() +{ + GSRender::on_init_thread(); + m_attrib_ring_info.init(8 * RING_BUFFER_SIZE); + m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); +} + +void VKGSRender::on_exit() +{ + 
m_texture_cache.destroy(); +} + +void VKGSRender::clear_surface(u32 mask) +{ + //TODO: Build clear commands into current renderpass descriptor set + if (!(mask & 0xF3)) return; + if (m_current_present_image== 0xFFFF) return; + if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) return; + + init_buffers(); + + float depth_clear = 1.f; + u32 stencil_clear = 0; + + VkClearValue depth_stencil_clear_values, color_clear_values; + VkImageSubresourceRange depth_range = vk::get_image_subresource_range(0, 0, 1, 1, 0); + + rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); + + if (mask & 0x1) + { + u32 max_depth_value = get_max_depth_value(surface_depth_format); + + u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; + float depth_clear = (float)clear_depth / max_depth_value; + + depth_range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + depth_stencil_clear_values.depthStencil.depth = depth_clear; + depth_stencil_clear_values.depthStencil.stencil = stencil_clear; + } + + if (mask & 0x2) + { + u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; + u32 stencil_mask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; + + //TODO set stencil mask + depth_range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + depth_stencil_clear_values.depthStencil.stencil = stencil_mask; + } + + if (mask & 0xF0) + { + u32 clear_color = rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]; + u8 clear_a = clear_color >> 24; + u8 clear_r = clear_color >> 16; + u8 clear_g = clear_color >> 8; + u8 clear_b = clear_color; + + //TODO set color mask + /*VkBool32 clear_red = (VkBool32)!!(mask & 0x20); + VkBool32 clear_green = (VkBool32)!!(mask & 0x40); + VkBool32 clear_blue = (VkBool32)!!(mask & 0x80); + VkBool32 clear_alpha = (VkBool32)!!(mask & 0x10);*/ + + color_clear_values.color.float32[0] = (float)clear_r / 255; + color_clear_values.color.float32[1] = 
(float)clear_g / 255; + color_clear_values.color.float32[2] = (float)clear_b / 255; + color_clear_values.color.float32[3] = (float)clear_a / 255; + + for (u32 i = 0; i < m_rtts.m_bound_render_targets.size(); ++i) + { + VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; + + VkImage color_image = std::get<1>(m_rtts.m_bound_render_targets[i])->value; + change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, range); + vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, &color_clear_values.color, 1, &range); + change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + } + } + + if (mask & 0x3) + { + VkImageAspectFlags depth_stencil_aspect = (surface_depth_format == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; + VkImage depth_stencil_image = std::get<1>(m_rtts.m_bound_depth_stencil)->value; + change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); + vkCmdClearDepthStencilImage(m_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil)->value, VK_IMAGE_LAYOUT_GENERAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); + change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); + } + +} + +void VKGSRender::sync_at_semaphore_release() +{ + close_and_submit_command_buffer({}, m_submit_fence); + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); + + CHECK_RESULT(vkResetFences(*m_device, 1, 
&m_submit_fence)); + CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); + open_command_buffer(); +} + +bool VKGSRender::do_method(u32 cmd, u32 arg) +{ + switch (cmd) + { + case NV4097_CLEAR_SURFACE: + clear_surface(arg); + return true; + case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: + case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: + sync_at_semaphore_release(); + return false; //call rsx::thread method implementation + default: + return false; + } +} + +bool VKGSRender::load_program() +{ + RSXVertexProgram vertex_program = get_current_vertex_program(); + RSXFragmentProgram fragment_program = get_current_fragment_program(); + + vk::pipeline_props properties = {}; + + + properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + bool unused; + properties.ia.topology = vk::get_appropriate_topology(draw_mode, unused); + + if (rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) + { + if (rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFF && + rsx::method_registers[NV4097_SET_RESTART_INDEX] != 0xFFFFFFFF) + { + LOG_ERROR(RSX, "Custom primitive restart index 0x%X. 
Should rewrite index buffer with proper value!", rsx::method_registers[NV4097_SET_RESTART_INDEX]); + } + properties.ia.primitiveRestartEnable = VK_TRUE; + } + else + properties.ia.primitiveRestartEnable = VK_FALSE; + + + for (int i = 0; i < 4; ++i) + { + properties.att_state[i].colorWriteMask = 0xf; + properties.att_state[i].blendEnable = VK_FALSE; + } + + u32 color_mask = rsx::method_registers[NV4097_SET_COLOR_MASK]; + bool color_mask_b = !!(color_mask & 0xff); + bool color_mask_g = !!((color_mask >> 8) & 0xff); + bool color_mask_r = !!((color_mask >> 16) & 0xff); + bool color_mask_a = !!((color_mask >> 24) & 0xff); + + VkColorComponentFlags mask = 0; + if (color_mask_a) mask |= VK_COLOR_COMPONENT_A_BIT; + if (color_mask_b) mask |= VK_COLOR_COMPONENT_B_BIT; + if (color_mask_g) mask |= VK_COLOR_COMPONENT_G_BIT; + if (color_mask_r) mask |= VK_COLOR_COMPONENT_R_BIT; + + VkColorComponentFlags color_masks[4] = { mask }; + + u8 render_targets[] = { 0, 1, 2, 3 }; + + for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) + { + properties.att_state[render_targets[idx]].colorWriteMask = mask; + } + + if (rsx::method_registers[NV4097_SET_BLEND_ENABLE]) + { + u32 sfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_SFACTOR]; + u32 dfactor = rsx::method_registers[NV4097_SET_BLEND_FUNC_DFACTOR]; + + VkBlendFactor sfactor_rgb = vk::get_blend_factor(sfactor); + VkBlendFactor sfactor_a = vk::get_blend_factor(sfactor >> 16); + VkBlendFactor dfactor_rgb = vk::get_blend_factor(dfactor); + VkBlendFactor dfactor_a = vk::get_blend_factor(dfactor >> 16); + + u32 equation = rsx::method_registers[NV4097_SET_BLEND_EQUATION]; + VkBlendOp equation_rgb = vk::get_blend_op(equation); + VkBlendOp equation_a = vk::get_blend_op(equation >> 16); + + //TODO: Separate target blending + for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) + { + properties.att_state[render_targets[idx]].blendEnable = VK_TRUE; + properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb; + 
properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb; + properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a; + properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a; + properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb; + properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; + } + } + else + { + for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) + { + properties.att_state[render_targets[idx]].blendEnable = VK_FALSE; + } + } + + if (rsx::method_registers[NV4097_SET_LOGIC_OP_ENABLE]) + { + properties.cs.logicOpEnable = true; + properties.cs.logicOp = vk::get_logic_op(rsx::method_registers[NV4097_SET_LOGIC_OP]); + } + + properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + properties.ds.depthWriteEnable = (!!rsx::method_registers[NV4097_SET_DEPTH_MASK]) ? VK_TRUE : VK_FALSE; + + if (rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_TEST_ENABLE]) + { + properties.ds.depthBoundsTestEnable = VK_TRUE; + properties.ds.minDepthBounds = (f32&)rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_MIN]; + properties.ds.maxDepthBounds = (f32&)rsx::method_registers[NV4097_SET_DEPTH_BOUNDS_MAX]; + } + else + properties.ds.depthBoundsTestEnable = VK_FALSE; + + if (rsx::method_registers[NV4097_SET_STENCIL_TEST_ENABLE]) + { + properties.ds.stencilTestEnable = VK_TRUE; + properties.ds.front.writeMask = rsx::method_registers[NV4097_SET_STENCIL_MASK]; + properties.ds.front.compareMask = rsx::method_registers[NV4097_SET_STENCIL_FUNC_MASK]; + properties.ds.front.reference = rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]; + properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL]); + properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); + properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL]); + properties.ds.front.compareOp = 
vk::compare_op(rsx::method_registers[NV4097_SET_STENCIL_FUNC]); + + if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) + { + properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_FAIL]); + properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZPASS]); + properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_OP_ZFAIL]); + properties.ds.back.compareOp = vk::compare_op(rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC]); + } + else + properties.ds.back = properties.ds.front; + } + else + properties.ds.stencilTestEnable = VK_FALSE; + + if (!!rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE]) + { + properties.ds.depthTestEnable = VK_TRUE; + properties.ds.depthCompareOp = vk::compare_op(rsx::method_registers[NV4097_SET_DEPTH_FUNC]); + } + else + properties.ds.depthTestEnable = VK_FALSE; + if (!!rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE]) { properties.rs.cullMode = vk::get_cull_face(rsx::method_registers[NV4097_SET_CULL_FACE]); - } - - properties.rs.frontFace = vk::get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE]); - - size_t idx = vk::get_render_pass_location( - vk::get_compatible_surface_format(m_surface.color_format).first, - vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), - (u8)vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])).size()); - properties.render_pass = m_render_passes[idx]; - - properties.num_targets = m_draw_buffers_count; - - //Load current program from buffer - m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); - - //TODO: Update constant buffers.. - //1. Update scale-offset matrix - //2. Update vertex constants - //3. 
Update fragment constants - const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256); - - u8 *buf = (u8*)m_uniform_buffer_ring_info.map(scale_offset_offset, 256); - - //TODO: Add case for this in RSXThread - /** - * NOTE: While VK's coord system resembles GLs, the clip volume is no longer symetrical in z - * Its like D3D without the flip in y (depending on how you build the spir-v) - */ - { - int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; - int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; - - float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); - float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); - offset_x /= clip_w / 2.f; - - float scale_y = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); - float offset_y = ((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); - offset_y /= clip_h / 2.f; - - float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; - float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; - - float one = 1.f; - - stream_vector(buf, (u32&)scale_x, 0, 0, (u32&)offset_x); - stream_vector((char*)buf + 16, 0, (u32&)scale_y, 0, (u32&)offset_y); - stream_vector((char*)buf + 32, 0, 0, (u32&)scale_z, (u32&)offset_z); - stream_vector((char*)buf + 48, 0, 0, 0, (u32&)one); - } - - u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); - u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF); - float alpha_ref = alpha_ref_raw / 255.f; - - memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); - memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); - memcpy((char*)buf + 72, &is_alpha_tested, sizeof(u32)); - memcpy((char*)buf + 76, &alpha_ref, sizeof(float)); - 
m_uniform_buffer_ring_info.unmap(); - - const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); - buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float)); - fill_vertex_program_constants_data(buf); - m_uniform_buffer_ring_info.unmap(); - - const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_constants_sz); - buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_constants_sz); - m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program); - m_uniform_buffer_ring_info.unmap(); - - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_constants_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - - return true; -} - -static const u32 mr_color_offset[rsx::limits::color_buffers_count] = -{ - NV4097_SET_SURFACE_COLOR_AOFFSET, - NV4097_SET_SURFACE_COLOR_BOFFSET, - NV4097_SET_SURFACE_COLOR_COFFSET, - NV4097_SET_SURFACE_COLOR_DOFFSET -}; - -static const u32 mr_color_dma[rsx::limits::color_buffers_count] = -{ - NV4097_SET_CONTEXT_DMA_COLOR_A, - NV4097_SET_CONTEXT_DMA_COLOR_B, - NV4097_SET_CONTEXT_DMA_COLOR_C, - NV4097_SET_CONTEXT_DMA_COLOR_D -}; - -static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = -{ - NV4097_SET_SURFACE_PITCH_A, - NV4097_SET_SURFACE_PITCH_B, - NV4097_SET_SURFACE_PITCH_C, - NV4097_SET_SURFACE_PITCH_D -}; - -void VKGSRender::init_buffers(bool skip_reading) -{ - 
prepare_rtts(); - - if (!skip_reading) - { - read_buffers(); - } - - set_viewport(); -} - -void VKGSRender::read_buffers() -{ -} - -void VKGSRender::write_buffers() -{ -} - -void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence) -{ - CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); - - VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - VkCommandBuffer cmd = m_command_buffer; - - VkSubmitInfo infos = {}; - infos.commandBufferCount = 1; - infos.pCommandBuffers = &cmd; - infos.pWaitDstStageMask = &pipe_stage_flags; - infos.pWaitSemaphores = semaphores.data(); - infos.waitSemaphoreCount = static_cast(semaphores.size()); - infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - - CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, fence)); -} - -void VKGSRender::open_command_buffer() -{ - VkCommandBufferInheritanceInfo inheritance_info = {}; - inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - - VkCommandBufferBeginInfo begin_infos = {}; - begin_infos.pInheritanceInfo = &inheritance_info; - begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); -} - - -void VKGSRender::prepare_rtts() -{ - u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; - - if (!m_rtts_dirty) - return; - - m_rtts_dirty = false; - - if (m_surface.format != surface_format) - m_surface.unpack(surface_format); - - u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; - u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; - - u32 clip_width = clip_horizontal >> 16; - u32 clip_height = clip_vertical >> 16; - u32 clip_x = clip_horizontal; - u32 clip_y = clip_vertical; - - m_rtts.prepare_render_target(&m_command_buffer, - surface_format, - clip_horizontal, clip_vertical, - 
rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]), - get_color_surface_addresses(), get_zeta_surface_address(), - (*m_device), &m_command_buffer, m_optimal_tiling_supported_formats, m_memory_type_mapping); - - //Bind created rtts as current fbo... - std::vector draw_buffers = vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])); - - std::vector> fbo_images; - - for (u8 index: draw_buffers) - { - vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); - - VkImageSubresourceRange subres = {}; - subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subres.baseArrayLayer = 0; - subres.baseMipLevel = 0; - subres.layerCount = 1; - subres.levelCount = 1; - - fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); - } - - m_draw_buffers_count = static_cast(fbo_images.size()); - - if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) - { - vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); - - VkImageSubresourceRange subres = {}; - subres.aspectMask = (m_surface.depth_format == rsx::surface_depth_format::z24s8) ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; - subres.baseArrayLayer = 0; - subres.baseMipLevel = 0; - subres.layerCount = 1; - subres.levelCount = 1; - - fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); - } - - size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(m_surface.color_format).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), (u8)draw_buffers.size()); - VkRenderPass current_render_pass = m_render_passes[idx]; - - m_framebuffer_to_clean.push_back(std::make_unique(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); -} - - -void VKGSRender::flip(int buffer) -{ - bool resize_screen = false; - - if (m_client_height != m_frame->client_height() || - m_client_width != m_frame->client_width()) - { - if (!!m_frame->client_height() && !!m_frame->client_width()) - resize_screen = true; - } - - if (!resize_screen) - { - u32 buffer_width = gcm_buffers[buffer].width; - u32 buffer_height = gcm_buffers[buffer].height; - u32 buffer_pitch = gcm_buffers[buffer].pitch; - - rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); - - areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); - - coordi aspect_ratio; - - sizei csize = { m_frame->client_width(), m_frame->client_height() }; - sizei new_size = csize; - - const double aq = (double)buffer_width / buffer_height; - const double rq = (double)new_size.width / new_size.height; - const double q = aq / rq; - - if (q > 1.0) - { - new_size.height = int(new_size.height / q); - aspect_ratio.y = (csize.height - new_size.height) / 2; - } - else if (q < 1.0) - { - new_size.width = int(new_size.width * q); - aspect_ratio.x = (csize.width - new_size.width) / 2; - } - - aspect_ratio.size = new_size; - - VkSwapchainKHR 
swap_chain = (VkSwapchainKHR)(*m_swap_chain); - - //Prepare surface for new frame - CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image)); - - //Blit contents to screen.. - VkImage image_to_flip = nullptr; - - if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr) - image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0])->value; - else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr) - image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1])->value; - - VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); - if (image_to_flip) - { - vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT); - } - else - { - //No draw call was issued! - VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - VkClearColorValue clear_black = { 0 }; - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range); - vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range); - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); - } - - close_and_submit_command_buffer({ m_present_semaphore }, m_submit_fence); - CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); - - VkPresentInfoKHR present = {}; - present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present.pNext = nullptr; - present.swapchainCount = 1; - present.pSwapchains = &swap_chain; - present.pImageIndices = 
&m_current_present_image; - CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); - } - else - { - /** - * Since we are about to destroy the old swapchain and its images, we just discard the commandbuffer. - * Waiting for the commands to process does not work reliably as the fence can be signaled before swap images are released - * and there are no explicit methods to ensure that the presentation engine is not using the images at all. - */ - - CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); - - //Will have to block until rendering is completed - VkFence resize_fence = VK_NULL_HANDLE; - VkFenceCreateInfo infos = {}; - infos.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - - vkQueueWaitIdle(m_swap_chain->get_present_queue()); - vkDeviceWaitIdle(*m_device); - - vkCreateFence((*m_device), &infos, nullptr, &resize_fence); - - //Wait for all grpahics tasks to complete - VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - VkSubmitInfo submit_infos = {}; - submit_infos.commandBufferCount = 0; - submit_infos.pCommandBuffers = nullptr; - submit_infos.pWaitDstStageMask = &pipe_stage_flags; - submit_infos.pWaitSemaphores = nullptr; - submit_infos.waitSemaphoreCount = 0; - submit_infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &submit_infos, resize_fence)); - - vkWaitForFences((*m_device), 1, &resize_fence, VK_TRUE, UINT64_MAX); - vkResetFences((*m_device), 1, &resize_fence); - - vkDeviceWaitIdle(*m_device); - - //Rebuild swapchain. 
Old swapchain destruction is handled by the init_swapchain call - m_client_width = m_frame->client_width(); - m_client_height = m_frame->client_height(); - m_swap_chain->init_swapchain(m_client_width, m_client_height); - - //Prepare new swapchain images for use - CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); - open_command_buffer(); - - for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) - { - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - - VkClearColorValue clear_color{}; - auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); - vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); - } - - //Flush the command buffer - close_and_submit_command_buffer({}, resize_fence); - CHECK_RESULT(vkWaitForFences((*m_device), 1, &resize_fence, VK_TRUE, UINT64_MAX)); - vkDestroyFence((*m_device), resize_fence, nullptr); - } - - m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); - m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); - m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); - m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); - - //Feed back damaged resources to the main texture cache for management... 
-// m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); - m_rtts.invalidated_resources.clear(); - m_texture_cache.flush(); - - m_buffer_view_to_clean.clear(); - m_sampler_to_clean.clear(); - m_framebuffer_to_clean.clear(); - - vkResetDescriptorPool(*m_device, descriptor_pool, 0); - CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence)); - CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); - open_command_buffer(); - - m_draw_calls = 0; - m_used_descriptors = 0; - m_frame->flip(m_context); -} + } + + properties.rs.frontFace = vk::get_front_face_ccw(rsx::method_registers[NV4097_SET_FRONT_FACE]); + + size_t idx = vk::get_render_pass_location( + vk::get_compatible_surface_format(m_surface.color_format).first, + vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), + (u8)vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])).size()); + properties.render_pass = m_render_passes[idx]; + + properties.num_targets = m_draw_buffers_count; + + //Load current program from buffer + m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); + + //TODO: Update constant buffers.. + //1. Update scale-offset matrix + //2. Update vertex constants + //3. 
Update fragment constants + const size_t scale_offset_offset = m_uniform_buffer_ring_info.alloc<256>(256); + + u8 *buf = (u8*)m_uniform_buffer_ring_info.map(scale_offset_offset, 256); + + //TODO: Add case for this in RSXThread + /** + * NOTE: While VK's coord system resembles GL's, the clip volume is no longer symmetrical in z + * It's like D3D without the flip in y (depending on how you build the spir-v) + */ + { + int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + float scale_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE] / (clip_w / 2.f); + float offset_x = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET] - (clip_w / 2.f); + offset_x /= clip_w / 2.f; + + float scale_y = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 1] / (clip_h / 2.f); + float offset_y = ((float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 1] - (clip_h / 2.f)); + offset_y /= clip_h / 2.f; + + float scale_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; + float offset_z = (float&)rsx::method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; + + float one = 1.f; + + stream_vector(buf, (u32&)scale_x, 0, 0, (u32&)offset_x); + stream_vector((char*)buf + 16, 0, (u32&)scale_y, 0, (u32&)offset_y); + stream_vector((char*)buf + 32, 0, 0, (u32&)scale_z, (u32&)offset_z); + stream_vector((char*)buf + 48, 0, 0, 0, (u32&)one); + } + + u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); + u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF); + float alpha_ref = alpha_ref_raw / 255.f; + + memcpy((char*)buf + 64, &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); + memcpy((char*)buf + 68, &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); + memcpy((char*)buf + 72, &is_alpha_tested, sizeof(u32)); + memcpy((char*)buf + 76, &alpha_ref, sizeof(float)); + 
m_uniform_buffer_ring_info.unmap(); + + const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); + buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float)); + fill_vertex_program_constants_data(buf); + m_uniform_buffer_ring_info.unmap(); + + const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_constants_sz); + buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_constants_sz); + m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program); + m_uniform_buffer_ring_info.unmap(); + + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets); + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 512 * 4 * sizeof(float) }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_constants_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); + + return true; +} + +static const u32 mr_color_offset[rsx::limits::color_buffers_count] = /* NV4097_SET_SURFACE_COLOR_*OFFSET constants, one entry per color buffer A-D */ +{ + NV4097_SET_SURFACE_COLOR_AOFFSET, + NV4097_SET_SURFACE_COLOR_BOFFSET, + NV4097_SET_SURFACE_COLOR_COFFSET, + NV4097_SET_SURFACE_COLOR_DOFFSET +}; + +static const u32 mr_color_dma[rsx::limits::color_buffers_count] = /* NV4097_SET_CONTEXT_DMA_COLOR_* constants, one entry per color buffer A-D */ +{ + NV4097_SET_CONTEXT_DMA_COLOR_A, + NV4097_SET_CONTEXT_DMA_COLOR_B, + NV4097_SET_CONTEXT_DMA_COLOR_C, + NV4097_SET_CONTEXT_DMA_COLOR_D +}; + +static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = /* NV4097_SET_SURFACE_PITCH_* constants, one entry per color buffer A-D */ +{ + NV4097_SET_SURFACE_PITCH_A, + NV4097_SET_SURFACE_PITCH_B, + NV4097_SET_SURFACE_PITCH_C, + NV4097_SET_SURFACE_PITCH_D +}; + +void VKGSRender::init_buffers(bool skip_reading) /* calls prepare_rtts(), then read_buffers() unless skip_reading is set, then set_viewport() */ +{ + 
prepare_rtts(); + + if (!skip_reading) + { + read_buffers(); + } + + set_viewport(); +} + +void VKGSRender::read_buffers() /* empty body: currently does nothing */ +{ +} + +void VKGSRender::write_buffers() /* empty body: currently does nothing */ +{ +} + +void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence) /* ends recording of m_command_buffer and submits it on the swapchain's present queue; waits on `semaphores` and passes `fence` to vkQueueSubmit */ +{ + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); + + VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + VkCommandBuffer cmd = m_command_buffer; + + VkSubmitInfo infos = {}; + infos.commandBufferCount = 1; + infos.pCommandBuffers = &cmd; + infos.pWaitDstStageMask = &pipe_stage_flags; /* NOTE(review): points at a single stage mask while waitSemaphoreCount comes from semaphores.size(); Vulkan expects one mask per wait semaphore, so this presumably relies on callers passing at most one semaphore - confirm */ + infos.pWaitSemaphores = semaphores.data(); + infos.waitSemaphoreCount = static_cast(semaphores.size()); + infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, fence)); +} + +void VKGSRender::open_command_buffer() /* begins recording into m_command_buffer with the ONE_TIME_SUBMIT usage flag */ +{ + VkCommandBufferInheritanceInfo inheritance_info = {}; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + + VkCommandBufferBeginInfo begin_infos = {}; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); +} + + +void VKGSRender::prepare_rtts() +{ + u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; + + if (!m_rtts_dirty) + return; /* nothing is rebuilt unless m_rtts_dirty is set */ + + m_rtts_dirty = false; + + if (m_surface.format != surface_format) + m_surface.unpack(surface_format); + + u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL]; + u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL]; + + u32 clip_width = clip_horizontal >> 16; + u32 clip_height = clip_vertical >> 16; + u32 clip_x = clip_horizontal; + u32 clip_y = clip_vertical; /* NOTE(review): clip_x and clip_y are not referenced again in this function */ + + m_rtts.prepare_render_target(&m_command_buffer, + surface_format, + clip_horizontal, clip_vertical, + 
rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]), + get_color_surface_addresses(), get_zeta_surface_address(), + (*m_device), &m_command_buffer, m_optimal_tiling_supported_formats, m_memory_type_mapping); + + //Bind created rtts as current fbo... + std::vector draw_buffers = vk::get_draw_buffers(rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])); + + std::vector> fbo_images; + + for (u8 index: draw_buffers) + { + vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); /* NOTE(review): dereferenced below without a null check, unlike the depth-stencil path - confirm every draw buffer is always bound here */ + + VkImageSubresourceRange subres = {}; + subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + } + + m_draw_buffers_count = static_cast(fbo_images.size()); /* counts color views only; the depth-stencil view is appended afterwards */ + + if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) + { + vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); + + VkImageSubresourceRange subres = {}; + subres.aspectMask = (m_surface.depth_format == rsx::surface_depth_format::z24s8) ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; /* stencil aspect is requested only for the z24s8 depth format */ + subres.baseArrayLayer = 0; + subres.baseMipLevel = 0; + subres.layerCount = 1; + subres.levelCount = 1; + + fbo_images.push_back(std::make_unique(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres)); + } + + size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(m_surface.color_format).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_surface.depth_format), (u8)draw_buffers.size()); + VkRenderPass current_render_pass = m_render_passes[idx]; + + m_framebuffer_to_clean.push_back(std::make_unique(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); /* stored in m_framebuffer_to_clean, which flip() clears */ +} + + +void VKGSRender::flip(int buffer) /* presents display buffer `buffer`, or rebuilds the swapchain if the client area size changed, then resets per-frame state */ +{ + bool resize_screen = false; + + if (m_client_height != m_frame->client_height() || + m_client_width != m_frame->client_width()) + { + if (!!m_frame->client_height() && !!m_frame->client_width()) + resize_screen = true; + } + + if (!resize_screen) + { + u32 buffer_width = rsx::state.display_buffers[buffer].width; + u32 buffer_height = rsx::state.display_buffers[buffer].height; + u32 buffer_pitch = rsx::state.display_buffers[buffer].pitch; + + rsx::tiled_region buffer_region = get_tiled_address(rsx::state.display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + + areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); /* NOTE(review): buffer_pitch, buffer_region and screen_area are not referenced again in this function */ + + coordi aspect_ratio; + + sizei csize = { m_frame->client_width(), m_frame->client_height() }; + sizei new_size = csize; + + const double aq = (double)buffer_width / buffer_height; + const double rq = (double)new_size.width / new_size.height; + const double q = aq / rq; /* ratio of buffer aspect to window aspect; NOTE(review): no guard against a zero buffer_height or client height - confirm callers exclude it */ + + if (q > 1.0) + { + new_size.height = int(new_size.height / q); + aspect_ratio.y = (csize.height - new_size.height) / 2; + } + else if (q < 1.0) + { + new_size.width = int(new_size.width * q); + aspect_ratio.x = (csize.width - new_size.width) / 2; + } + 
+ aspect_ratio.size = new_size; + + VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); + + //Prepare surface for new frame + CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image)); /* timeout argument is 0, so this call does not wait for an image to become available */ + + //Blit contents to screen.. + VkImage image_to_flip = nullptr; + + if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr) + image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0])->value; + else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr) + image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1])->value; + + VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); + if (image_to_flip) + { + vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT); /* NOTE(review): only aspect_ratio.width/height are passed; the x/y offsets computed above are not used in this function */ + } + else + { + //No draw call was issued! 
+ VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + VkClearColorValue clear_black = { 0 }; + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_GENERAL, range); + vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, &clear_black, 1, &range); + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); + } + + close_and_submit_command_buffer({ m_present_semaphore }, m_submit_fence); + CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); /* blocks until m_submit_fence is signalled, before the present call below */ + + VkPresentInfoKHR present = {}; + present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present.pNext = nullptr; + present.swapchainCount = 1; + present.pSwapchains = &swap_chain; + present.pImageIndices = &m_current_present_image; + CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); + } + else + { + /** + * Since we are about to destroy the old swapchain and its images, we just discard the commandbuffer. + * Waiting for the commands to process does not work reliably as the fence can be signaled before swap images are released + * and there are no explicit methods to ensure that the presentation engine is not using the images at all. 
+ */ + + CHECK_RESULT(vkEndCommandBuffer(m_command_buffer)); + + //Will have to block until rendering is completed + VkFence resize_fence = VK_NULL_HANDLE; + VkFenceCreateInfo infos = {}; + infos.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + + vkQueueWaitIdle(m_swap_chain->get_present_queue()); + vkDeviceWaitIdle(*m_device); + + vkCreateFence((*m_device), &infos, nullptr, &resize_fence); /* NOTE(review): result is not checked, unlike the CHECK_RESULT calls around it */ + + //Wait for all graphics tasks to complete + VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + VkSubmitInfo submit_infos = {}; + submit_infos.commandBufferCount = 0; + submit_infos.pCommandBuffers = nullptr; + submit_infos.pWaitDstStageMask = &pipe_stage_flags; + submit_infos.pWaitSemaphores = nullptr; + submit_infos.waitSemaphoreCount = 0; + submit_infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &submit_infos, resize_fence)); /* submission carries no command buffers and no wait semaphores; only resize_fence is attached */ + + vkWaitForFences((*m_device), 1, &resize_fence, VK_TRUE, UINT64_MAX); + vkResetFences((*m_device), 1, &resize_fence); /* reset so the same fence can be passed to close_and_submit_command_buffer further down */ + + vkDeviceWaitIdle(*m_device); + + //Rebuild swapchain. 
Old swapchain destruction is handled by the init_swapchain call + m_client_width = m_frame->client_width(); + m_client_height = m_frame->client_height(); + m_swap_chain->init_swapchain(m_client_width, m_client_height); + + //Prepare new swapchain images for use + CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); + open_command_buffer(); + + for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) + { + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + + VkClearColorValue clear_color{}; /* value-initialized, i.e. all components zero */ + auto range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(i), VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); + vk::change_image_layout(m_command_buffer, m_swap_chain->get_swap_chain_image(i), + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + } + + //Flush the command buffer + close_and_submit_command_buffer({}, resize_fence); + CHECK_RESULT(vkWaitForFences((*m_device), 1, &resize_fence, VK_TRUE, UINT64_MAX)); + vkDestroyFence((*m_device), resize_fence, nullptr); + } + + m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); + m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); + m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); + m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); + + //Feed back damaged resources to the main texture cache for management... 
+// m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); + m_rtts.invalidated_resources.clear(); + m_texture_cache.flush(); + + m_buffer_view_to_clean.clear(); + m_sampler_to_clean.clear(); + m_framebuffer_to_clean.clear(); + + vkResetDescriptorPool(*m_device, descriptor_pool, 0); + CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence)); + CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0)); + open_command_buffer(); + + m_draw_calls = 0; + m_used_descriptors = 0; + m_frame->flip(m_context); +} diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 03cea5e20e..31fbba648b 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -32,27 +32,31 @@ namespace rsx namespace nv406e { - force_inline void set_reference(thread* rsx, u32 arg) + force_inline void set_reference(u32 arg) { - rsx->ctrl->ref.exchange(arg); + rsx::state.context->control.ref.exchange(arg); } force_inline void semaphore_acquire(thread* rsx, u32 arg) { - //TODO: dma - while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg) + vm::ps3::ptr semaphore = vm::cast(get_address_dma(method_registers[NV406E_SEMAPHORE_OFFSET], method_registers[NV406E_SET_CONTEXT_DMA_SEMAPHORE])); + + while (semaphore->value != arg) { if (Emu.IsStopped()) + { break; + } - std::this_thread::sleep_for(1ms); + std::this_thread::yield(); } } force_inline void semaphore_release(thread* rsx, u32 arg) { - //TODO: dma - vm::ps3::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg); + vm::ps3::ptr semaphore = vm::cast(get_address_dma(method_registers[NV406E_SEMAPHORE_OFFSET], method_registers[NV406E_SET_CONTEXT_DMA_SEMAPHORE])); + semaphore->timestamp = rsx->timestamp(); + semaphore->value = arg; } } @@ -60,15 +64,16 @@ namespace rsx { force_inline void texture_read_semaphore_release(thread* rsx, u32 arg) { - //TODO: dma - vm::ps3::write32(rsx->label_addr + 
method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg); + rsx::semaphore_t& result = rsx::state.context->semaphores[method_registers[NV4097_SET_SEMAPHORE_OFFSET] / sizeof(CellGcmReportData)]; + result.timestamp = rsx->timestamp(); + result.value = arg; } force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg) { - //TODO: dma - vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], - (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff)); + rsx::semaphore_t& result = rsx::state.context->semaphores[method_registers[NV4097_SET_SEMAPHORE_OFFSET] / sizeof(CellGcmReportData)]; + result.timestamp = rsx->timestamp(); + result.value = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); } //fire only when all data passed to rsx cmd buffer @@ -274,18 +279,8 @@ namespace rsx u8 type = arg >> 24; u32 offset = arg & 0xffffff; u32 report_dma = method_registers[NV4097_SET_CONTEXT_DMA_REPORT]; - u32 location; - switch (report_dma) - { - case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT: location = CELL_GCM_LOCATION_LOCAL; break; - case CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN: location = CELL_GCM_LOCATION_MAIN; break; - default: - LOG_WARNING(RSX, "nv4097::get_report: bad report dma: 0x%x", report_dma); - return; - } - - vm::ps3::ptr result = vm::cast(get_address(offset, location)); + vm::ps3::ptr result = vm::cast(get_address_dma(offset, report_dma)); result->timer = rsx->timestamp(); @@ -356,7 +351,7 @@ namespace rsx LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y); } - u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]); + u32 address = get_address_dma(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]); vm::ps3::write32(address, arg); } }; @@ -438,14 +433,14 @@ namespace rsx //HACK: it's extension of the flip-hack. 
remove this when textures cache would be properly implemented for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { - u32 begin = rsx->gcm_buffers[i].offset; + u32 begin = state.display_buffers[i].offset; if (dst_offset < begin || !begin) { continue; } - if (rsx->gcm_buffers[i].width < 720 || rsx->gcm_buffers[i].height < 480) + if (state.display_buffers[i].width < 720 || state.display_buffers[i].height < 480) { continue; } @@ -455,7 +450,7 @@ namespace rsx return; } - u32 end = begin + rsx->gcm_buffers[i].height * rsx->gcm_buffers[i].pitch; + u32 end = begin + state.display_buffers[i].height * state.display_buffers[i].pitch; if (dst_offset < end) { @@ -470,8 +465,8 @@ namespace rsx u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y); u32 out_offset = out_x * out_bpp + out_pitch * out_y; - tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma); - u32 dst_address = get_address(dst_offset + out_offset, dst_dma); + tiled_region src_region = rsx->get_tiled_address_dma(src_offset + in_offset, src_dma); + u32 dst_address = get_address_dma(dst_offset + out_offset, dst_dma); if (out_pitch == 0) { @@ -706,8 +701,8 @@ namespace rsx u32 dst_offset = method_registers[NV0039_OFFSET_OUT]; u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT]; - u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma)); - const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma)); + u8 *dst = (u8*)vm::base(get_address_dma(dst_offset, dst_dma)); + const u8 *src = (u8*)vm::base(get_address_dma(src_offset, src_dma)); if (in_pitch == out_pitch && out_pitch == line_length) { @@ -739,19 +734,18 @@ namespace rsx Emu.Pause(); } - rsx->gcm_current_buffer = arg; rsx->flip(arg); // After each flip PS3 system is executing a routine that changes registers value to some default. // Some game use this default state (SH3). 
rsx->reset(); - rsx->last_flip_time = get_system_time() - 1000000; - rsx->gcm_current_buffer = arg; - rsx->flip_status = 0; + state.last_flip_time = get_system_time() - 1000000; + state.current_display_buffer = arg; + state.flip_status = 0; - if (rsx->flip_handler) + if (state.flip_handler) { - Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu) + Emu.GetCallbackManager().Async([func = state.flip_handler](PPUThread& ppu) { func(ppu, 1); }); @@ -769,9 +763,9 @@ namespace rsx void user_command(thread* rsx, u32 arg) { - if (rsx->user_handler) + if (state.user_handler) { - Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu) + Emu.GetCallbackManager().Async([func = state.user_handler, arg](PPUThread& ppu) { func(ppu, arg); }); diff --git a/rpcs3/Gui/RSXDebugger.cpp b/rpcs3/Gui/RSXDebugger.cpp index a615edd205..26da60b24c 100644 --- a/rpcs3/Gui/RSXDebugger.cpp +++ b/rpcs3/Gui/RSXDebugger.cpp @@ -344,7 +344,7 @@ void RSXDebugger::OnClickBuffer(wxMouseEvent& event) return; } - const auto buffers = render->gcm_buffers; + const auto buffers = rsx::state.display_buffers; if(!buffers) return; @@ -587,7 +587,7 @@ void RSXDebugger::GoToGet(wxCommandEvent& event) if (const auto render = fxm::get()) { u32 realAddr; - if (RSXIOMem.getRealAddr(render->ctrl->get.load(), realAddr)) + if (RSXIOMem.getRealAddr(rsx::state.context->control.get.load(), realAddr)) { m_addr = realAddr; t_addr->SetValue(wxString::Format("%08x", m_addr)); @@ -602,7 +602,7 @@ void RSXDebugger::GoToPut(wxCommandEvent& event) if (const auto render = fxm::get()) { u32 realAddr; - if (RSXIOMem.getRealAddr(render->ctrl->put.load(), realAddr)) + if (RSXIOMem.getRealAddr(rsx::state.context->control.put.load(), realAddr)) { m_addr = realAddr; t_addr->SetValue(wxString::Format("%08x", m_addr)); @@ -678,15 +678,15 @@ void RSXDebugger::GetBuffers() return; } + if (!vm::check_addr(rsx::state.display_buffers.addr())) + return; + // Draw Buffers // TODO: Currently it only 
supports color buffers - for (u32 bufferId=0; bufferId < render->gcm_buffers_count; bufferId++) + for (u32 bufferId=0; bufferId < rsx::state.display_buffers_count; bufferId++) { - if(!vm::check_addr(render->gcm_buffers.addr())) - continue; - - auto buffers = render->gcm_buffers; - u32 RSXbuffer_addr = render->local_mem_addr + buffers[bufferId].offset; + auto &buffers = rsx::state.display_buffers; + u32 RSXbuffer_addr = rsx::state.frame_buffer.addr() + buffers[bufferId].offset; if(!vm::check_addr(RSXbuffer_addr)) continue;