PPU LLVM: Fix HLE function injection

This commit is contained in:
Eladash 2022-08-17 16:53:05 +03:00 committed by Ivan
parent c0e3b86064
commit 1dd1062be1
6 changed files with 110 additions and 51 deletions

View file

@ -996,6 +996,11 @@ bool jit_compiler::check(const std::string& path)
return false; return false;
} }
// Update the global mapping for a single symbol.
// Forwards `name` and `addr` unchanged to m_engine->updateGlobalMapping().
// NOTE(review): m_engine is presumably the LLVM ExecutionEngine, in which case this
// rebinds the symbol `name` to the host address `addr` - confirm against the class definition.
void jit_compiler::update_global_mapping(const std::string& name, u64 addr)
{
m_engine->updateGlobalMapping(name, addr);
}
void jit_compiler::fin() void jit_compiler::fin()
{ {
m_engine->finalizeObject(); m_engine->finalizeObject();

View file

@ -267,14 +267,14 @@ namespace asmjit
// Build runtime function with asmjit::X86Assembler // Build runtime function with asmjit::X86Assembler
template <typename FT, typename Asm = native_asm, typename F> template <typename FT, typename Asm = native_asm, typename F>
inline FT build_function_asm(std::string_view name, F&& builder) inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* custom_runtime = nullptr)
{ {
#ifdef __APPLE__ #ifdef __APPLE__
pthread_jit_write_protect_np(false); pthread_jit_write_protect_np(false);
#endif #endif
using namespace asmjit; using namespace asmjit;
auto& rt = get_global_runtime(); auto& rt = custom_runtime ? *custom_runtime : get_global_runtime();
CodeHolder code; CodeHolder code;
code.init(rt.environment()); code.init(rt.environment());
@ -362,6 +362,9 @@ public:
// Add object (path to obj file) // Add object (path to obj file)
void add(const std::string& path); void add(const std::string& path);
// Update global mapping for a single value
void update_global_mapping(const std::string& name, u64 addr);
// Check object file // Check object file
static bool check(const std::string& path); static bool check(const std::string& path);

View file

@ -519,12 +519,14 @@ s32 _spurs::create_lv2_eq(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32
{ {
if (s32 rc = sys_event_queue_create(ppu, queueId, vm::make_var(attr), SYS_EVENT_QUEUE_LOCAL, size)) if (s32 rc = sys_event_queue_create(ppu, queueId, vm::make_var(attr), SYS_EVENT_QUEUE_LOCAL, size))
{ {
static_cast<void>(ppu.test_stopped());
return rc; return rc;
} }
if (_spurs::attach_lv2_eq(ppu, spurs, *queueId, port, 1, true)) if (_spurs::attach_lv2_eq(ppu, spurs, *queueId, port, 1, true))
{ {
sys_event_queue_destroy(ppu, *queueId, SYS_EVENT_QUEUE_DESTROY_FORCE); sys_event_queue_destroy(ppu, *queueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
static_cast<void>(ppu.test_stopped());
} }
return CELL_OK; return CELL_OK;
@ -623,6 +625,7 @@ s32 _spurs::detach_lv2_eq(vm::ptr<CellSpurs> spurs, u8 spuPort, bool spursCreate
void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs) void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
{ {
ensure(ppu_execute<&sys_lwmutex_lock>(ppu, spurs.ptr(&CellSpurs::mutex), 0) == 0); ensure(ppu_execute<&sys_lwmutex_lock>(ppu, spurs.ptr(&CellSpurs::mutex), 0) == 0);
static_cast<void>(ppu.test_stopped());
while (true) while (true)
{ {
@ -686,6 +689,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
if (spurs->handlerDirty == 0) if (spurs->handlerDirty == 0)
{ {
ensure(ppu_execute<&sys_lwcond_wait>(ppu, spurs.ptr(&CellSpurs::cond), 0) == 0); ensure(ppu_execute<&sys_lwcond_wait>(ppu, spurs.ptr(&CellSpurs::cond), 0) == 0);
static_cast<void>(ppu.test_stopped());
} }
spurs->handlerWaiting = 0; spurs->handlerWaiting = 0;
@ -693,6 +697,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
// If we reach here then a runnable workload was found // If we reach here then a runnable workload was found
ensure(ppu_execute<&sys_lwmutex_unlock>(ppu, spurs.ptr(&CellSpurs::mutex)) == 0); ensure(ppu_execute<&sys_lwmutex_unlock>(ppu, spurs.ptr(&CellSpurs::mutex)) == 0);
static_cast<void>(ppu.test_stopped());
} }
void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs) void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
@ -711,7 +716,10 @@ void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
ensure(sys_spu_thread_group_start(ppu, spurs->spuTG) == 0); ensure(sys_spu_thread_group_start(ppu, spurs->spuTG) == 0);
if (s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null); rc + 0u != CELL_EFAULT) const s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null);
static_cast<void>(ppu.test_stopped());
if (rc + 0u != CELL_EFAULT)
{ {
if (rc + 0u == CELL_ESTAT) if (rc + 0u == CELL_ESTAT)
{ {
@ -810,6 +818,7 @@ s32 _spurs::wakeup_shutdown_completion_waiter(ppu_thread& ppu, vm::ptr<CellSpurs
{ {
ensure((wklF->x28 == 2u)); ensure((wklF->x28 == 2u));
rc = sys_semaphore_post(ppu, static_cast<u32>(wklF->sem), 1); rc = sys_semaphore_post(ppu, static_cast<u32>(wklF->sem), 1);
static_cast<void>(ppu.test_stopped());
} }
return rc; return rc;
@ -823,6 +832,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
while (true) while (true)
{ {
ensure(sys_event_queue_receive(ppu, spurs->eventQueue, vm::null, 0) == 0); ensure(sys_event_queue_receive(ppu, spurs->eventQueue, vm::null, 0) == 0);
static_cast<void>(ppu.test_stopped());
const u64 event_src = ppu.gpr[4]; const u64 event_src = ppu.gpr[4];
const u64 event_data1 = ppu.gpr[5]; const u64 event_data1 = ppu.gpr[5];
@ -854,6 +864,8 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
sys_semaphore_post(ppu, static_cast<u32>(spurs->wklF2[i].sem), 1); sys_semaphore_post(ppu, static_cast<u32>(spurs->wklF2[i].sem), 1);
} }
} }
static_cast<void>(ppu.test_stopped());
} }
else else
{ {
@ -883,6 +895,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
else if (data0 == 2) else if (data0 == 2)
{ {
ensure(sys_semaphore_post(ppu, static_cast<u32>(spurs->semPrv), 1) == 0); ensure(sys_semaphore_post(ppu, static_cast<u32>(spurs->semPrv), 1) == 0);
static_cast<void>(ppu.test_stopped());
} }
else if (data0 == 3) else if (data0 == 3)
{ {
@ -2015,7 +2028,7 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
{ {
u8 init; u8 init;
spurs->sysSrvTrace.atomic_op([spurs, &init](CellSpurs::SrvTraceSyncVar& data) vm::atomic_op(spurs->sysSrvTrace, [spurs, &init](CellSpurs::SrvTraceSyncVar& data)
{ {
if ((init = data.sysSrvTraceInitialised)) if ((init = data.sysSrvTraceInitialised))
{ {
@ -2026,8 +2039,9 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
if (init) if (init)
{ {
spurs->sysSrvMessage = 0xff; vm::light_op<true>(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
ensure(sys_semaphore_wait(ppu, static_cast<u32>(spurs->semPrv), 0) == 0); ensure(sys_semaphore_wait(ppu, static_cast<u32>(spurs->semPrv), 0) == 0);
static_cast<void>(ppu.test_stopped());
} }
} }
@ -2457,7 +2471,7 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
ensure((res_wkl <= 31)); ensure((res_wkl <= 31));
vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t<u8>& v){ v.release(0xff); }); vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t<u8>& v){ v.release(0xff); });
vm::light_op(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); }); vm::light_op<true>(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); });
return CELL_OK; return CELL_OK;
} }
@ -2551,7 +2565,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
if (old_state == SPURS_WKL_STATE_SHUTTING_DOWN) if (old_state == SPURS_WKL_STATE_SHUTTING_DOWN)
{ {
vm::light_op(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); }); vm::light_op<true>(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
return CELL_OK; return CELL_OK;
} }
@ -2807,7 +2821,7 @@ s32 cellSpursReadyCountStore(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid,
return CELL_SPURS_POLICY_MODULE_ERROR_STAT; return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
} }
vm::light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v) vm::light_op<true>(spurs->readyCount(wid), [&](atomic_t<u8>& v)
{ {
v.release(static_cast<u8>(value)); v.release(static_cast<u8>(value));
}); });
@ -3256,6 +3270,7 @@ s32 cellSpursEventFlagSet(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFlag
eventFlag->pendingRecvTaskEvents[ppuWaitSlot] = ppuEvents; eventFlag->pendingRecvTaskEvents[ppuWaitSlot] = ppuEvents;
ensure(sys_event_port_send(eventFlag->eventPortId, 0, 0, 0) == 0); ensure(sys_event_port_send(eventFlag->eventPortId, 0, 0, 0) == 0);
static_cast<void>(ppu.test_stopped());
} }
if (pendingRecv) if (pendingRecv)
@ -3325,7 +3340,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
bool recv; bool recv;
s32 rc; s32 rc;
u16 receivedEvents; u16 receivedEvents;
eventFlag->ctrl.atomic_op([eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl) vm::atomic_op(eventFlag->ctrl, [eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl)
{ {
u16 relevantEvents = ctrl.events & *mask; u16 relevantEvents = ctrl.events & *mask;
if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@ -3429,6 +3444,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
{ {
// Block till something happens // Block till something happens
ensure(sys_event_queue_receive(ppu, eventFlag->eventQueueId, vm::null, 0) == 0); ensure(sys_event_queue_receive(ppu, eventFlag->eventQueueId, vm::null, 0) == 0);
static_cast<void>(ppu.test_stopped());
s32 i = 0; s32 i = 0;
if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY) if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@ -3437,7 +3453,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
} }
*mask = eventFlag->pendingRecvTaskEvents[i]; *mask = eventFlag->pendingRecvTaskEvents[i];
eventFlag->ctrl.atomic_op([](auto& ctrl) { ctrl.ppuPendingRecv = 0; }); vm::atomic_op(eventFlag->ctrl, [](CellSpursEventFlag::ControlSyncVar& ctrl) { ctrl.ppuPendingRecv = 0; });
} }
*mask = receivedEvents; *mask = receivedEvents;

View file

@ -38,8 +38,6 @@ void fmt_class_string<bs_t<ppu_attr>>::format(std::string& out, u64 arg)
format_bitset(out, arg, "[", ",", "]", &fmt_class_string<ppu_attr>::format); format_bitset(out, arg, "[", ",", "]", &fmt_class_string<ppu_attr>::format);
} }
u32 ppu_get_far_jump(u32 pc);
void ppu_module::validate(u32 reloc) void ppu_module::validate(u32 reloc)
{ {
// Load custom PRX configuration if available // Load custom PRX configuration if available
@ -1202,12 +1200,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++}; const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode); const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
block.second = _ptr.addr() - block.first - 4;
break;
}
if (type == ppu_itype::UNK) if (type == ppu_itype::UNK)
{ {
// Invalid blocks will remain empty // Invalid blocks will remain empty
@ -1397,11 +1389,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++}; const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode); const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
break;
}
if (type == ppu_itype::B || type == ppu_itype::BC) if (type == ppu_itype::B || type == ppu_itype::BC)
{ {
const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14); const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);
@ -1476,11 +1463,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++}; const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode); const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(addr)) if (type == ppu_itype::UNK)
{
_ptr.set(next);
}
else if (type == ppu_itype::UNK)
{ {
break; break;
} }
@ -1692,11 +1675,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
for (; i_pos < lim; i_pos += 4) for (; i_pos < lim; i_pos += 4)
{ {
if (ppu_get_far_jump(i_pos))
{
continue;
}
const u32 opc = vm::_ref<u32>(i_pos); const u32 opc = vm::_ref<u32>(i_pos);
switch (auto type = s_ppu_itype.decode(opc)) switch (auto type = s_ppu_itype.decode(opc))

View file

@ -616,9 +616,13 @@ struct ppu_far_jumps_t
bool link; bool link;
bool with_toc; bool with_toc;
std::string module_name; std::string module_name;
ppu_intrp_func_t func;
}; };
ppu_far_jumps_t(int) noexcept {}
std::unordered_map<u32, all_info_t> vals; std::unordered_map<u32, all_info_t> vals;
::jit_runtime rt;
mutable shared_mutex mutex; mutable shared_mutex mutex;
@ -679,17 +683,64 @@ struct ppu_far_jumps_t
return {}; return {};
} }
// Get (building it on first use) the native stub associated with the far jump registered at `pc`.
// Returns nullptr if `vals` has no entry for `pc`; otherwise returns the entry's cached `func`.
// Locked = true : takes `mutex` via std::lock_guard for the duration of the call.
// Locked = false: `lock` is only a const reference to `mutex`, so no locking is performed here
//                 (intended for callers that already hold `mutex`).
template <bool Locked = true>
ppu_intrp_func_t gen_jump(u32 pc)
{
// Either a real lock_guard or a plain reference, selected at compile time by `Locked`
[[maybe_unused]] std::conditional_t<Locked, std::lock_guard<shared_mutex>, const shared_mutex&> lock(mutex);
auto it = vals.find(pc);
if (it == vals.end())
{
// No far jump registered at this address
return nullptr;
}
if (!it->second.func)
{
// Stub not generated yet: assemble it into this object's own runtime (`rt`) and cache it in the entry
it->second.func = build_function_asm<ppu_intrp_func_t>("", [&](native_asm& c, auto& args)
{
using namespace asmjit;
#ifdef ARCH_X64
// Take the first argument from rbp
// NOTE(review): presumably rbp carries the ppu_thread pointer in JIT-compiled code - confirm
c.mov(args[0], x86::rbp);
// Store `pc` into the ppu_thread::cia field addressed through args[0]
c.mov(x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)), pc);
// Tail-jump into ppu_far_jump
c.jmp(ppu_far_jump);
#else
// Non-x64 path (uses asmjit's arm:: operands): constants are read from data embedded after the code
Label jmp_address = c.newLabel();
Label imm_address = c.newLabel();
// Load the 32-bit `pc` value embedded at imm_address
c.ldr(args[1].r32(), arm::ptr(imm_address));
// Store it into the ppu_thread::cia field addressed through args[0]
c.str(args[1].r32(), arm::Mem(args[0], ::offset32(&ppu_thread::cia)));
// Load the 64-bit address of ppu_far_jump embedded at jmp_address and branch to it
c.ldr(args[1], arm::ptr(jmp_address));
c.br(args[1]);
// Embedded data: 16-byte aligned, 64-bit target address followed by the 32-bit `pc`
c.align(AlignMode::kCode, 16);
c.bind(jmp_address);
c.embedUInt64(reinterpret_cast<u64>(ppu_far_jump));
c.bind(imm_address);
c.embedUInt32(pc);
#endif
}, &rt);
}
return it->second.func;
}
}; };
u32 ppu_get_far_jump(u32 pc) u32 ppu_get_far_jump(u32 pc)
{ {
g_fxo->init<ppu_far_jumps_t>(); if (!g_fxo->is_init<ppu_far_jumps_t>())
{
return 0;
}
return g_fxo->get<ppu_far_jumps_t>().get_target(pc); return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
} }
static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*) static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{ {
const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(vm::get_addr(this_op), &ppu); const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);
if (!vm::check_addr(cia, vm::page_executable)) if (!vm::check_addr(cia, vm::page_executable))
{ {
@ -740,7 +791,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
return false; return false;
} }
g_fxo->init<ppu_far_jumps_t>(); g_fxo->init<ppu_far_jumps_t>(0);
if (!module_name.empty()) if (!module_name.empty())
{ {
@ -759,7 +810,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
std::lock_guard lock(jumps.mutex); std::lock_guard lock(jumps.mutex);
jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)}); jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});
ppu_register_function_at(entry, 4, &ppu_far_jump); ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder == ppu_decoder_type::_static ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));
return true; return true;
} }
@ -781,7 +832,10 @@ bool ppu_form_branch_to_code(u32 entry, u32 target)
void ppu_remove_hle_instructions(u32 addr, u32 size) void ppu_remove_hle_instructions(u32 addr, u32 size)
{ {
g_fxo->init<ppu_far_jumps_t>(); if (Emu.IsStopped() || !g_fxo->is_init<ppu_far_jumps_t>())
{
return;
}
auto& jumps = g_fxo->get<ppu_far_jumps_t>(); auto& jumps = g_fxo->get<ppu_far_jumps_t>();
@ -3392,6 +3446,19 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
} }
} }
if (jit)
{
const auto far_jump = ppu_get_far_jump(func.addr) ? g_fxo->get<ppu_far_jumps_t>().gen_jump(func.addr) : nullptr;
if (far_jump)
{
// Replace the function with ppu_far_jump
jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
fpos++;
continue;
}
}
// Copy block or function entry // Copy block or function entry
ppu_function& entry = part.funcs.emplace_back(func); ppu_function& entry = part.funcs.emplace_back(func);
@ -3713,8 +3780,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
const auto addr = ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name))); const auto addr = ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)));
jit_mod.funcs.emplace_back(addr); jit_mod.funcs.emplace_back(addr);
if (ppu_ref(func.addr) != ppu_far_jump) ppu_register_function_at(func.addr, 4, addr);
ppu_register_function_at(func.addr, 4, addr);
if (g_cfg.core.ppu_debug) if (g_cfg.core.ppu_debug)
ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc); ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc);
@ -3733,8 +3799,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
const u64 addr = reinterpret_cast<uptr>(ensure(jit_mod.funcs[index++])); const u64 addr = reinterpret_cast<uptr>(ensure(jit_mod.funcs[index++]));
if (ppu_ref(func.addr) != ppu_far_jump) ppu_register_function_at(func.addr, 4, addr);
ppu_register_function_at(func.addr, 4, addr);
if (g_cfg.core.ppu_debug) if (g_cfg.core.ppu_debug)
ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc); ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc);

View file

@ -236,14 +236,6 @@ Function* PPUTranslator::Translate(const ppu_function& info)
m_rel = nullptr; m_rel = nullptr;
} }
if (ppu_get_far_jump(m_addr + base))
{
// Branch into an HLEd instruction using the jump table
FlushRegisters();
CallFunction(0, m_reloc ? m_ir->CreateAdd(m_ir->getInt64(m_addr), m_seg0) : m_ir->getInt64(m_addr));
continue;
}
const u32 op = vm::read32(vm::cast(m_addr + base)); const u32 op = vm::read32(vm::cast(m_addr + base));
(this->*(s_ppu_decoder.decode(op)))({op}); (this->*(s_ppu_decoder.decode(op)))({op});