Linux: use futex_waitv syscall for atomic waiting

To make this possible, some rarely used atomic-wait features were removed: per-wait bit masks, the extended comparison operators such as op_ne, and waits on values wider than 32 bits, which now go through one 32-bit half of the variable instead.
Ivan Chikish 2023-07-31 23:57:26 +03:00 committed by Ivan
parent 831a9fe012
commit d34287b2cc
51 changed files with 441 additions and 574 deletions
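
For reference, the Linux backend introduced here is built on the futex_waitv(2) syscall (kernel 5.16+), which takes up to 128 entries of 32-bit futex words plus expected values and sleeps until one of them is woken or a timeout expires. A minimal sketch of the call follows; the wrapper name wait_any and its parameters are illustrative, not part of this commit.

#include <linux/types.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <ctime>

#ifndef SYS_futex_waitv
#define SYS_futex_waitv 449 // x86-64 / ARM64 syscall number
#endif
#ifndef FUTEX_32
#define FUTEX_32 2 // the waiter operates on a 32-bit futex word
#endif

struct futex_waitv
{
	__u64 val;        // expected value at uaddr
	__u64 uaddr;      // address of the 32-bit word
	__u32 flags;      // FUTEX_32 here
	__u32 __reserved; // must be zero
};

// Block until any of the given words is woken (or no longer holds its expected
// value), or until the relative timeout in nanoseconds expires.
int wait_any(unsigned* const* words, const unsigned* expected, unsigned count, long long timeout_ns)
{
	futex_waitv waiters[128]{};

	for (unsigned i = 0; i < count; i++)
	{
		waiters[i].val = expected[i];
		waiters[i].uaddr = reinterpret_cast<__u64>(words[i]);
		waiters[i].flags = FUTEX_32;
	}

	// The syscall takes an absolute timeout on an explicitly chosen clock
	timespec ts{};
	clock_gettime(CLOCK_MONOTONIC, &ts);
	ts.tv_sec += timeout_ns / 1'000'000'000;
	ts.tv_nsec += timeout_ns % 1'000'000'000;
	if (ts.tv_nsec >= 1'000'000'000) { ts.tv_sec += 1; ts.tv_nsec -= 1'000'000'000; }

	// Returns the index of the woken waiter, or -1 with errno set (EAGAIN, ETIMEDOUT, ...)
	return static_cast<int>(syscall(SYS_futex_waitv, waiters, count, 0u, &ts, CLOCK_MONOTONIC));
}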


@ -2172,14 +2172,14 @@ u64 thread_base::finalize(thread_state result_state) noexcept
const u64 _self = m_thread; const u64 _self = m_thread;
// Set result state (errored or finalized) // Set result state (errored or finalized)
m_sync.fetch_op([&](u64& v) m_sync.fetch_op([&](u32& v)
{ {
v &= -4; v &= -4;
v |= static_cast<u32>(result_state); v |= static_cast<u32>(result_state);
}); });
// Signal waiting threads // Signal waiting threads
m_sync.notify_all(2); m_sync.notify_all();
return _self; return _self;
} }
@ -2266,8 +2266,18 @@ thread_state thread_ctrl::state()
void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */) void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
{ {
if (!usec)
{
return;
}
auto _this = g_tls_this_thread; auto _this = g_tls_this_thread;
if (!alert && usec > 50000)
{
usec = 50000;
}
#ifdef __linux__ #ifdef __linux__
static thread_local struct linux_timer_handle_t static thread_local struct linux_timer_handle_t
{ {
@ -2296,13 +2306,13 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
} }
} fd_timer; } fd_timer;
if (!alert && usec > 0 && usec <= 1000 && fd_timer != -1) if (!alert && fd_timer != -1)
{ {
struct itimerspec timeout; struct itimerspec timeout;
u64 missed; u64 missed;
timeout.it_value.tv_nsec = usec * 1'000ull; timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000ull;
timeout.it_value.tv_sec = 0; timeout.it_value.tv_sec = usec / 1'000'000;
timeout.it_interval.tv_sec = 0; timeout.it_interval.tv_sec = 0;
timeout.it_interval.tv_nsec = 0; timeout.it_interval.tv_nsec = 0;
timerfd_settime(fd_timer, 0, &timeout, NULL); timerfd_settime(fd_timer, 0, &timeout, NULL);
@ -2312,15 +2322,27 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
} }
#endif #endif
if (_this->m_sync.bit_test_reset(2) || _this->m_taskq) if (alert)
{ {
return; if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
{
return;
}
} }
// Wait for signal and thread state abort // Wait for signal and thread state abort
atomic_wait::list<2> list{}; atomic_wait::list<2> list{};
list.set<0>(_this->m_sync, 0, 4 + 1);
list.set<1>(_this->m_taskq, nullptr); if (alert)
{
list.set<0>(_this->m_sync, 0);
list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
}
else
{
list.set<0>(_this->m_dummy, 0);
}
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff}); list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
} }
@ -2331,29 +2353,27 @@ void thread_ctrl::wait_for_accurate(u64 usec)
return; return;
} }
if (usec > 50000)
{
fmt::throw_exception("thread_ctrl::wait_for_accurate: unsupported amount");
}
#ifdef __linux__
return wait_for(usec, false);
#else
using namespace std::chrono_literals; using namespace std::chrono_literals;
const auto until = std::chrono::steady_clock::now() + 1us * usec; const auto until = std::chrono::steady_clock::now() + 1us * usec;
while (true) while (true)
{ {
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = usec <= 1000 ? 10 : 50;
#else
// Host scheduler quantum for windows (worst case) // Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500; constexpr u64 host_min_quantum = 500;
#endif
if (usec >= host_min_quantum) if (usec >= host_min_quantum)
{ {
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus // Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
wait_for(usec - (usec % host_min_quantum), false); wait_for(usec - (usec % host_min_quantum), false);
#endif
} }
// TODO: Determine best value for yield delay // TODO: Determine best value for yield delay
else if (usec >= host_min_quantum / 2) else if (usec >= host_min_quantum / 2)
@ -2374,6 +2394,7 @@ void thread_ctrl::wait_for_accurate(u64 usec)
usec = (until - current).count(); usec = (until - current).count();
} }
#endif
} }
std::string thread_ctrl::get_name_cached() std::string thread_ctrl::get_name_cached()
@ -2440,7 +2461,7 @@ bool thread_base::join(bool dtor) const
for (u64 i = 0; (m_sync & 3) <= 1; i++) for (u64 i = 0; (m_sync & 3) <= 1; i++)
{ {
m_sync.wait(0, 2, timeout); m_sync.wait(m_sync & ~2, timeout);
if (m_sync & 2) if (m_sync & 2)
{ {
@ -2460,7 +2481,7 @@ void thread_base::notify()
{ {
// Set notification // Set notification
m_sync |= 4; m_sync |= 4;
m_sync.notify_one(4); m_sync.notify_all();
} }
u64 thread_base::get_native_id() const u64 thread_base::get_native_id() const
@ -2497,7 +2518,7 @@ u64 thread_base::get_cycles()
{ {
cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec; cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
#endif #endif
if (const u64 old_cycles = m_sync.fetch_op([&](u64& v){ v &= 7; v |= (cycles << 3); }) >> 3) if (const u64 old_cycles = m_cycles.exchange(cycles))
{ {
return cycles - old_cycles; return cycles - old_cycles;
} }
@ -2507,7 +2528,7 @@ u64 thread_base::get_cycles()
} }
else else
{ {
return m_sync >> 3; return m_cycles;
} }
} }
@ -2560,8 +2581,8 @@ void thread_base::exec()
} }
// Notify waiters // Notify waiters
ptr->exec.release(nullptr); ptr->done.release(1);
ptr->exec.notify_all(); ptr->done.notify_all();
} }
if (ptr->next) if (ptr->next)
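
The non-alertable path above now routes every wait (clamped to 50 ms) through the thread-local timerfd instead of only waits of 1 ms or less. A standalone sketch of that pattern; the wrapper name sleep_usec and the minimal error handling are illustrative:

#include <sys/timerfd.h>
#include <unistd.h>
#include <cstdint>

void sleep_usec(int fd_timer, std::uint64_t usec)
{
	itimerspec timeout{};
	timeout.it_value.tv_sec  = usec / 1'000'000;         // whole seconds
	timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000; // remainder as nanoseconds
	// it_interval stays zero: one-shot timer
	timerfd_settime(fd_timer, 0, &timeout, nullptr);

	std::uint64_t missed = 0;
	// Blocks until expiry; the read returns the number of elapsed expirations
	if (read(fd_timer, &missed, sizeof(missed)) == -1)
	{
		// EINTR and other errors would need handling in real code
	}
}

// The descriptor is created once per thread, roughly as in the diff above:
//   const int fd_timer = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);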


@ -100,17 +100,19 @@ class thread_future
protected: protected:
atomic_t<void(*)(thread_base*, thread_future*)> exec{}; atomic_t<void(*)(thread_base*, thread_future*)> exec{};
atomic_t<u32> done{0};
public: public:
// Get reference to the atomic variable for inspection and waiting for // Get reference to the atomic variable for inspection and waiting for
const auto& get_wait() const const auto& get_wait() const
{ {
return exec; return done;
} }
// Wait (preset) // Wait (preset)
void wait() const void wait() const
{ {
exec.wait<atomic_wait::op_ne>(nullptr); done.wait(0);
} }
}; };
@ -131,8 +133,13 @@ private:
// Thread handle (platform-specific) // Thread handle (platform-specific)
atomic_t<u64> m_thread{0}; atomic_t<u64> m_thread{0};
// Thread state and cycles // Thread cycles
atomic_t<u64> m_sync{0}; atomic_t<u64> m_cycles{0};
atomic_t<u32> m_dummy{0};
// Thread state
atomic_t<u32> m_sync{0};
// Thread name // Thread name
atomic_ptr<std::string> m_tname; atomic_ptr<std::string> m_tname;
@ -284,16 +291,22 @@ public:
} }
atomic_wait::list<Max + 2> list{}; atomic_wait::list<Max + 2> list{};
list.template set<Max>(_this->m_sync, 0, 4 + 1); list.template set<Max>(_this->m_sync, 0);
list.template set<Max + 1>(_this->m_taskq, nullptr); list.template set<Max + 1>(_this->m_taskq);
setter(list); setter(list);
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff}); list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
} }
template <atomic_wait::op Op = atomic_wait::op::eq, typename T, typename U> template <typename T, typename U>
static inline void wait_on(T& wait, U old, u64 usec = -1) static inline void wait_on(T& wait, U old, u64 usec = -1)
{ {
wait_on_custom<1>([&](atomic_wait::list<3>& list){ list.set<0, Op>(wait, old); }, usec); wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait, old); }, usec);
}
template <typename T>
static inline void wait_on(T& wait)
{
wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait); });
} }
// Exit. // Exit.
@ -637,7 +650,7 @@ public:
{ {
bool notify_sync = false; bool notify_sync = false;
if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u64& v){ return !(v & 3) && (v |= 1); }).second) if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u32& v) { return !(v & 3) && (v |= 1); }).second)
{ {
notify_sync = true; notify_sync = true;
} }
@ -650,7 +663,7 @@ public:
if (notify_sync) if (notify_sync)
{ {
// Notify after context abortion has been made so all conditions for wake-up be satisfied by the time of notification // Notify after context abortion has been made so all conditions for wake-up be satisfied by the time of notification
thread::m_sync.notify_one(1); thread::m_sync.notify_all();
} }
if (s == thread_state::finished) if (s == thread_state::finished)
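
With the cycle counter split out into m_cycles, m_sync shrinks to a single 32-bit word the futex backend can wait on directly: bits 0-1 hold the thread state and bit 2 is the wake-up flag tested by wait_for(). A rough model of the layout and of the new notify(); the constant names are illustrative:

#include <atomic>
#include <cstdint>

constexpr std::uint32_t state_mask  = 0b011; // bits 0-1: aborting/errored/finished state
constexpr std::uint32_t notify_flag = 0b100; // bit 2: "signaled" flag consumed by wait_for()

std::atomic<std::uint32_t> m_sync{0};
std::atomic<std::uint64_t> m_cycles{0}; // no longer packed into the upper bits of m_sync

void notify()
{
	m_sync.fetch_or(notify_flag);
	m_sync.notify_all(); // with masks gone, waiters are woken on any change of the word
}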


@ -9,7 +9,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
ensure(_old); ensure(_old);
// Wait with timeout // Wait with timeout
m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000}); m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
// Cleanup // Cleanup
m_value.atomic_op([](u32& value) m_value.atomic_op([](u32& value)
@ -47,10 +47,10 @@ void cond_variable::imp_wake(u32 _count) noexcept
if (_count > 1 || ((_old + (c_signal_mask & (0 - c_signal_mask))) & c_signal_mask) == c_signal_mask) if (_count > 1 || ((_old + (c_signal_mask & (0 - c_signal_mask))) & c_signal_mask) == c_signal_mask)
{ {
// Resort to notify_all if signal count reached max // Resort to notify_all if signal count reached max
m_value.notify_all(c_signal_mask); m_value.notify_all();
} }
else else
{ {
m_value.notify_one(c_signal_mask); m_value.notify_one();
} }
} }


@ -2,6 +2,7 @@
#include "util/types.hpp" #include "util/types.hpp"
#include "util/atomic.hpp" #include "util/atomic.hpp"
#include "util/asm.hpp"
//! Simple unshrinkable array base for concurrent access. Only growths automatically. //! Simple unshrinkable array base for concurrent access. Only growths automatically.
//! There is no way to know the current size. The smaller index is, the faster it's accessed. //! There is no way to know the current size. The smaller index is, the faster it's accessed.
@ -280,12 +281,17 @@ public:
template <typename T> template <typename T>
class lf_queue final class lf_queue final
{ {
atomic_t<lf_queue_item<T>*> m_head{nullptr}; atomic_t<u64> m_head{0};
lf_queue_item<T>* load(u64 value) const noexcept
{
return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
}
// Extract all elements and reverse element order (FILO to FIFO) // Extract all elements and reverse element order (FILO to FIFO)
lf_queue_item<T>* reverse() noexcept lf_queue_item<T>* reverse() noexcept
{ {
if (auto* head = m_head.load() ? m_head.exchange(nullptr) : nullptr) if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
{ {
if (auto* prev = head->m_link) if (auto* prev = head->m_link)
{ {
@ -311,35 +317,35 @@ public:
~lf_queue() ~lf_queue()
{ {
delete m_head.load(); delete load(m_head);
} }
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(std::nullptr_t /*null*/ = nullptr) noexcept void wait(std::nullptr_t /*null*/ = nullptr) noexcept
{ {
if (m_head == nullptr) if (m_head == 0)
{ {
m_head.template wait<Flags>(nullptr); utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
} }
} }
const volatile void* observe() const noexcept const volatile void* observe() const noexcept
{ {
return m_head.load(); return load(m_head);
} }
explicit operator bool() const noexcept explicit operator bool() const noexcept
{ {
return m_head != nullptr; return m_head != 0;
} }
template <typename... Args> template <typename... Args>
void push(Args&&... args) void push(Args&&... args)
{ {
auto _old = m_head.load(); auto oldv = m_head.load();
auto _old = load(oldv);
auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...); auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);
while (!m_head.compare_exchange(_old, item)) while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
{ {
item->m_link = _old; item->m_link = _old;
} }
@ -347,7 +353,7 @@ public:
if (!_old) if (!_old)
{ {
// Notify only if queue was empty // Notify only if queue was empty
m_head.notify_one(); utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
} }
} }
@ -363,7 +369,7 @@ public:
lf_queue_slice<T> pop_all_reversed() lf_queue_slice<T> pop_all_reversed()
{ {
lf_queue_slice<T> result; lf_queue_slice<T> result;
result.m_head = m_head.exchange(nullptr); result.m_head = load(m_head.exchange(0));
return result; return result;
} }
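
Because the wait engine now only understands 32-bit words, lf_queue stores its head pointer in a u64 shifted left by 16 and sleeps on the upper 32-bit half of that word, which is what the utils::bless reinterpretation above does. A simplified model of the trick, assuming little-endian layout and 48-bit user-space pointers; push() here exchanges the head instead of CAS-linking like the real code, and half() stands in for utils::bless:

#include <atomic>
#include <cstdint>

std::atomic<std::uint64_t> m_head{0};

// View the same 8 bytes as two 32-bit atomics; index 1 is the high half on little-endian.
// This is deliberate type punning, mirroring utils::bless in the repository.
inline std::atomic<std::uint32_t>* half(std::atomic<std::uint64_t>* p)
{
	return reinterpret_cast<std::atomic<std::uint32_t>*>(p);
}

void push(void* item)
{
	// The pointer is stored shifted left by 16, so its significant bits occupy the high word
	const auto old = m_head.exchange(reinterpret_cast<std::uint64_t>(item) << 16);

	if (!old)
	{
		half(&m_head)[1].notify_one(); // notify only if the queue was empty
	}
}

void wait_non_empty()
{
	while (m_head.load() == 0)
	{
		half(&m_head)[1].wait(0); // futex-wait on the high 32 bits of the packed pointer
	}
}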


@ -74,14 +74,14 @@ void shared_mutex::imp_wait()
break; break;
} }
m_value.wait(old, c_sig); m_value.wait(old);
} }
} }
void shared_mutex::imp_signal() void shared_mutex::imp_signal()
{ {
m_value += c_sig; m_value += c_sig;
m_value.notify_one(c_sig); m_value.notify_one();
} }
void shared_mutex::imp_lock(u32 val) void shared_mutex::imp_lock(u32 val)


@ -38,7 +38,29 @@ constexpr NTSTATUS NTSTATUS_ALERTED = 0x101;
constexpr NTSTATUS NTSTATUS_TIMEOUT = 0x102; constexpr NTSTATUS NTSTATUS_TIMEOUT = 0x102;
#endif #endif
#ifndef __linux__ #ifdef __linux__
#ifndef SYS_futex_waitv
#if defined(ARCH_X64) || defined(ARCH_ARM64)
#define SYS_futex_waitv 449
#endif
#endif
#ifndef FUTEX_32
#define FUTEX_32 2
#endif
#ifndef FUTEX_WAITV_MAX
#define FUTEX_WAITV_MAX 128
#endif
struct futex_waitv
{
__u64 val;
__u64 uaddr;
__u32 flags;
__u32 __reserved;
};
#else
enum enum
{ {
FUTEX_PRIVATE_FLAG = 0, FUTEX_PRIVATE_FLAG = 0,
@ -113,7 +135,7 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
} }
else else
{ {
// TODO // TODO: absolute timeout
} }
map.erase(std::find(map.find(uaddr), map.end(), ref)); map.erase(std::find(map.find(uaddr), map.end(), ref));
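
Threads blocked through futex_waitv are woken with the ordinary futex(2) FUTEX_WAKE operation on the individual 32-bit word. A sketch of the wake side, assuming the waiters were queued without FUTEX_PRIVATE_FLAG; the wrapper names are illustrative:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <climits>

// Wake every thread currently blocked on this 32-bit word
void notify_all(unsigned* word)
{
	syscall(SYS_futex, word, FUTEX_WAKE, INT_MAX, nullptr, nullptr, 0);
}

// Wake at most one thread blocked on this word
void notify_one(unsigned* word)
{
	syscall(SYS_futex, word, FUTEX_WAKE, 1, nullptr, nullptr, 0);
}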


@ -261,7 +261,7 @@ struct cpu_prof
if (threads.empty()) if (threads.empty())
{ {
// Wait for messages if no work (don't waste CPU) // Wait for messages if no work (don't waste CPU)
thread_ctrl::wait_on(registered, nullptr); thread_ctrl::wait_on(registered);
continue; continue;
} }
@ -939,7 +939,7 @@ bool cpu_thread::check_state() noexcept
else else
{ {
// TODO: fix the workaround // TODO: fix the workaround
g_suspend_counter.wait(ctr, -4, atomic_wait_timeout{100}); g_suspend_counter.wait(ctr, atomic_wait_timeout{10'000});
} }
} }
else else
@ -972,8 +972,7 @@ bool cpu_thread::check_state() noexcept
} }
// Short sleep when yield flag is present alone (makes no sense when other methods which can stop thread execution have been done) // Short sleep when yield flag is present alone (makes no sense when other methods which can stop thread execution have been done)
// Pass a mask of a single bit which is often unused to avoid notifications s_dummy_atomic.wait(0, atomic_wait_timeout{80'000});
s_dummy_atomic.wait(0, 1u << 30, atomic_wait_timeout{80'000});
} }
} }
} }
@ -1010,13 +1009,13 @@ cpu_thread& cpu_thread::operator=(thread_state)
if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit)) if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit))
{ {
state.notify_one(cpu_flag::exit); state.notify_one();
if (auto thread = try_get<spu_thread>()) if (auto thread = try_get<spu_thread>())
{ {
if (u32 resv = atomic_storage<u32>::load(thread->raddr)) if (u32 resv = atomic_storage<u32>::load(thread->raddr))
{ {
vm::reservation_notifier(resv).notify_all(-128); vm::reservation_notifier(resv).notify_all();
} }
} }
} }


@ -73,7 +73,7 @@ void mic_context::operator()()
// Timestep in microseconds // Timestep in microseconds
constexpr u64 TIMESTEP = 256ull * 1'000'000ull / 48000ull; constexpr u64 TIMESTEP = 256ull * 1'000'000ull / 48000ull;
u64 timeout = 0; u64 timeout = 0;
u64 oldvalue = 0; u32 oldvalue = 0;
while (thread_ctrl::state() != thread_state::aborting) while (thread_ctrl::state() != thread_state::aborting)
{ {


@ -374,7 +374,7 @@ public:
static constexpr auto thread_name = "Microphone Thread"sv; static constexpr auto thread_name = "Microphone Thread"sv;
protected: protected:
atomic_t<u64> wakey = 0; atomic_t<u32> wakey = 0;
// u32 signalStateLocalTalk = 9; // value is in range 0-10. 10 indicates talking, 0 indicating none. // u32 signalStateLocalTalk = 9; // value is in range 0-10. 10 indicates talking, 0 indicating none.
// u32 signalStateFarTalk = 0; // value is in range 0-10. 10 indicates talking from far away, 0 indicating none. // u32 signalStateFarTalk = 0; // value is in range 0-10. 10 indicates talking from far away, 0 indicating none.


@ -164,7 +164,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,
return CellSysutilError{ret + 0u}; return CellSysutilError{ret + 0u};
} }
const auto notify = std::make_shared<atomic_t<bool>>(false); const auto notify = std::make_shared<atomic_t<u32>>(0);
const auto res = manager->create<rsx::overlays::message_dialog>()->show(is_blocking, msgString.get_ptr(), _type, [callback, userData, &return_code, is_blocking, notify](s32 status) const auto res = manager->create<rsx::overlays::message_dialog>()->show(is_blocking, msgString.get_ptr(), _type, [callback, userData, &return_code, is_blocking, notify](s32 status)
{ {
@ -186,7 +186,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,
if (is_blocking && notify) if (is_blocking && notify)
{ {
*notify = true; *notify = 1;
notify->notify_one(); notify->notify_one();
} }
}); });


@ -256,7 +256,7 @@ error_code cell_music_decode_read(vm::ptr<void> buf, vm::ptr<u32> startTime, u64
{ {
dec.read_pos = 0; dec.read_pos = 0;
dec.decoder.clear(); dec.decoder.clear();
dec.decoder.track_fully_consumed = true; dec.decoder.track_fully_consumed = 1;
dec.decoder.track_fully_consumed.notify_one(); dec.decoder.track_fully_consumed.notify_one();
break; break;
} }


@ -2477,8 +2477,8 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
spurs_res += 127; spurs_res += 127;
spurs_res2 += 127; spurs_res2 += 127;
spurs_res.notify_all(-128); spurs_res.notify_all();
spurs_res2.notify_all(-128); spurs_res2.notify_all();
u32 res_wkl; u32 res_wkl;
const auto wkl = &spurs->wklInfo(wnum); const auto wkl = &spurs->wklInfo(wnum);


@ -303,7 +303,7 @@ struct vdec_context final
return; return;
} }
thread_ctrl::wait_on(in_cmd, nullptr); thread_ctrl::wait_on(in_cmd);
slice = in_cmd.pop_all(); // Pop new command list slice = in_cmd.pop_all(); // Pop new command list
}()) }())
{ {
@ -921,7 +921,7 @@ static error_code vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr<CellVdecCb>
}); });
thrd->state -= cpu_flag::stop; thrd->state -= cpu_flag::stop;
thrd->state.notify_one(cpu_flag::stop); thrd->state.notify_one();
return CELL_OK; return CELL_OK;
} }


@ -1821,7 +1821,11 @@ void ppu_thread::cpu_task()
// Wait until the progress dialog is closed. // Wait until the progress dialog is closed.
// We don't want to open a cell dialog while a native progress dialog is still open. // We don't want to open a cell dialog while a native progress dialog is still open.
thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0); while (u32 v = g_progr_ptotal)
{
g_progr_ptotal.wait(v);
}
g_fxo->get<progress_dialog_workaround>().show_overlay_message_only = true; g_fxo->get<progress_dialog_workaround>().show_overlay_message_only = true;
// Sadly we can't postpone initializing guest time because we need to run PPU threads // Sadly we can't postpone initializing guest time because we need to run PPU threads
@ -1839,7 +1843,7 @@ void ppu_thread::cpu_task()
} }
ensure(spu.state.test_and_reset(cpu_flag::stop)); ensure(spu.state.test_and_reset(cpu_flag::stop));
spu.state.notify_one(cpu_flag::stop); spu.state.notify_one();
} }
}); });
@ -2051,7 +2055,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
struct init_pushed struct init_pushed
{ {
bool pushed = false; bool pushed = false;
atomic_t<bool> inited = false; atomic_t<u32> inited = false;
}; };
call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1); call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
@ -2100,7 +2104,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
{ {
while (!Emu.IsStopped() && !g_fxo->get<init_pushed>().inited) while (!Emu.IsStopped() && !g_fxo->get<init_pushed>().inited)
{ {
thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, false); thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, 0);
} }
return false; return false;
} }
@ -2117,7 +2121,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
{ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool {ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool
{ {
auto& inited = g_fxo->get<init_pushed>().inited; auto& inited = g_fxo->get<init_pushed>().inited;
inited = true; inited = 1;
inited.notify_all(); inited.notify_all();
return true; return true;
} }
@ -3046,7 +3050,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
if (ppu.cia < liblv2_begin || ppu.cia >= liblv2_end) if (ppu.cia < liblv2_begin || ppu.cia >= liblv2_end)
{ {
res.notify_all(-128); res.notify_all();
} }
if (addr == ppu.last_faddr) if (addr == ppu.last_faddr)
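
With the comparison operators removed, "block until this counter drops to zero" becomes an explicit re-read loop, as in the g_progr_ptotal change above. The same contract in portable C++20 terms; wait_until_zero is an illustrative name:

#include <atomic>
#include <cstdint>

void wait_until_zero(const std::atomic<std::uint32_t>& counter)
{
	while (std::uint32_t v = counter.load())
	{
		counter.wait(v); // returns once the value differs from v; the loop re-checks for zero
	}
}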


@ -21,7 +21,7 @@ inline void try_start(spu_thread& spu)
}).second) }).second)
{ {
spu.state -= cpu_flag::stop; spu.state -= cpu_flag::stop;
spu.state.notify_one(cpu_flag::stop); spu.state.notify_one();
} }
}; };
@ -273,7 +273,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
for (status_npc_sync_var old; (old = status_npc).status & SPU_STATUS_RUNNING;) for (status_npc_sync_var old; (old = status_npc).status & SPU_STATUS_RUNNING;)
{ {
status_npc.wait(old); utils::bless<atomic_t<u32>>(&status_npc)[0].wait(old.status);
} }
} }
} }


@ -647,7 +647,10 @@ void spu_cache::initialize()
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm) if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
{ {
// Initialize progress dialog (wait for previous progress done) // Initialize progress dialog (wait for previous progress done)
thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0); while (u32 v = g_progr_ptotal)
{
g_progr_ptotal.wait(v);
}
g_progr_ptotal += ::size32(func_list); g_progr_ptotal += ::size32(func_list);
progr.emplace("Building SPU cache..."); progr.emplace("Building SPU cache...");
@ -7795,7 +7798,7 @@ public:
{ {
minusb = eval(x); minusb = eval(x);
} }
const auto minusbx = bitcast<u8[16]>(minusb); const auto minusbx = bitcast<u8[16]>(minusb);
// Data with swapped endian from a load instruction // Data with swapped endian from a load instruction
@ -11011,7 +11014,7 @@ struct spu_llvm_worker
return; return;
} }
thread_ctrl::wait_on(registered, nullptr); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
slice = registered.pop_all(); slice = registered.pop_all();
}()) }())
{ {
@ -11178,7 +11181,7 @@ struct spu_llvm
{ {
// Interrupt profiler thread and put it to sleep // Interrupt profiler thread and put it to sleep
static_cast<void>(prof_mutex.reset()); static_cast<void>(prof_mutex.reset());
thread_ctrl::wait_on(registered, nullptr); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
continue; continue;
} }


@ -2418,7 +2418,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
} }
} }
if (++i < 10) if (true || ++i < 10)
{ {
busy_wait(500); busy_wait(500);
} }
@ -2426,7 +2426,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
{ {
// Wait // Wait
_cpu->state += cpu_flag::wait + cpu_flag::temp; _cpu->state += cpu_flag::wait + cpu_flag::temp;
bits->wait(old, wmask); // bits->wait(old, wmask);
_cpu->check_state(); _cpu->check_state();
} }
}()) }())
@ -2542,7 +2542,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
v &= ~wmask; v &= ~wmask;
}); });
bits->notify_all(wmask); // bits->notify_all(wmask);
if (size == size0) if (size == size0)
{ {
@ -3588,7 +3588,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
{ {
if (raddr) if (raddr)
{ {
vm::reservation_notifier(addr).notify_all(-128); vm::reservation_notifier(addr).notify_all();
raddr = 0; raddr = 0;
} }
@ -3775,7 +3775,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
} }
do_cell_atomic_128_store(addr, _ptr<spu_rdata_t>(args.lsa & 0x3ff80)); do_cell_atomic_128_store(addr, _ptr<spu_rdata_t>(args.lsa & 0x3ff80));
vm::reservation_notifier(addr).notify_all(-128); vm::reservation_notifier(addr).notify_all();
} }
bool spu_thread::do_mfc(bool can_escape, bool must_finish) bool spu_thread::do_mfc(bool can_escape, bool must_finish)
@ -4908,7 +4908,11 @@ s64 spu_thread::get_ch_value(u32 ch)
} }
} }
#ifdef __linux__
const bool reservation_busy_waiting = false;
#else
const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage; const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
#endif
for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true)) for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
{ {
@ -4930,8 +4934,11 @@ s64 spu_thread::get_ch_value(u32 ch)
if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR) if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR)
{ {
// Don't busy-wait with TSX - memory is sensitive // Don't busy-wait with TSX - memory is sensitive
if (!reservation_busy_waiting) if (g_use_rtm || !reservation_busy_waiting)
{ {
#ifdef __linux__
vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{50'000});
#else
if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR) if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR)
{ {
atomic_wait_engine::set_one_time_use_wait_callback(+[](u64) -> bool atomic_wait_engine::set_one_time_use_wait_callback(+[](u64) -> bool
@ -4944,7 +4951,7 @@ s64 spu_thread::get_ch_value(u32 ch)
// Wait without timeout, in this situation we have notifications for all writes making it possible // Wait without timeout, in this situation we have notifications for all writes making it possible
// Abort notifications are handled specially for performance reasons // Abort notifications are handled specially for performance reasons
vm::reservation_notifier(raddr).wait(rtime, -128); vm::reservation_notifier(raddr).wait(rtime);
continue; continue;
} }
@ -4976,7 +4983,8 @@ s64 spu_thread::get_ch_value(u32 ch)
return true; return true;
}); });
vm::reservation_notifier(raddr).wait(rtime, -128, atomic_wait_timeout{80'000}); vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{80'000});
#endif
} }
else else
{ {
@ -5464,7 +5472,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
{ {
group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED; group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED;
spu.state += cpu_flag::signal; spu.state += cpu_flag::signal;
spu.state.notify_one(cpu_flag::signal); spu.state.notify_one();
return; return;
} }
@ -5482,7 +5490,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
thread->state -= cpu_flag::suspend; thread->state -= cpu_flag::suspend;
} }
thread->state.notify_one(cpu_flag::suspend + cpu_flag::signal); thread->state.notify_one();
} }
} }
} }
@ -6244,7 +6252,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
while (true) while (true)
{ {
thread_ctrl::wait_on(data, bit_wait); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32});
old = data; old = data;
if (!(old & bit_wait)) if (!(old & bit_wait))
@ -6325,7 +6333,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
return false; return false;
} }
thread_ctrl::wait_on(data, state); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
state = data; state = data;
} }
} }
@ -6369,7 +6377,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
while (true) while (true)
{ {
thread_ctrl::wait_on(values, old); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
old = values; old = values;
if (!old.waiting) if (!old.waiting)


@ -235,7 +235,7 @@ public:
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushed to jostling_value) // Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushed to jostling_value)
ensure(this->data.bit_test_reset(off_wait)); ensure(this->data.bit_test_reset(off_wait));
data.notify_one(); utils::bless<atomic_t<u32>>(&data)[1].notify_one();
} }
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs // Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
@ -294,7 +294,7 @@ public:
if ((old & mask) == mask) if ((old & mask) == mask)
{ {
data.notify_one(); utils::bless<atomic_t<u32>>(&data)[1].notify_one();
} }
return static_cast<u32>(old); return static_cast<u32>(old);
@ -386,7 +386,7 @@ struct spu_channel_4_t
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value) // Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value)
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0)); ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
values.notify_one(); utils::bless<atomic_t<u32>>(&values)[0].notify_one();
} }
return; return;


@ -1631,7 +1631,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
if (is_paused(target->state - cpu_flag::suspend)) if (is_paused(target->state - cpu_flag::suspend))
{ {
target->state.notify_one(cpu_flag::suspend); target->state.notify_one();
} }
} }
} }
@ -1684,7 +1684,7 @@ void lv2_obj::schedule_all(u64 current_time)
if (notify_later_idx == std::size(g_to_notify)) if (notify_later_idx == std::size(g_to_notify))
{ {
// Out of notification slots, notify locally (resizable container is not worth it) // Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::signal + cpu_flag::suspend); target->state.notify_one();
} }
else else
{ {
@ -1718,7 +1718,7 @@ void lv2_obj::schedule_all(u64 current_time)
if (notify_later_idx == std::size(g_to_notify)) if (notify_later_idx == std::size(g_to_notify))
{ {
// Out of notification slots, notify locally (resizable container is not worth it) // Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::notify); target->state.notify_one();
} }
else else
{ {
@ -1948,7 +1948,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
u64 remaining = usec - passed; u64 remaining = usec - passed;
#ifdef __linux__ #ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded // NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50; constexpr u64 host_min_quantum = 10;
#else #else
// Host scheduler quantum for windows (worst case) // Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy // NOTE: On ps3 this function has very high accuracy
@ -1965,8 +1965,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
if (remaining > host_min_quantum) if (remaining > host_min_quantum)
{ {
#ifdef __linux__ #ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy wait_for(remaining);
wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
#else #else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus // Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
wait_for(remaining - (remaining % host_min_quantum)); wait_for(remaining - (remaining % host_min_quantum));
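
On Linux the minimum useful sleep quantum drops to 10 µs and the code now sleeps the whole remainder in one go, relying on the timerfd path and the 1 ns timer slack set at startup; other platforms keep a safety margin and spin it away. The shape of the loop, with std::chrono and std::this_thread standing in for the emulator's wait_for and busy-wait helpers:

#include <chrono>
#include <cstdint>
#include <thread>

void sleep_accurate(std::uint64_t usec)
{
	using namespace std::chrono;
	const auto until = steady_clock::now() + microseconds(usec);

	while (true)
	{
		const auto now = steady_clock::now();
		if (now >= until)
			break;

		const auto remaining = duration_cast<microseconds>(until - now).count();

#ifdef __linux__
		// timerfd + 1 ns timerslack: sleeping the full remainder is accurate enough
		constexpr std::int64_t host_min_quantum = 10;
		const auto sleep_usec = remaining;
#else
		// Worst-case Windows scheduler quantum: leave a margin and spin it away below
		constexpr std::int64_t host_min_quantum = 500;
		const auto sleep_usec = remaining - remaining % host_min_quantum;
#endif
		if (remaining > host_min_quantum)
			std::this_thread::sleep_for(microseconds(sleep_usec));
		else
			std::this_thread::yield(); // final stretch: yield until the deadline passes
	}
}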


@ -183,7 +183,7 @@ error_code _sys_interrupt_thread_establish(ppu_thread& ppu, vm::ptr<u32> ih, u32
}); });
it->state -= cpu_flag::stop; it->state -= cpu_flag::stop;
it->state.notify_one(cpu_flag::stop); it->state.notify_one();
return result; return result;
}); });


@ -862,7 +862,7 @@ error_code mmapper_thread_recover_page_fault(cpu_thread* cpu)
if (cpu->state & cpu_flag::signal) if (cpu->state & cpu_flag::signal)
{ {
cpu->state.notify_one(cpu_flag::signal); cpu->state.notify_one();
} }
return CELL_OK; return CELL_OK;


@ -1042,7 +1042,7 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id)
{ {
for (; index != umax; index--) for (; index != umax; index--)
{ {
threads[index]->state.notify_one(cpu_flag::stop); threads[index]->state.notify_one();
} }
} }
} notify_threads; } notify_threads;
@ -1216,7 +1216,7 @@ error_code sys_spu_thread_group_resume(ppu_thread& ppu, u32 id)
{ {
for (; index != umax; index--) for (; index != umax; index--)
{ {
threads[index]->state.notify_one(cpu_flag::suspend); threads[index]->state.notify_one();
} }
} }
} notify_threads; } notify_threads;
@ -1397,7 +1397,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value)
if (prev_resv && prev_resv != resv) if (prev_resv && prev_resv != resv)
{ {
// Batch reservation notifications if possible // Batch reservation notifications if possible
vm::reservation_notifier(prev_resv).notify_all(-128); vm::reservation_notifier(prev_resv).notify_all();
} }
prev_resv = resv; prev_resv = resv;
@ -1407,7 +1407,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value)
if (prev_resv) if (prev_resv)
{ {
vm::reservation_notifier(prev_resv).notify_all(-128); vm::reservation_notifier(prev_resv).notify_all();
} }
group->exit_status = value; group->exit_status = value;


@ -434,7 +434,7 @@ public:
{ {
// Note: by the time of notification the thread could have been deallocated which is why the direct function is used // Note: by the time of notification the thread could have been deallocated which is why the direct function is used
// TODO: Pass a narrower mask // TODO: Pass a narrower mask
atomic_wait_engine::notify_one(cpu, 4, atomic_wait::default_mask<atomic_bs_t<cpu_flag>>); atomic_wait_engine::notify_one(cpu);
} }
} }


@ -135,7 +135,7 @@ namespace vm
_xend(); _xend();
#endif #endif
if constexpr (Ack) if constexpr (Ack)
res.notify_all(-128); res.notify_all();
return; return;
} }
else else
@ -149,7 +149,7 @@ namespace vm
_xend(); _xend();
#endif #endif
if constexpr (Ack) if constexpr (Ack)
res.notify_all(-128); res.notify_all();
return result; return result;
} }
else else
@ -204,7 +204,7 @@ namespace vm
#endif #endif
res += 127; res += 127;
if (Ack) if (Ack)
res.notify_all(-128); res.notify_all();
return; return;
} }
else else
@ -218,7 +218,7 @@ namespace vm
#endif #endif
res += 127; res += 127;
if (Ack) if (Ack)
res.notify_all(-128); res.notify_all();
return result; return result;
} }
else else
@ -253,7 +253,7 @@ namespace vm
}); });
if constexpr (Ack) if constexpr (Ack)
res.notify_all(-128); res.notify_all();
return; return;
} }
else else
@ -273,7 +273,7 @@ namespace vm
}); });
if (Ack && result) if (Ack && result)
res.notify_all(-128); res.notify_all();
return result; return result;
} }
} }
@ -293,7 +293,7 @@ namespace vm
} }
if constexpr (Ack) if constexpr (Ack)
res.notify_all(-128); res.notify_all();
return; return;
} }
else else
@ -313,7 +313,7 @@ namespace vm
} }
if (Ack && result) if (Ack && result)
res.notify_all(-128); res.notify_all();
return result; return result;
} }
} }
@ -405,7 +405,7 @@ namespace vm
if constexpr (Ack) if constexpr (Ack)
{ {
res.notify_all(-128); res.notify_all();
} }
} }
else else
@ -415,7 +415,7 @@ namespace vm
if constexpr (Ack) if constexpr (Ack)
{ {
res.notify_all(-128); res.notify_all();
} }
return result; return result;


@ -55,7 +55,7 @@ namespace gl
job.completion_callback(result); job.completion_callback(result);
} }
thread_ctrl::wait_on(m_work_queue, nullptr); thread_ctrl::wait_on(m_work_queue);
} }
} }


@ -161,7 +161,7 @@ namespace rsx
this->on_close = std::move(on_close); this->on_close = std::move(on_close);
visible = true; visible = true;
const auto notify = std::make_shared<atomic_t<bool>>(false); const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>(); auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input( overlayman.attach_thread_input(


@ -295,7 +295,7 @@ namespace rsx
} }
else if (!m_input_thread_abort) else if (!m_input_thread_abort)
{ {
thread_ctrl::wait_on(m_input_token_stack, nullptr); thread_ctrl::wait_on(m_input_token_stack);
} }
} }
} }


@ -295,7 +295,7 @@ namespace rsx
{ {
if (!m_stop_input_loop) if (!m_stop_input_loop)
{ {
const auto notify = std::make_shared<atomic_t<bool>>(false); const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>(); auto& overlayman = g_fxo->get<display_manager>();
if (interactive) if (interactive)


@ -1621,7 +1621,7 @@ namespace rsx
update_panel(); update_panel();
const auto notify = std::make_shared<atomic_t<bool>>(false); const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>(); auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input( overlayman.attach_thread_input(
@ -1631,7 +1631,7 @@ namespace rsx
while (!Emu.IsStopped() && !*notify) while (!Emu.IsStopped() && !*notify)
{ {
notify->wait(false, atomic_wait_timeout{1'000'000}); notify->wait(0, atomic_wait_timeout{1'000'000});
} }
} }
} }

View file

@ -240,7 +240,7 @@ namespace rsx
this->on_close = std::move(on_close); this->on_close = std::move(on_close);
visible = true; visible = true;
const auto notify = std::make_shared<atomic_t<bool>>(false); const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>(); auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input( overlayman.attach_thread_input(
@ -250,7 +250,7 @@ namespace rsx
while (!Emu.IsStopped() && !*notify) while (!Emu.IsStopped() && !*notify)
{ {
notify->wait(false, atomic_wait_timeout{1'000'000}); notify->wait(0, atomic_wait_timeout{1'000'000});
} }
return CELL_OK; return CELL_OK;
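
The dialog code swaps its shared atomic<bool> flags for atomic<u32>, since the futex backend only waits on 32-bit words; the blocking pattern itself is unchanged. A simplified sketch (the real loop also re-checks Emu.IsStopped() and passes a 1 s timeout):

#include <atomic>
#include <cstdint>
#include <memory>

void block_until_closed(const std::shared_ptr<std::atomic<std::uint32_t>>& notify)
{
	while (!notify->load())
	{
		notify->wait(0); // sleeps while the word is still 0
	}
}

// Producer side, from the dialog's close callback:
//   *notify = 1;
//   notify->notify_one();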


@ -20,7 +20,7 @@ namespace rsx
u64 user_interface::alloc_thread_bit() u64 user_interface::alloc_thread_bit()
{ {
auto [_old, ok] = this->thread_bits.fetch_op([](u64& bits) auto [_old, ok] = this->thread_bits.fetch_op([](u32& bits)
{ {
if (~bits) if (~bits)
{ {
@ -385,7 +385,7 @@ namespace rsx
m_stop_pad_interception.release(stop_pad_interception); m_stop_pad_interception.release(stop_pad_interception);
m_stop_input_loop.release(true); m_stop_input_loop.release(true);
while (u64 b = thread_bits) while (u32 b = thread_bits)
{ {
if (b == g_thread_bit) if (b == g_thread_bit)
{ {


@ -85,7 +85,7 @@ namespace rsx
bool m_start_pad_interception = true; bool m_start_pad_interception = true;
atomic_t<bool> m_stop_pad_interception = false; atomic_t<bool> m_stop_pad_interception = false;
atomic_t<bool> m_input_thread_detached = false; atomic_t<bool> m_input_thread_detached = false;
atomic_t<u64> thread_bits = 0; atomic_t<u32> thread_bits = 0;
bool m_keyboard_input_enabled = false; // Allow keyboard input bool m_keyboard_input_enabled = false; // Allow keyboard input
bool m_keyboard_pad_handler_active = true; // Initialized as true to prevent keyboard input until proven otherwise. bool m_keyboard_pad_handler_active = true; // Initialized as true to prevent keyboard input until proven otherwise.
bool m_allow_input_on_pause = false; bool m_allow_input_on_pause = false;


@ -853,9 +853,8 @@ namespace rsx
g_fxo->get<vblank_thread>().set_thread(std::shared_ptr<named_thread<std::function<void()>>>(new named_thread<std::function<void()>>("VBlank Thread"sv, [this]() -> void g_fxo->get<vblank_thread>().set_thread(std::shared_ptr<named_thread<std::function<void()>>>(new named_thread<std::function<void()>>("VBlank Thread"sv, [this]() -> void
{ {
// See sys_timer_usleep for details
#ifdef __linux__ #ifdef __linux__
constexpr u32 host_min_quantum = 50; constexpr u32 host_min_quantum = 10;
#else #else
constexpr u32 host_min_quantum = 500; constexpr u32 host_min_quantum = 500;
#endif #endif
@ -878,8 +877,12 @@ namespace rsx
// Calculate time remaining to that time (0 if we passed it) // Calculate time remaining to that time (0 if we passed it)
const u64 wait_for = current >= post_event_time ? 0 : post_event_time - current; const u64 wait_for = current >= post_event_time ? 0 : post_event_time - current;
#ifdef __linux__
const u64 wait_sleep = wait_for;
#else
// Substract host operating system min sleep quantom to get sleep time // Substract host operating system min sleep quantom to get sleep time
const u64 wait_sleep = wait_for - u64{wait_for >= host_min_quantum} * host_min_quantum; const u64 wait_sleep = wait_for - u64{wait_for >= host_min_quantum} * host_min_quantum;
#endif
if (!wait_for) if (!wait_for)
{ {
@ -3116,7 +3119,7 @@ namespace rsx
{ {
#ifdef __linux__ #ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded // NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = remaining <= 1000 ? 10 : 50; constexpr u64 host_min_quantum = 10;
#else #else
// Host scheduler quantum for windows (worst case) // Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy // NOTE: On ps3 this function has very high accuracy
@ -3125,8 +3128,7 @@ namespace rsx
if (remaining >= host_min_quantum) if (remaining >= host_min_quantum)
{ {
#ifdef __linux__ #ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy thread_ctrl::wait_for(remaining, false);
thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
#else #else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus // Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false); thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);


@ -213,7 +213,7 @@ namespace rsx
u32 last_known_code_start = 0; u32 last_known_code_start = 0;
atomic_t<u32> external_interrupt_lock{ 0 }; atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false }; atomic_t<bool> external_interrupt_ack{ false };
atomic_t<bool> is_initialized{ false }; atomic_t<u32> is_initialized{0};
bool is_fifo_idle() const; bool is_fifo_idle() const;
void flush_fifo(); void flush_fifo();


@ -48,7 +48,7 @@ namespace vk
} }
} }
thread_ctrl::wait_on(m_work_queue, nullptr); thread_ctrl::wait_on(m_work_queue);
} }
} }


@ -1699,7 +1699,7 @@ struct registers_decoder<NV4097_SET_SHADER_CONTROL>
static void dump(std::string& out, const decoded_type& decoded) static void dump(std::string& out, const decoded_type& decoded)
{ {
fmt::append(out, "Shader control: raw_value: 0x%x reg_count: %u%s%s", fmt::append(out, "Shader control: raw_value: 0x%x reg_count: %u%s%s",
decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""), decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""),
((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? " 32b_exports" : "")); ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? " 32b_exports" : ""));
} }
}; };


@ -153,7 +153,7 @@ void fmt_class_string<cfg_mode>::format(std::string& out, u64 arg)
}); });
} }
void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<bool>* wake_up, bool track_emu_state, u64 stop_ctr) const void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<u32>* wake_up, bool track_emu_state, u64 stop_ctr) const
{ {
if (!track_emu_state) if (!track_emu_state)
{ {
@ -174,14 +174,14 @@ void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<bool>*
void Emulator::BlockingCallFromMainThread(std::function<void()>&& func) const void Emulator::BlockingCallFromMainThread(std::function<void()>&& func) const
{ {
atomic_t<bool> wake_up = false; atomic_t<u32> wake_up = 0;
CallFromMainThread(std::move(func), &wake_up); CallFromMainThread(std::move(func), &wake_up);
while (!wake_up) while (!wake_up)
{ {
ensure(thread_ctrl::get_current()); ensure(thread_ctrl::get_current());
wake_up.wait(false); wake_up.wait(0);
} }
} }
@ -424,7 +424,7 @@ void Emulator::Init()
make_path_verbose(dev_flash, true); make_path_verbose(dev_flash, true);
make_path_verbose(dev_flash2, true); make_path_verbose(dev_flash2, true);
make_path_verbose(dev_flash3, true); make_path_verbose(dev_flash3, true);
if (make_path_verbose(dev_usb, true)) if (make_path_verbose(dev_usb, true))
{ {
make_path_verbose(dev_usb + "MUSIC/", false); make_path_verbose(dev_usb + "MUSIC/", false);
@ -2152,7 +2152,7 @@ void Emulator::RunPPU()
} }
ensure(cpu.state.test_and_reset(cpu_flag::stop)); ensure(cpu.state.test_and_reset(cpu_flag::stop));
cpu.state.notify_one(cpu_flag::stop); cpu.state.notify_one();
signalled_thread = true; signalled_thread = true;
}); });
@ -2165,7 +2165,7 @@ void Emulator::RunPPU()
if (auto thr = g_fxo->try_get<named_thread<rsx::rsx_replay_thread>>()) if (auto thr = g_fxo->try_get<named_thread<rsx::rsx_replay_thread>>())
{ {
thr->state -= cpu_flag::stop; thr->state -= cpu_flag::stop;
thr->state.notify_one(cpu_flag::stop); thr->state.notify_one();
} }
} }
@ -2234,7 +2234,7 @@ void Emulator::FinalizeRunRequest()
} }
ensure(spu.state.test_and_reset(cpu_flag::stop)); ensure(spu.state.test_and_reset(cpu_flag::stop));
spu.state.notify_one(cpu_flag::stop); spu.state.notify_one();
}; };
if (m_savestate_extension_flags1 & SaveStateExtentionFlags1::ShouldCloseMenu) if (m_savestate_extension_flags1 & SaveStateExtentionFlags1::ShouldCloseMenu)
@ -2437,7 +2437,7 @@ void Emulator::Resume()
auto on_select = [](u32, cpu_thread& cpu) auto on_select = [](u32, cpu_thread& cpu)
{ {
cpu.state -= cpu_flag::dbg_global_pause; cpu.state -= cpu_flag::dbg_global_pause;
cpu.state.notify_one(cpu_flag::dbg_global_pause); cpu.state.notify_one();
}; };
idm::select<named_thread<ppu_thread>>(on_select); idm::select<named_thread<ppu_thread>>(on_select);


@ -54,7 +54,7 @@ constexpr bool is_error(game_boot_result res)
struct EmuCallbacks struct EmuCallbacks
{ {
std::function<void(std::function<void()>, atomic_t<bool>*)> call_from_main_thread; std::function<void(std::function<void()>, atomic_t<u32>*)> call_from_main_thread;
std::function<void(bool)> on_run; // (start_playtime) continuing or going ingame, so start the clock std::function<void(bool)> on_run; // (start_playtime) continuing or going ingame, so start the clock
std::function<void()> on_pause; std::function<void()> on_pause;
std::function<void()> on_resume; std::function<void()> on_resume;
@ -180,7 +180,7 @@ public:
} }
// Call from the GUI thread // Call from the GUI thread
void CallFromMainThread(std::function<void()>&& func, atomic_t<bool>* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const; void CallFromMainThread(std::function<void()>&& func, atomic_t<u32>* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const;
// Blocking call from the GUI thread // Blocking call from the GUI thread
void BlockingCallFromMainThread(std::function<void()>&& func) const; void BlockingCallFromMainThread(std::function<void()>&& func) const;


@ -68,7 +68,7 @@ void progress_dialog_server::operator()()
{ {
// Some backends like OpenGL actually initialize a lot of driver objects in the "on_init" method. // Some backends like OpenGL actually initialize a lot of driver objects in the "on_init" method.
// Wait for init to complete within reasonable time. Abort just in case we have hardware/driver issues. // Wait for init to complete within reasonable time. Abort just in case we have hardware/driver issues.
renderer->is_initialized.wait(false, atomic_wait_timeout(5 * 1000000000ull)); renderer->is_initialized.wait(0, atomic_wait_timeout(5 * 1000000000ull));
auto manager = g_fxo->try_get<rsx::overlays::display_manager>(); auto manager = g_fxo->try_get<rsx::overlays::display_manager>();
show_overlay_message = g_fxo->get<progress_dialog_workaround>().show_overlay_message_only; show_overlay_message = g_fxo->get<progress_dialog_workaround>().show_overlay_message_only;


@ -60,7 +60,7 @@ void headless_application::InitializeCallbacks()
return false; return false;
}; };
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up) callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
{ {
RequestCallFromMainThread(std::move(func), wake_up); RequestCallFromMainThread(std::move(func), wake_up);
}; };
@ -166,7 +166,7 @@ void headless_application::InitializeCallbacks()
/** /**
* Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct. * Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
*/ */
void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up) void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
{ {
func(); func();


@ -30,8 +30,8 @@ private:
} }
Q_SIGNALS: Q_SIGNALS:
void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up); void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
private Q_SLOTS: private Q_SLOTS:
static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up); static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
}; };


@ -55,6 +55,7 @@ DYNAMIC_IMPORT("ntdll.dll", NtSetTimerResolution, NTSTATUS(ULONG DesiredResoluti
#ifdef __linux__ #ifdef __linux__
#include <sys/time.h> #include <sys/time.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/prctl.h>
#endif #endif
#if defined(__APPLE__) #if defined(__APPLE__)
@ -443,6 +444,11 @@ int main(int argc, char** argv)
const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull; const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull;
#endif #endif
#ifdef __linux__
// Set timerslack value for Linux. The default value is 50,000ns. Change this to just 1 since we value precise timers.
prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
#endif
s_argv0 = argv[0]; // Save for report_fatal_error s_argv0 = argv[0]; // Save for report_fatal_error
// Only run RPCS3 to display an error // Only run RPCS3 to display an error
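
The prctl call added above matters for the new sleep paths: by default Linux pads timer expirations with up to 50,000 ns of per-thread "timer slack", which would swamp the 10 µs quantum used elsewhere in this commit. Minimal usage; the value is in nanoseconds and is inherited by threads created afterwards:

#include <sys/prctl.h>

int main()
{
	// 1 ns slack: timerfd/nanosleep expirations fire as close to the deadline as the kernel allows
	prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
}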


@ -1366,7 +1366,7 @@ void debugger_frame::DoStep(bool step_over)
} }
}); });
cpu->state.notify_one(s_pause_flags); cpu->state.notify_one();
} }
} }
@ -1412,7 +1412,7 @@ void debugger_frame::RunBtnPress()
Emu.Resume(); Emu.Resume();
} }
cpu->state.notify_one(s_pause_flags); cpu->state.notify_one();
m_debugger_list->EnableThreadFollowing(); m_debugger_list->EnableThreadFollowing();
} }
} }


@ -421,7 +421,7 @@ void gui_application::InitializeCallbacks()
return false; return false;
}; };
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up) callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
{ {
RequestCallFromMainThread(std::move(func), wake_up); RequestCallFromMainThread(std::move(func), wake_up);
}; };
@ -792,7 +792,7 @@ void gui_application::OnChangeStyleSheetRequest()
/** /**
* Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct. * Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
*/ */
void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up) void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
{ {
func(); func();


@ -118,8 +118,8 @@ Q_SIGNALS:
void OnEnableDiscEject(bool enabled); void OnEnableDiscEject(bool enabled);
void OnEnableDiscInsert(bool enabled); void OnEnableDiscInsert(bool enabled);
void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up); void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
private Q_SLOTS: private Q_SLOTS:
static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up); static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
}; };


@ -1,6 +1,7 @@
#include "atomic.hpp" #include "atomic.hpp"
#if defined(__linux__) #if defined(__linux__)
// This definition is unused on Linux
#define USE_FUTEX #define USE_FUTEX
#elif !defined(_WIN32) #elif !defined(_WIN32)
#define USE_STD #define USE_STD
@ -40,8 +41,8 @@ namespace utils
// Total number of entries. // Total number of entries.
static constexpr usz s_hashtable_size = 1u << 17; static constexpr usz s_hashtable_size = 1u << 17;
// Reference counter combined with shifted pointer (which is assumed to be 47 bit) // Reference counter combined with shifted pointer (which is assumed to be 48 bit)
static constexpr uptr s_ref_mask = (1u << 17) - 1; static constexpr uptr s_ref_mask = 0xffff;
// Fix for silly on-first-use initializer // Fix for silly on-first-use initializer
static bool s_null_wait_cb(const void*, u64, u64){ return true; }; static bool s_null_wait_cb(const void*, u64, u64){ return true; };
@ -55,163 +56,17 @@ static thread_local bool(*s_tls_one_time_wait_cb)(u64 attempts) = nullptr;
// Callback for notification functions for optimizations // Callback for notification functions for optimizations
static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = nullptr; static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = nullptr;
static inline bool operator &(atomic_wait::op lhs, atomic_wait::op_flag rhs)
{
return !!(static_cast<u8>(lhs) & static_cast<u8>(rhs));
}
// Compare data in memory with old value, and return true if they are equal // Compare data in memory with old value, and return true if they are equal
static NEVER_INLINE bool ptr_cmp(const void* data, u32 _size, u128 old128, u128 mask128, atomic_wait::info* ext = nullptr) static NEVER_INLINE bool ptr_cmp(const void* data, u32 old, atomic_wait::info* ext = nullptr)
{ {
using atomic_wait::op;
using atomic_wait::op_flag;
const u8 size = static_cast<u8>(_size);
const op flag{static_cast<u8>(_size >> 8)};
bool result = false;
if (size <= 8)
{
u64 new_value = 0;
u64 old_value = static_cast<u64>(old128);
u64 mask = static_cast<u64>(mask128) & (u64{umax} >> ((64 - size * 8) & 63));
// Don't load memory on empty mask
switch (mask ? size : 0)
{
case 0: break;
case 1: new_value = reinterpret_cast<const atomic_t<u8>*>(data)->load(); break;
case 2: new_value = reinterpret_cast<const atomic_t<u16>*>(data)->load(); break;
case 4: new_value = reinterpret_cast<const atomic_t<u32>*>(data)->load(); break;
case 8: new_value = reinterpret_cast<const atomic_t<u64>*>(data)->load(); break;
default:
{
fmt::throw_exception("Bad size (arg=0x%x)", _size);
}
}
if (flag & op_flag::bit_not)
{
new_value = ~new_value;
}
if (!mask) [[unlikely]]
{
new_value = 0;
old_value = 0;
}
else
{
if (flag & op_flag::byteswap)
{
switch (size)
{
case 2:
{
new_value = stx::se_storage<u16>::swap(static_cast<u16>(new_value));
old_value = stx::se_storage<u16>::swap(static_cast<u16>(old_value));
mask = stx::se_storage<u16>::swap(static_cast<u16>(mask));
break;
}
case 4:
{
new_value = stx::se_storage<u32>::swap(static_cast<u32>(new_value));
old_value = stx::se_storage<u32>::swap(static_cast<u32>(old_value));
mask = stx::se_storage<u32>::swap(static_cast<u32>(mask));
break;
}
case 8:
{
new_value = stx::se_storage<u64>::swap(new_value);
old_value = stx::se_storage<u64>::swap(old_value);
mask = stx::se_storage<u64>::swap(mask);
break;
}
default:
{
break;
}
}
}
// Make most significant bit sign bit
const auto shv = std::countl_zero(mask);
new_value &= mask;
old_value &= mask;
new_value <<= shv;
old_value <<= shv;
}
s64 news = new_value;
s64 olds = old_value;
u64 newa = news < 0 ? (0ull - new_value) : new_value;
u64 olda = olds < 0 ? (0ull - old_value) : old_value;
switch (op{static_cast<u8>(static_cast<u8>(flag) & 0xf)})
{
case op::eq: result = old_value == new_value; break;
case op::slt: result = olds < news; break;
case op::sgt: result = olds > news; break;
case op::ult: result = old_value < new_value; break;
case op::ugt: result = old_value > new_value; break;
case op::alt: result = olda < newa; break;
case op::agt: result = olda > newa; break;
case op::pop:
{
// Count is taken from least significant byte and ignores some flags
const u64 count = static_cast<u64>(old128) & 0xff;
result = count < utils::popcnt64(new_value);
break;
}
default:
{
fmt::throw_exception("ptr_cmp(): unrecognized atomic wait operation.");
}
}
}
else if (size == 16 && (flag == op::eq || flag == (op::eq | op_flag::inverse)))
{
u128 new_value = 0;
u128 old_value = old128;
u128 mask = mask128;
// Don't load memory on empty mask
if (mask) [[likely]]
{
new_value = atomic_storage<u128>::load(*reinterpret_cast<const u128*>(data));
}
// TODO
result = !((old_value ^ new_value) & mask);
}
else if (size > 16 && !~mask128 && (flag == op::eq || flag == (op::eq | op_flag::inverse)))
{
// Interpret old128 as a pointer to the old value
ensure(!(old128 >> (64 + 17)));
result = std::memcmp(data, reinterpret_cast<const void*>(static_cast<uptr>(old128)), size) == 0;
}
else
{
fmt::throw_exception("ptr_cmp(): no alternative operations are supported for non-standard atomic wait yet.");
}
if (flag & op_flag::inverse)
{
result = !result;
}
// Check other wait variables if provided // Check other wait variables if provided
if (result) if (reinterpret_cast<const atomic_t<u32>*>(data)->load() == old)
{ {
if (ext) [[unlikely]] if (ext) [[unlikely]]
{ {
for (auto e = ext; e->data; e++) for (auto e = ext; e->data; e++)
{ {
if (!ptr_cmp(e->data, e->size, e->old, e->mask)) if (!ptr_cmp(e->data, e->old))
{ {
return false; return false;
} }
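
The comparison that gates sleeping is reduced to a 32-bit equality test against the captured old value, applied first to the primary word and then to every extension entry. A rough standalone equivalent (using std::atomic in place of the project's atomic_t):

#include <atomic>
#include <cstdint>

struct wait_info { const void* data; std::uint32_t old; };

// True while every watched 32-bit word still holds its captured value.
static bool still_waiting(const void* data, std::uint32_t old, const wait_info* ext = nullptr)
{
    if (reinterpret_cast<const std::atomic<std::uint32_t>*>(data)->load() != old)
    {
        return false; // primary word changed
    }

    for (auto e = ext; e && e->data; e++)
    {
        if (!still_waiting(e->data, e->old))
        {
            return false; // an extension word changed
        }
    }

    return true;
}
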
@ -283,18 +138,15 @@ namespace
#endif #endif
// Essentially a fat semaphore // Essentially a fat semaphore
struct cond_handle struct alignas(64) cond_handle
{ {
// Combined pointer (most significant 47 bits) and ref counter (17 least significant bits) // Combined pointer (most significant 48 bits) and ref counter (16 least significant bits)
atomic_t<u64> ptr_ref; atomic_t<u64> ptr_ref;
u64 tid; u64 tid;
u128 mask; u32 oldv;
u128 oldv;
u64 tsc0; u64 tsc0;
u16 link; u16 link;
u8 size;
u8 flag;
atomic_t<u32> sync; atomic_t<u32> sync;
#ifdef USE_STD #ifdef USE_STD
@ -316,7 +168,7 @@ namespace
mtx.init(mtx); mtx.init(mtx);
#endif #endif
ensure(!ptr_ref.exchange((iptr << 17) | 1)); ensure(!ptr_ref.exchange((iptr << 16) | 1));
} }
void destroy() void destroy()
@ -324,10 +176,7 @@ namespace
tid = 0; tid = 0;
tsc0 = 0; tsc0 = 0;
link = 0; link = 0;
size = 0;
flag = 0;
sync.release(0); sync.release(0);
mask = 0;
oldv = 0; oldv = 0;
#ifdef USE_STD #ifdef USE_STD
@ -517,7 +366,7 @@ namespace
// TLS storage for a few allocated "semaphores" to allow skipping initialization // TLS storage for a few allocated "semaphores" to allow skipping initialization
static thread_local tls_cond_handler s_tls_conds{}; static thread_local tls_cond_handler s_tls_conds{};
static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1) static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
{ {
// Try to get cond from tls slot instead // Try to get cond from tls slot instead
u16* ptls = tls_slot >= std::size(s_tls_conds.cond) ? nullptr : s_tls_conds.cond + tls_slot; u16* ptls = tls_slot >= std::size(s_tls_conds.cond) ? nullptr : s_tls_conds.cond + tls_slot;
@ -526,8 +375,7 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
{ {
// Fast reinitialize // Fast reinitialize
const u32 id = std::exchange(*ptls, 0); const u32 id = std::exchange(*ptls, 0);
s_cond_list[id].mask = mask; s_cond_list[id].ptr_ref.release((iptr << 16) | 1);
s_cond_list[id].ptr_ref.release((iptr << 17) | 1);
return id; return id;
} }
@ -581,7 +429,6 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
const u32 id = level3 * 64 + std::countr_one(bits); const u32 id = level3 * 64 + std::countr_one(bits);
// Initialize new "semaphore" // Initialize new "semaphore"
s_cond_list[id].mask = mask;
s_cond_list[id].init(iptr); s_cond_list[id].init(iptr);
return id; return id;
} }
@ -625,8 +472,6 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
{ {
// Fast finalization // Fast finalization
cond->sync.release(0); cond->sync.release(0);
cond->size = 0;
cond->mask = 0;
*ptls = static_cast<u16>(cond_id); *ptls = static_cast<u16>(cond_id);
return; return;
} }
@ -652,7 +497,7 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
s_cond_sem1.atomic_op(FN(x -= u128{1} << (level1 * 14))); s_cond_sem1.atomic_op(FN(x -= u128{1} << (level1 * 14)));
} }
static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
{ {
bool did_ref = false; bool did_ref = false;
@ -673,7 +518,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
return false; return false;
} }
if (iptr && (val >> 17) != iptr) if (iptr && (val >> 16) != iptr)
{ {
// Pointer mismatch // Pointer mismatch
return false; return false;
@ -686,11 +531,6 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
return false; return false;
} }
if (!(mask & cond->mask) && cond->size)
{
return false;
}
if (!did_ref) if (!did_ref)
{ {
val++; val++;
@ -702,7 +542,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
if (ok) if (ok)
{ {
// Check other fields again // Check other fields again
if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3 || (cond->size && !(mask & cond->mask))) if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3)
{ {
did_ref = true; did_ref = true;
continue; continue;
@ -713,7 +553,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
if ((old & s_ref_mask) == s_ref_mask) if ((old & s_ref_mask) == s_ref_mask)
{ {
fmt::throw_exception("Reference count limit (131071) reached in an atomic notifier."); fmt::throw_exception("Reference count limit (%u) reached in an atomic notifier.", s_ref_mask);
} }
break; break;
@ -736,8 +576,8 @@ namespace
u64 bits: 24; // Allocated bits u64 bits: 24; // Allocated bits
u64 prio: 24; // Reserved u64 prio: 24; // Reserved
u64 ref : 17; // Ref counter u64 ref : 16; // Ref counter
u64 iptr: 47; // First pointer to use slot (to count used slots) u64 iptr: 48; // First pointer to use slot (to count used slots)
}; };
// Need to spare 16 bits for ref counter // Need to spare 16 bits for ref counter
@ -760,7 +600,7 @@ namespace
static void slot_free(uptr ptr, atomic_t<u16>* slot, u32 tls_slot) noexcept; static void slot_free(uptr ptr, atomic_t<u16>* slot, u32 tls_slot) noexcept;
template <typename F> template <typename F>
static auto slot_search(uptr iptr, u128 mask, F func) noexcept; static auto slot_search(uptr iptr, F func) noexcept;
}; };
static_assert(sizeof(root_info) == 64); static_assert(sizeof(root_info) == 64);
@ -944,7 +784,7 @@ void root_info::slot_free(uptr iptr, atomic_t<u16>* slot, u32 tls_slot) noexcept
} }
template <typename F> template <typename F>
FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept FORCE_INLINE auto root_info::slot_search(uptr iptr, F func) noexcept
{ {
u32 index = 0; u32 index = 0;
[[maybe_unused]] u32 total = 0; [[maybe_unused]] u32 total = 0;
@ -974,7 +814,7 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept
for (u32 i = 0; i < cond_count; i++) for (u32 i = 0; i < cond_count; i++)
{ {
if (cond_id_lock(cond_ids[i], mask, iptr)) if (cond_id_lock(cond_ids[i], iptr))
{ {
if (func(cond_ids[i])) if (func(cond_ids[i]))
{ {
@ -994,18 +834,82 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept
} }
} }
SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext) SAFE_BUFFERS(void)
atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext)
{ {
const auto stamp0 = utils::get_unique_tsc(); uint ext_size = 0;
if (!s_tls_wait_cb(data, 0, stamp0)) #ifdef __linux__
::timespec ts{};
if (timeout + 1)
{
if (ext) [[unlikely]]
{
// futex_waitv uses absolute timeout
::clock_gettime(CLOCK_MONOTONIC, &ts);
}
ts.tv_sec += timeout / 1'000'000'000;
ts.tv_nsec += timeout % 1'000'000'000;
if (ts.tv_nsec > 1'000'000'000)
{
ts.tv_sec++;
ts.tv_nsec -= 1'000'000'000;
}
}
futex_waitv vec[atomic_wait::max_list]{};
vec[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
vec[0].uaddr = reinterpret_cast<__u64>(data);
vec[0].val = old_value;
if (ext) [[unlikely]]
{
for (auto e = ext; e->data; e++)
{
ext_size++;
vec[ext_size].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
vec[ext_size].uaddr = reinterpret_cast<__u64>(e->data);
vec[ext_size].val = e->old;
}
}
if (ext_size) [[unlikely]]
{
if (syscall(SYS_futex_waitv, +vec, ext_size + 1, 0, timeout + 1 ? &ts : nullptr, CLOCK_MONOTONIC) == -1)
{
if (errno == ENOSYS)
{
fmt::throw_exception("futex_waitv is not supported (Linux kernel is too old)");
}
if (errno == EINVAL)
{
fmt::throw_exception("futex_waitv: bad param");
}
}
}
else
{
if (futex(const_cast<void*>(data), FUTEX_WAIT_PRIVATE, old_value, timeout + 1 ? &ts : nullptr) == -1)
{
if (errno == EINVAL)
{
fmt::throw_exception("futex: bad param");
}
}
}
return;
#endif
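
For reference, a self-contained sketch of what the new Linux path boils down to: waiting on two 32-bit words at once through the futex_waitv syscall (kernel 5.16 or newer; glibc has no wrapper, and the timeout is absolute on the chosen clock). The function name and the fixed two-word shape are illustrative only:

#include <linux/futex.h>  // struct futex_waitv, FUTEX_32, FUTEX_PRIVATE_FLAG
#include <sys/syscall.h>  // SYS_futex_waitv (needs recent headers)
#include <unistd.h>
#include <cstdint>
#include <ctime>

static long wait_two(std::uint32_t* a, std::uint32_t old_a,
                     std::uint32_t* b, std::uint32_t old_b,
                     std::uint64_t timeout_ns)
{
    futex_waitv vec[2]{};
    vec[0].uaddr = reinterpret_cast<__u64>(a);
    vec[0].val   = old_a;
    vec[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
    vec[1].uaddr = reinterpret_cast<__u64>(b);
    vec[1].val   = old_b;
    vec[1].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;

    // futex_waitv takes an absolute timeout on the given clock.
    timespec ts{};
    ::clock_gettime(CLOCK_MONOTONIC, &ts);
    ts.tv_sec  += timeout_ns / 1'000'000'000;
    ts.tv_nsec += timeout_ns % 1'000'000'000;
    if (ts.tv_nsec >= 1'000'000'000) { ts.tv_sec++; ts.tv_nsec -= 1'000'000'000; }

    // >= 0: index of the woken futex; -1: errno is EAGAIN (value mismatch),
    // ETIMEDOUT (expired) or ENOSYS (kernel older than 5.16).
    return ::syscall(SYS_futex_waitv, vec, 2, 0, &ts, CLOCK_MONOTONIC);
}
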
if (!s_tls_wait_cb(data, 0, 0))
{ {
return; return;
} }
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17); const auto stamp0 = utils::get_unique_tsc();
uint ext_size = 0; const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
uptr iptr_ext[atomic_wait::max_list - 1]{}; uptr iptr_ext[atomic_wait::max_list - 1]{};
@ -1026,18 +930,18 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
} }
} }
iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 17); iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 16);
ext_size++; ext_size++;
} }
} }
const u32 cond_id = cond_alloc(iptr, mask, 0); const u32 cond_id = cond_alloc(iptr, 0);
u32 cond_id_ext[atomic_wait::max_list - 1]{}; u32 cond_id_ext[atomic_wait::max_list - 1]{};
for (u32 i = 0; i < ext_size; i++) for (u32 i = 0; i < ext_size; i++)
{ {
cond_id_ext[i] = cond_alloc(iptr_ext[i], ext[i].mask, i + 1); cond_id_ext[i] = cond_alloc(iptr_ext[i], i + 1);
} }
const auto slot = root_info::slot_alloc(iptr); const auto slot = root_info::slot_alloc(iptr);
@ -1060,8 +964,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
// Store some info for notifiers (some may be unused) // Store some info for notifiers (some may be unused)
cond->link = 0; cond->link = 0;
cond->size = static_cast<u8>(size);
cond->flag = static_cast<u8>(size >> 8);
cond->oldv = old_value; cond->oldv = old_value;
cond->tsc0 = stamp0; cond->tsc0 = stamp0;
@ -1071,8 +973,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
{ {
// Extensions point to original cond_id, copy remaining info // Extensions point to original cond_id, copy remaining info
cond_ext[i]->link = cond_id; cond_ext[i]->link = cond_id;
cond_ext[i]->size = static_cast<u8>(ext[i].size);
cond_ext[i]->flag = static_cast<u8>(ext[i].size >> 8);
cond_ext[i]->oldv = ext[i].old; cond_ext[i]->oldv = ext[i].old;
cond_ext[i]->tsc0 = stamp0; cond_ext[i]->tsc0 = stamp0;
@ -1105,7 +1005,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
u64 attempts = 0; u64 attempts = 0;
while (ptr_cmp(data, size, old_value, mask, ext)) while (ptr_cmp(data, old_value, ext))
{ {
if (s_tls_one_time_wait_cb) if (s_tls_one_time_wait_cb)
{ {
@ -1263,7 +1163,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
} }
template <bool NoAlert = false> template <bool NoAlert = false>
static u32 alert_sema(u32 cond_id, u32 size, u128 mask) static u32 alert_sema(u32 cond_id, u32 size)
{ {
ensure(cond_id); ensure(cond_id);
@ -1271,11 +1171,11 @@ static u32 alert_sema(u32 cond_id, u32 size, u128 mask)
u32 ok = 0; u32 ok = 0;
if (!cond->size || mask & cond->mask) if (true)
{ {
// Redirect if necessary // Redirect if necessary
const auto _old = cond; const auto _old = cond;
const auto _new = _old->link ? cond_id_lock(_old->link, u128(-1)) : _old; const auto _new = _old->link ? cond_id_lock(_old->link) : _old;
if (_new && _new->tsc0 == _old->tsc0) if (_new && _new->tsc0 == _old->tsc0)
{ {
@ -1336,50 +1236,58 @@ void atomic_wait_engine::set_notify_callback(void(*cb)(const void*, u64))
s_tls_notify_cb = cb; s_tls_notify_cb = cb;
} }
void atomic_wait_engine::notify_one(const void* data, u32 size, u128 mask) void atomic_wait_engine::notify_one(const void* data)
{ {
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
if (s_tls_notify_cb) if (s_tls_notify_cb)
s_tls_notify_cb(data, 0); s_tls_notify_cb(data, 0);
root_info::slot_search(iptr, mask, [&](u32 cond_id) #ifdef __linux__
futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 1);
#else
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
root_info::slot_search(iptr, [&](u32 cond_id)
{ {
if (alert_sema(cond_id, size, mask)) if (alert_sema(cond_id, 4))
{ {
return true; return true;
} }
return false; return false;
}); });
#endif
if (s_tls_notify_cb) if (s_tls_notify_cb)
s_tls_notify_cb(data, -1); s_tls_notify_cb(data, -1);
} }
SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u128 mask) SAFE_BUFFERS(void)
atomic_wait_engine::notify_all(const void* data)
{ {
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
if (s_tls_notify_cb) if (s_tls_notify_cb)
s_tls_notify_cb(data, 0); s_tls_notify_cb(data, 0);
#ifdef __linux__
futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 1);
#else
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
// Array count for batch notification // Array count for batch notification
u32 count = 0; u32 count = 0;
// Array itself. // Array itself.
u32 cond_ids[128]; u32 cond_ids[128];
root_info::slot_search(iptr, mask, [&](u32 cond_id) root_info::slot_search(iptr, [&](u32 cond_id)
{ {
if (count >= 128) if (count >= 128)
{ {
// Unusually large number of semaphores: fall back to the notify_one algorithm // Unusually large number of semaphores: fall back to the notify_one algorithm
alert_sema(cond_id, size, mask); alert_sema(cond_id, 4);
return false; return false;
} }
u32 res = alert_sema<true>(cond_id, size, mask); u32 res = alert_sema<true>(cond_id, 4);
if (~res <= u16{umax}) if (~res <= u16{umax})
{ {
@ -1395,7 +1303,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
{ {
const u32 cond_id = *(std::end(cond_ids) - i - 1); const u32 cond_id = *(std::end(cond_ids) - i - 1);
if (!s_cond_list[cond_id].wakeup(size ? 1 : 2)) if (!s_cond_list[cond_id].wakeup(1))
{ {
*(std::end(cond_ids) - i - 1) = ~cond_id; *(std::end(cond_ids) - i - 1) = ~cond_id;
} }
@ -1434,6 +1342,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
{ {
cond_free(~*(std::end(cond_ids) - i - 1)); cond_free(~*(std::end(cond_ids) - i - 1));
} }
#endif
if (s_tls_notify_cb) if (s_tls_notify_cb)
s_tls_notify_cb(data, -1); s_tls_notify_cb(data, -1);
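
On the wake side the Linux build now bypasses the hashtable entirely and issues a plain FUTEX_WAKE on the address. A minimal illustration of that call (a standalone helper, not the project's own futex() wrapper):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <climits>

// Wake up to `count` threads blocked in FUTEX_WAIT/futex_waitv on this 32-bit word.
static long futex_wake(void* addr, int count = INT_MAX)
{
    return ::syscall(SYS_futex, addr, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0);
}
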

View file

@ -129,54 +129,21 @@ enum class atomic_wait_timeout : u64
inf = 0xffffffffffffffff, inf = 0xffffffffffffffff,
}; };
template <typename T>
class lf_queue;
namespace stx
{
template <typename T>
class atomic_ptr;
}
// Various extensions for atomic_t::wait // Various extensions for atomic_t::wait
namespace atomic_wait namespace atomic_wait
{ {
// Max number of simultaneous atomic variables to wait on (can be extended if really necessary) // Max number of simultaneous atomic variables to wait on (can be extended if really necessary)
constexpr uint max_list = 8; constexpr uint max_list = 8;
enum class op : u8
{
eq, // Wait while value is bitwise equal to
slt, // Wait while signed value is less than
sgt, // Wait while signed value is greater than
ult, // Wait while unsigned value is less than
ugt, // Wait while unsigned value is greater than
alt, // Wait while absolute value is less than
agt, // Wait while absolute value is greater than
pop, // Wait while set bit count of the value is less than
__max
};
static_assert(static_cast<u8>(op::__max) == 8);
enum class op_flag : u8
{
inverse = 1 << 4, // Perform inverse operation (negate the result)
bit_not = 1 << 5, // Perform bitwise NOT on loaded value before operation
byteswap = 1 << 6, // Perform byteswap on both arguments and masks when applicable
};
constexpr op_flag op_be = std::endian::native == std::endian::little ? op_flag::byteswap : op_flag{0};
constexpr op_flag op_le = std::endian::native == std::endian::little ? op_flag{0} : op_flag::byteswap;
constexpr op operator |(op_flag lhs, op_flag rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op operator |(op_flag lhs, op rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op operator |(op lhs, op_flag rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op op_ne = op::eq | op_flag::inverse;
constexpr struct any_value_t constexpr struct any_value_t
{ {
template <typename T> template <typename T>
@ -186,46 +153,10 @@ namespace atomic_wait
} }
} any_value; } any_value;
template <typename X>
using payload_type = decltype(std::declval<X>().observe());
template <typename X, typename T = payload_type<X>>
constexpr u128 default_mask = sizeof(T) <= 8 ? u128{u64{umax} >> ((64 - sizeof(T) * 8) & 63)} : u128(-1);
template <typename X, typename T = payload_type<X>>
constexpr u128 get_value(X&, T value = T{}, ...)
{
static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
static_assert(sizeof(T) <= 16);
return std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
}
struct info struct info
{ {
const void* data; const void* data;
u32 size; u32 old;
u128 old;
u128 mask;
template <typename X, typename T = payload_type<X>>
constexpr void set_value(X& a, T value = T{})
{
old = get_value(a, value);
}
template <typename X, typename T = payload_type<X>>
constexpr void set_mask(T value)
{
static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
static_assert(sizeof(T) <= 16);
mask = std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
}
template <typename X, typename T = payload_type<X>>
constexpr void set_mask()
{
mask = default_mask<X>;
}
}; };
template <uint Max, typename... T> template <uint Max, typename... T>
@ -243,9 +174,9 @@ namespace atomic_wait
constexpr list& operator=(const list&) noexcept = default; constexpr list& operator=(const list&) noexcept = default;
template <typename... U, typename = std::void_t<decltype(std::declval<U>().template wait<op::eq>(any_value))...>> template <typename... U, typename = std::void_t<decltype(std::declval<U>().wait(any_value))...>>
constexpr list(U&... vars) constexpr list(U&... vars)
: m_info{{&vars, sizeof(vars.observe()), get_value(vars), default_mask<U>}...} : m_info{{&vars, 0}...}
{ {
static_assert(sizeof...(U) == Max, "Inconsistent amount of atomics."); static_assert(sizeof...(U) == Max, "Inconsistent amount of atomics.");
} }
@ -256,40 +187,37 @@ namespace atomic_wait
static_assert(sizeof...(U) == Max, "Inconsistent amount of values."); static_assert(sizeof...(U) == Max, "Inconsistent amount of values.");
auto* ptr = m_info; auto* ptr = m_info;
((ptr->template set_value<T>(*static_cast<T*>(ptr->data), values), ptr++), ...); (((ptr->old = std::bit_cast<u32>(values)), ptr++), ...);
return *this; return *this;
} }
template <typename... U> template <uint Index, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().wait(any_value))>>
constexpr list& masks(U... masks)
{
static_assert(sizeof...(U) <= Max, "Too many masks.");
auto* ptr = m_info;
((ptr++)->template set_mask<T>(masks), ...);
return *this;
}
template <uint Index, op Flags = op::eq, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>>
constexpr void set(T2& var, U value) constexpr void set(T2& var, U value)
{ {
static_assert(Index < Max); static_assert(Index < Max);
m_info[Index].data = &var; m_info[Index].data = &var;
m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8); m_info[Index].old = std::bit_cast<u32>(value);
m_info[Index].template set_value<T2>(var, value);
m_info[Index].template set_mask<T2>();
} }
template <uint Index, op Flags = op::eq, typename T2, typename U, typename V, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>> template <uint Index, typename T2>
constexpr void set(T2& var, U value, V mask) constexpr void set(lf_queue<T2>& var, std::nullptr_t = nullptr)
{ {
static_assert(Index < Max); static_assert(Index < Max);
static_assert(sizeof(var) == sizeof(uptr));
m_info[Index].data = &var; m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8); m_info[Index].old = 0;
m_info[Index].template set_value<T2>(var, value); }
m_info[Index].template set_mask<T2>(mask);
template <uint Index, typename T2>
constexpr void set(stx::atomic_ptr<T2>& var, std::nullptr_t = nullptr)
{
static_assert(Index < Max);
static_assert(sizeof(var) == sizeof(uptr));
m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
m_info[Index].old = 0;
} }
// Timeout is discouraged // Timeout is discouraged
@ -302,7 +230,7 @@ namespace atomic_wait
} }
}; };
template <typename... T, typename = std::void_t<decltype(std::declval<T>().template wait<op::eq>(any_value))...>> template <typename... T, typename = std::void_t<decltype(std::declval<T>().wait(any_value))...>>
list(T&... vars) -> list<sizeof...(T), T...>; list(T&... vars) -> list<sizeof...(T), T...>;
} }
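
A usage sketch of the trimmed-down list interface (the variable names and the task type are assumptions): one slot watches a plain 32-bit word, the other watches an atomic_ptr for becoming non-null.

struct task {};

atomic_t<u32> sync{0};
stx::atomic_ptr<task> queue{};

atomic_wait::list<2> list{};
list.set<0>(sync, 0);        // sleep while sync still reads 0
list.set<1>(queue, nullptr); // sleep while the stored pointer is still null
list.wait(atomic_wait_timeout::inf);
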
@ -322,20 +250,15 @@ private:
template <uint Max, typename... T> template <uint Max, typename... T>
friend class atomic_wait::list; friend class atomic_wait::list;
static void wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext = nullptr); static void wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext = nullptr);
public: public:
static void notify_one(const void* data, u32 size, u128 mask128); static void notify_one(const void* data);
static void notify_all(const void* data, u32 size, u128 mask128); static void notify_all(const void* data);
static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0)); static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0));
static void set_notify_callback(void(*cb)(const void* data, u64 progress)); static void set_notify_callback(void(*cb)(const void* data, u64 progress));
static void set_one_time_use_wait_callback(bool(*cb)(u64 progress)); static void set_one_time_use_wait_callback(bool (*cb)(u64 progress));
static void notify_all(const void* data)
{
notify_all(data, 0, u128(-1));
}
}; };
template <uint Max, typename... T> template <uint Max, typename... T>
@ -343,7 +266,7 @@ void atomic_wait::list<Max, T...>::wait(atomic_wait_timeout timeout)
{ {
static_assert(!!Max, "Cannot initiate atomic wait with empty list."); static_assert(!!Max, "Cannot initiate atomic wait with empty list.");
atomic_wait_engine::wait(m_info[0].data, m_info[0].size, m_info[0].old, static_cast<u64>(timeout), m_info[0].mask, m_info + 1); atomic_wait_engine::wait(m_info[0].data, m_info[0].old, static_cast<u64>(timeout), m_info + 1);
} }
// Helper class, provides access to compiler-specific atomic intrinsics // Helper class, provides access to compiler-specific atomic intrinsics
@ -1759,46 +1682,31 @@ public:
}); });
} }
// Timeout is discouraged void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
template <atomic_wait::op Flags = atomic_wait::op::eq> requires(sizeof(type) == 4)
void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{ {
const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value); atomic_wait_engine::wait(&m_data, std::bit_cast<u32>(old_value), static_cast<u64>(timeout));
const u128 mask = atomic_wait::default_mask<atomic_t>;
atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
} }
// Overload with mask (only selected bits are checked), timeout is discouraged [[deprecated]] void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
template <atomic_wait::op Flags = atomic_wait::op::eq> requires(sizeof(type) == 8)
void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{ {
const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value); atomic_wait::info ext[2]{};
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value); ext[0].data = reinterpret_cast<const char*>(&m_data) + 4;
atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask); ext[0].old = std::bit_cast<u64>(old_value) >> 32;
atomic_wait_engine::wait(&m_data, std::bit_cast<u64>(old_value), static_cast<u64>(timeout), ext);
} }
void notify_one() noexcept void notify_one()
requires(sizeof(type) == 4 || sizeof(type) == 8)
{ {
atomic_wait_engine::notify_one(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>); atomic_wait_engine::notify_one(&m_data);
} }
// Notify with mask, allowing to not wake up thread which doesn't wait on this mask void notify_all()
void notify_one(type mask_value) noexcept requires(sizeof(type) == 4 || sizeof(type) == 8)
{ {
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value); atomic_wait_engine::notify_all(&m_data);
atomic_wait_engine::notify_one(&m_data, sizeof(T), mask);
}
void notify_all() noexcept
{
atomic_wait_engine::notify_all(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>);
}
// Notify all threads with mask, allowing to not wake up threads which don't wait on them
void notify_all(type mask_value) noexcept
{
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
atomic_wait_engine::notify_all(&m_data, sizeof(T), mask);
} }
}; };
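
Only 4-byte atomics keep a first-class wait() now (8-byte values fall back to the deprecated two-word path, everything else no longer compiles). A small sketch of the remaining pattern, waiting for a 32-bit counter to move past a snapshot:

atomic_t<u32> generation{0};

// waiter thread:
u32 seen = generation.load();
while (generation.load() == seen)
{
    generation.wait(seen); // may wake spuriously, hence the loop
}

// producer thread:
generation++;
generation.notify_all();
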
@ -1874,23 +1782,6 @@ public:
{ {
return base::fetch_xor(1) != 0; return base::fetch_xor(1) != 0;
} }
// Timeout is discouraged
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(bool old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{
base::template wait<Flags>(old_value, 1, timeout);
}
void notify_one() noexcept
{
base::notify_one(1);
}
void notify_all() noexcept
{
base::notify_all(1);
}
}; };
// Specializations // Specializations
@ -1904,12 +1795,6 @@ struct std::common_type<atomic_t<T, Align>, T2> : std::common_type<T, std::commo
template <typename T, typename T2, usz Align2> template <typename T, typename T2, usz Align2>
struct std::common_type<T, atomic_t<T2, Align2>> : std::common_type<std::common_type_t<T>, T2> {}; struct std::common_type<T, atomic_t<T2, Align2>> : std::common_type<std::common_type_t<T>, T2> {};
namespace atomic_wait
{
template <usz Align>
constexpr u128 default_mask<atomic_t<bool, Align>> = 1;
}
#ifndef _MSC_VER #ifndef _MSC_VER
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#pragma GCC diagnostic pop #pragma GCC diagnostic pop

View file

@ -6,33 +6,35 @@
// Mutex that tries to maintain the order of acquisition // Mutex that tries to maintain the order of acquisition
class fifo_mutex class fifo_mutex
{ {
// Low 8 bits are incremented on acquisition, high 8 bits are incremented on release // Low 16 bits are incremented on acquisition, high 16 bits are incremented on release
atomic_t<u16> m_value{0}; atomic_t<u32> m_value{0};
public: public:
constexpr fifo_mutex() noexcept = default; constexpr fifo_mutex() noexcept = default;
void lock() noexcept void lock() noexcept
{ {
const u16 val = m_value.fetch_op([](u16& val) // clang-format off
const u32 val = m_value.fetch_op([](u32& val)
{ {
val = (val & 0xff00) | ((val + 1) & 0xff); val = (val & 0xffff0000) | ((val + 1) & 0xffff);
}); });
// clang-format on
if (val >> 8 != (val & 0xff)) [[unlikely]] if (val >> 16 != (val & 0xffff)) [[unlikely]]
{ {
// TODO: implement busy waiting along with moving to cpp file // TODO: implement busy waiting along with moving to cpp file
m_value.wait<atomic_wait::op_ne>(((val + 1) & 0xff) << 8, 0xff00); m_value.wait((val & 0xffff0000) | ((val + 1) & 0xffff));
} }
} }
bool try_lock() noexcept bool try_lock() noexcept
{ {
const u16 val = m_value.load(); const u32 val = m_value.load();
if (val >> 8 == (val & 0xff)) if (val >> 16 == (val & 0xffff))
{ {
if (m_value.compare_and_swap(val, ((val + 1) & 0xff) | (val & 0xff00))) if (m_value.compare_and_swap(val, ((val + 1) & 0xffff) | (val & 0xffff0000)))
{ {
return true; return true;
} }
@ -43,9 +45,9 @@ public:
void unlock() noexcept void unlock() noexcept
{ {
const u16 val = m_value.add_fetch(0x100); const u32 val = m_value.add_fetch(0x10000);
if (val >> 8 != (val & 0xff)) if (val >> 16 != (val & 0xffff))
{ {
m_value.notify_one(); m_value.notify_one();
} }
@ -53,9 +55,9 @@ public:
bool is_free() const noexcept bool is_free() const noexcept
{ {
const u16 val = m_value.load(); const u32 val = m_value.load();
return (val >> 8) == (val & 0xff); return (val >> 16) == (val & 0xffff);
} }
void lock_unlock() noexcept void lock_unlock() noexcept
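
fifo_mutex now keeps its ticket in the low 16 bits and its "now serving" count in the high 16 bits of a single u32, so the whole state fits the 32-bit wait path. Usage is unchanged; a minimal sketch:

#include <mutex>

fifo_mutex m;

void worker()
{
    std::lock_guard lock(m); // lock() takes a ticket, unlock() advances the serving half
    // ... critical section, entered in acquisition order ...
}
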

View file

@ -259,8 +259,8 @@ namespace utils
void audio_decoder::clear() void audio_decoder::clear()
{ {
track_fully_decoded = false; track_fully_decoded = 0;
track_fully_consumed = false; track_fully_consumed = 0;
has_error = false; has_error = false;
m_size = 0; m_size = 0;
duration_ms = 0; duration_ms = 0;
@ -274,7 +274,7 @@ namespace utils
{ {
auto& thread = *m_thread; auto& thread = *m_thread;
thread = thread_state::aborting; thread = thread_state::aborting;
track_fully_consumed = true; track_fully_consumed = 1;
track_fully_consumed.notify_one(); track_fully_consumed.notify_one();
thread(); thread();
m_thread.reset(); m_thread.reset();
@ -511,7 +511,7 @@ namespace utils
media_log.notice("audio_decoder: about to decode: %s (index=%d)", ::at32(m_context.playlist, m_context.current_track), m_context.current_track); media_log.notice("audio_decoder: about to decode: %s (index=%d)", ::at32(m_context.playlist, m_context.current_track), m_context.current_track);
decode_track(::at32(m_context.playlist, m_context.current_track)); decode_track(::at32(m_context.playlist, m_context.current_track));
track_fully_decoded = true; track_fully_decoded = 1;
if (has_error) if (has_error)
{ {
@ -521,7 +521,7 @@ namespace utils
// Let's only decode one track at a time. Wait for the consumer to finish reading the track. // Let's only decode one track at a time. Wait for the consumer to finish reading the track.
media_log.notice("audio_decoder: waiting until track is consumed..."); media_log.notice("audio_decoder: waiting until track is consumed...");
thread_ctrl::wait_on(track_fully_consumed, false); thread_ctrl::wait_on(track_fully_consumed, 0);
track_fully_consumed = false; track_fully_consumed = false;
} }

View file

@ -77,8 +77,8 @@ namespace utils
std::vector<u8> data; std::vector<u8> data;
atomic_t<u64> m_size = 0; atomic_t<u64> m_size = 0;
atomic_t<u64> duration_ms = 0; atomic_t<u64> duration_ms = 0;
atomic_t<bool> track_fully_decoded{false}; atomic_t<u32> track_fully_decoded{0};
atomic_t<bool> track_fully_consumed{false}; atomic_t<u32> track_fully_consumed{0};
atomic_t<bool> has_error{false}; atomic_t<bool> has_error{false};
std::deque<std::pair<u64, u64>> timestamps_ms; std::deque<std::pair<u64, u64>> timestamps_ms;

View file

@ -3,6 +3,7 @@
#include <cstdint> #include <cstdint>
#include <memory> #include <memory>
#include "atomic.hpp" #include "atomic.hpp"
#include "asm.hpp"
namespace stx namespace stx
{ {
@ -21,7 +22,7 @@ namespace stx
// Basic assumption of userspace pointer size // Basic assumption of userspace pointer size
constexpr uint c_ptr_size = 48; constexpr uint c_ptr_size = 48;
// Use lower 17 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted) // Use lower 16 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
constexpr uint c_ref_mask = 0xffff, c_ref_size = 16; constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;
// Remaining pointer bits // Remaining pointer bits
@ -1054,20 +1055,19 @@ namespace stx
return observe() == r.get(); return observe() == r.get();
} }
template <atomic_wait::op Flags = atomic_wait::op::eq> void wait(std::nullptr_t, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
void wait(const volatile void* value, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
{ {
m_val.wait<Flags>(reinterpret_cast<uptr>(value) << c_ref_size, c_ptr_mask, timeout); utils::bless<atomic_t<u32>>(&m_val)[1].wait(0, timeout);
} }
void notify_one() void notify_one()
{ {
m_val.notify_one(c_ptr_mask); utils::bless<atomic_t<u32>>(&m_val)[1].notify_one();
} }
void notify_all() void notify_all()
{ {
m_val.notify_all(c_ptr_mask); utils::bless<atomic_t<u32>>(&m_val)[1].notify_all();
} }
}; };
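
Since masked waits are gone, atomic_ptr waits on just the upper 32-bit half of its packed control word (pointer << 16 | refcount). On a little-endian host that half is non-zero exactly when a real pointer is stored, so wait(0) means "sleep while null". A sketch of the same trick on a bare control word (names are illustrative):

atomic_t<u64> ctrl{0}; // (pointer << 16) | borrowed-reference count

// waiter thread: block until a pointer is published
auto* high_half = utils::bless<atomic_t<u32>>(&ctrl) + 1;
high_half->wait(0);

// publisher thread:
int dummy{};
void* some_ptr = &dummy; // stands in for the managed object
ctrl = (reinterpret_cast<uptr>(some_ptr) << 16) | 1;
high_half->notify_all();
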
@ -1110,18 +1110,6 @@ namespace stx
} null_ptr; } null_ptr;
} }
namespace atomic_wait
{
template <typename T>
constexpr u128 default_mask<stx::atomic_ptr<T>> = stx::c_ptr_mask;
template <typename T>
constexpr u128 get_value(stx::atomic_ptr<T>&, const volatile void* value = nullptr)
{
return reinterpret_cast<uptr>(value) << stx::c_ref_size;
}
}
using stx::null_ptr; using stx::null_ptr;
using stx::single_ptr; using stx::single_ptr;
using stx::shared_ptr; using stx::shared_ptr;