rpcs3/rpcs3/Emu/perf_meter.hpp
Nekotekina 580bd2b25e Initial Linux Aarch64 support
* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
* - Instruction specializations with multiple accuracy flags
* - Adjust calling convention for speed
* - Removed precise/fast setting, replaced with static
* - Started refactoring interpreters for building at runtime JIT
*   (I got tired of poor compiler optimizations)
* - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
* - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
* - As with PPU, settings changed to static/dynamic for interpreters.
* - Precise options will be implemented later
* Fix termination after fatal error dialog
2022-01-15 06:48:04 +03:00

181 lines
4 KiB
C++

#pragma once
#include "util/types.hpp"
#include "util/logs.hpp"
#include "util/tsc.hpp"
#include "system_config.h"
#include <array>
#include <cmath>
// Declares the `perf_log` channel with the display name "PERF".
// NOTE(review): LOG_CHANNEL is a project macro (presumably from util/logs.hpp — confirm);
// nothing in this header uses the channel directly.
LOG_CHANNEL(perf_log, "PERF");
// Byte image of the template argument `Name`, followed by one zero byte, so that
// `.data()` can be handed to APIs expecting a null-terminated `const char*`.
// The bytes are copied in the object's native representation.
// TODO: constexpr with the help of bitcast
template <auto Name>
inline const auto perf_name = []
{
	// One extra element: value-initialization leaves it as the terminating '\0'
	using buffer_t = std::array<char, sizeof(Name) + 1>;

	buffer_t text{};

	// A non-type template argument has no address; copy it into a local first
	const auto raw = Name;
	std::memcpy(text.data(), &raw, text.size() - 1);

	return text;
}();
// Common base of all perf_stat<> instantiations.
// Holds the accumulated values of one counter and declares the helpers that the
// per-thread storage uses to register, feed and drain itself.
// All member functions are only declared here; their definitions live elsewhere.
class perf_stat_base
{
public:
	// Collect all data, report it, and clean
	static void report() noexcept;

	perf_stat_base() noexcept = default;
	~perf_stat_base() {}

	// Not copyable
	perf_stat_base(const perf_stat_base&) = delete;
	perf_stat_base& operator =(const perf_stat_base&) = delete;

protected:
	// Register TLS storage for stats
	static void add(u64 ns[66], const char* name) noexcept;

	// Unregister TLS storage and drain its data
	static void remove(u64 ns[66], const char* name) noexcept;

	// Get end time; accumulate value to the TLS
	static void push(u64 data[66], u64 start_time, const char* name) noexcept;

	// Accumulate values from a thread
	void push(u64 ns[66]) noexcept;

	// Print accumulated values
	void print(const char* name) const noexcept;

private:
	// Accumulated values (same element count as the per-thread arrays passed above)
	atomic_t<u64> m_log[66]{};
};
// Statistics sink for the event identified by `ShortName`.
// Each thread owns a plain (non-atomic) array that is registered with
// perf_stat_base when the thread-local object is constructed and unregistered
// when it is destroyed.
template <auto ShortName>
class perf_stat final : public perf_stat_base
{
	// Null-terminated name passed to every perf_stat_base call
	static const char* tag() noexcept
	{
		return perf_name<ShortName>.data();
	}

	struct perf_stat_local
	{
		// Local non-atomic values for increments
		u64 m_log[66]{};

		// Make this thread's storage known to the base
		perf_stat_local() noexcept
		{
			perf_stat_base::add(m_log, tag());
		}

		// Withdraw this thread's storage from the base
		~perf_stat_local()
		{
			perf_stat_base::remove(m_log, tag());
		}
	};

	// One accumulator per thread
	static inline thread_local perf_stat_local g_tls_perf_stat;

public:
	// Forward `start_time` together with this thread's array to perf_stat_base::push
	static FORCE_INLINE SAFE_BUFFERS(void) push(u64 start_time) noexcept
	{
		perf_stat_base::push(g_tls_perf_stat.m_log, start_time, tag());
	}
};
// Scoped event timer.
// Construction stores a timestamp (utils::get_tsc()) in slot 0; destruction hands
// that timestamp to perf_stat<ShortName>::push() unless the meter was disabled
// with reset() or g_cfg.core.perf_report is off.
// Slots 1..N belong to `SubEvents` and are filled by push<Event>(); they are
// currently not reported (see TODO in the destructor).
template <auto ShortName, auto... SubEvents>
class perf_meter
{
	// Slot 0: start timestamp (0 means "disabled"); slots 1..N: one per sub-event
	u64 m_timestamps[1 + sizeof...(SubEvents)];

public:
	// Start measuring immediately
	FORCE_INLINE SAFE_BUFFERS() perf_meter() noexcept
	{
		restart();
	}

	// Copy first timestamp from a meter of any name; sub-event slots start zeroed
	template <auto SN, auto... S>
	FORCE_INLINE SAFE_BUFFERS() perf_meter(const perf_meter<SN, S...>& r) noexcept
	{
		m_timestamps[0] = r.get();
		std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
	}

	// Take over the first timestamp and disable the source so that it does not report
	template <auto SN, auto... S>
	SAFE_BUFFERS() perf_meter(perf_meter<SN, S...>&& r) noexcept
	{
		m_timestamps[0] = r.get();

		// Zero the sub-event slots like every other constructor does;
		// previously they were left uninitialized on this path
		std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
		r.reset();
	}

	// Copy first timestamp; own sub-event slots are left untouched
	template <auto SN, auto... S>
	SAFE_BUFFERS(perf_meter&) operator =(const perf_meter<SN, S...>& r) noexcept
	{
		m_timestamps[0] = r.get();
		return *this;
	}

	// Take over the first timestamp and disable the source.
	// NOTE(review): the parameter is a non-const lvalue reference, so this overload is
	// chosen for non-const lvalues, while rvalues bind to the const& overload above and
	// are not disabled. `&&` may have been intended — confirm against callers before
	// changing the signature.
	template <auto SN, auto... S>
	SAFE_BUFFERS(perf_meter&) operator =(perf_meter<SN, S...>& r) noexcept
	{
		m_timestamps[0] = r.get();
		r.reset();
		return *this;
	}

	// Push subevent data in array: store the current timestamp in the slot of `Event`.
	// `Index` is the search position within SubEvents and is not meant to be passed by callers.
	// NOTE(review): an `Event` that is not part of SubEvents appears to end in an
	// out-of-range index inside `if constexpr`, i.e. a compile error — confirm this is intended.
	template <auto Event, usz Index = 0>
	SAFE_BUFFERS(void) push() noexcept
	{
		// TODO: should use more efficient search with type comparison, then value comparison, or pattern matching
		if constexpr (std::array<bool, sizeof...(SubEvents)>{(SubEvents == Event)...}[Index])
		{
			// Push actual timestamp into an array (slot 0 is reserved for the start time)
			m_timestamps[Index + 1] = utils::get_tsc();
		}
		else if constexpr (Index < sizeof...(SubEvents))
		{
			// Proceed search recursively
			push<Event, Index + 1>();
		}
	}

	// Obtain initial timestamp (0 if the meter is disabled)
	u64 get() const noexcept
	{
		return m_timestamps[0];
	}

	// Disable this counter: the destructor will not report it
	FORCE_INLINE SAFE_BUFFERS(void) reset() noexcept
	{
		m_timestamps[0] = 0;
	}

	// Re-initialize first timestamp and clear all sub-event slots
	FORCE_INLINE SAFE_BUFFERS(void) restart() noexcept
	{
		m_timestamps[0] = utils::get_tsc();
		std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
	}

	// Report the measurement, if enabled
	SAFE_BUFFERS() ~perf_meter()
	{
		// Disabled counter
		if (!m_timestamps[0]) [[unlikely]]
		{
			return;
		}

		// Reporting is switched off in the configuration
		if (!g_cfg.core.perf_report) [[likely]]
		{
			return;
		}

		// Register perf stat: pass the start timestamp on; per the declaration comment
		// of perf_stat_base::push, the end time is obtained there
		perf_stat<ShortName>::push(m_timestamps[0]);

		// TODO: handle push(), currently ignored
	}
};