mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-28 13:28:01 +03:00
rsx: Optimize static heap allocations
This commit is contained in:
parent
76948b6364
commit
26a7e9653f
4 changed files with 123 additions and 91 deletions
|
@ -404,57 +404,76 @@ namespace
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
NEVER_INLINE std::tuple<T, T, u32> upload_untouched_skip_restart(std::span<to_be_t<const T>> src, std::span<T> dst, T restart_index)
|
NEVER_INLINE std::tuple<T, T, u32> upload_untouched_skip_restart(std::span<to_be_t<const T>> src, std::span<T> dst, T restart_index)
|
||||||
{
|
|
||||||
T min_index = index_limit<T>();
|
|
||||||
T max_index = 0;
|
|
||||||
u32 written = 0;
|
|
||||||
u32 length = ::size32(src);
|
|
||||||
|
|
||||||
for (u32 i = written; i < length; ++i)
|
|
||||||
{
|
{
|
||||||
T index = src[i];
|
T min_index = index_limit<T>();
|
||||||
if (index != restart_index)
|
T max_index = 0;
|
||||||
|
u32 written = 0;
|
||||||
|
u32 length = ::size32(src);
|
||||||
|
|
||||||
|
for (u32 i = written; i < length; ++i)
|
||||||
{
|
{
|
||||||
dst[written++] = min_max(min_index, max_index, index);
|
T index = src[i];
|
||||||
|
if (index != restart_index)
|
||||||
|
{
|
||||||
|
dst[written++] = min_max(min_index, max_index, index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return std::make_tuple(min_index, max_index, written);
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_tuple(min_index, max_index, written);
|
template<typename T>
|
||||||
}
|
std::tuple<T, T, u32> upload_untouched(std::span<to_be_t<const T>> src, std::span<T> dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
std::tuple<T, T, u32> upload_untouched(std::span<to_be_t<const T>> src, std::span<T> dst, rsx::primitive_type draw_mode, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
|
||||||
{
|
|
||||||
if (!is_primitive_restart_enabled)
|
|
||||||
{
|
{
|
||||||
return untouched_impl::upload_untouched(src, dst);
|
if (!is_primitive_restart_enabled)
|
||||||
}
|
|
||||||
else if constexpr (std::is_same_v<T, u16>)
|
|
||||||
{
|
|
||||||
if (primitive_restart_index > 0xffff)
|
|
||||||
{
|
{
|
||||||
return untouched_impl::upload_untouched(src, dst);
|
return untouched_impl::upload_untouched(src, dst);
|
||||||
}
|
}
|
||||||
|
else if constexpr (std::is_same_v<T, u16>)
|
||||||
|
{
|
||||||
|
if (primitive_restart_index > 0xffff)
|
||||||
|
{
|
||||||
|
return untouched_impl::upload_untouched(src, dst);
|
||||||
|
}
|
||||||
|
else if (is_primitive_disjointed(draw_mode))
|
||||||
|
{
|
||||||
|
return upload_untouched_skip_restart(src, dst, static_cast<u16>(primitive_restart_index));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return primitive_restart_impl::upload_untouched(src, dst, static_cast<u16>(primitive_restart_index));
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (is_primitive_disjointed(draw_mode))
|
else if (is_primitive_disjointed(draw_mode))
|
||||||
{
|
{
|
||||||
return upload_untouched_skip_restart(src, dst, static_cast<u16>(primitive_restart_index));
|
return upload_untouched_skip_restart(src, dst, primitive_restart_index);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return primitive_restart_impl::upload_untouched(src, dst, static_cast<u16>(primitive_restart_index));
|
return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (is_primitive_disjointed(draw_mode))
|
|
||||||
|
void iota16(u16* dst, u32 count)
|
||||||
{
|
{
|
||||||
return upload_untouched_skip_restart(src, dst, primitive_restart_index);
|
unsigned i = 0;
|
||||||
|
#if defined(ARCH_X64) || defined(ARCH_ARM64)
|
||||||
|
const unsigned step = 8; // We do 8 entries per step
|
||||||
|
const __m128i vec_step = _mm_set1_epi16(8); // Constant to increment the raw values
|
||||||
|
__m128i values = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||||
|
__m128i* vec_ptr = reinterpret_cast<__m128i*>(dst);
|
||||||
|
|
||||||
|
for (; (i + step) <= count; i += step, vec_ptr++)
|
||||||
|
{
|
||||||
|
_mm_stream_si128(vec_ptr, values);
|
||||||
|
_mm_add_epi16(values, vec_step);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; i < count; ++i)
|
||||||
|
dst[i] = i;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
return primitive_restart_impl::upload_untouched(src, dst, primitive_restart_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
std::tuple<T, T, u32> expand_indexed_triangle_fan(std::span<to_be_t<const T>> src, std::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
std::tuple<T, T, u32> expand_indexed_triangle_fan(std::span<to_be_t<const T>> src, std::span<T> dst, bool is_primitive_restart_enabled, u32 primitive_restart_index)
|
||||||
|
@ -624,8 +643,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst,
|
||||||
switch (draw_mode)
|
switch (draw_mode)
|
||||||
{
|
{
|
||||||
case rsx::primitive_type::line_loop:
|
case rsx::primitive_type::line_loop:
|
||||||
for (unsigned i = 0; i < count; ++i)
|
iota16(typedDst, count);
|
||||||
typedDst[i] = i;
|
|
||||||
typedDst[count] = 0;
|
typedDst[count] = 0;
|
||||||
return;
|
return;
|
||||||
case rsx::primitive_type::triangle_fan:
|
case rsx::primitive_type::triangle_fan:
|
||||||
|
|
|
@ -20,29 +20,33 @@ protected:
|
||||||
template<int Alignment>
|
template<int Alignment>
|
||||||
bool can_alloc(usz size) const
|
bool can_alloc(usz size) const
|
||||||
{
|
{
|
||||||
usz alloc_size = utils::align(size, Alignment);
|
const usz alloc_size = utils::align(size, Alignment);
|
||||||
usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
||||||
if (aligned_put_pos + alloc_size < m_size)
|
const usz alloc_end = aligned_put_pos + alloc_size;
|
||||||
|
|
||||||
|
if (alloc_end < m_size) [[ likely ]]
|
||||||
{
|
{
|
||||||
// range before get
|
// Range before get
|
||||||
if (aligned_put_pos + alloc_size < m_get_pos)
|
if (alloc_end < m_get_pos)
|
||||||
return true;
|
return true;
|
||||||
// range after get
|
|
||||||
|
// Range after get
|
||||||
if (aligned_put_pos > m_get_pos)
|
if (aligned_put_pos > m_get_pos)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
// ..]....[..get..
|
||||||
// ..]....[..get..
|
if (aligned_put_pos < m_get_pos)
|
||||||
if (aligned_put_pos < m_get_pos)
|
return false;
|
||||||
return false;
|
|
||||||
// ..get..]...[...
|
// ..get..]...[...
|
||||||
// Actually all resources extending beyond heap space starts at 0
|
// Actually all resources extending beyond heap space starts at 0
|
||||||
if (alloc_size > m_get_pos)
|
if (alloc_size > m_get_pos)
|
||||||
return false;
|
return false;
|
||||||
return true;
|
|
||||||
}
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grow the buffer to hold at least size bytes
|
// Grow the buffer to hold at least size bytes
|
||||||
|
@ -53,10 +57,9 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
usz m_size;
|
usz m_size;
|
||||||
usz m_put_pos; // Start of free space
|
usz m_put_pos; // Start of free space
|
||||||
usz m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
|
usz m_get_pos; // End of free space
|
||||||
usz m_current_allocated_size;
|
usz m_min_guard_size; // If an allocation touches the guard region, reset the heap to avoid going over budget
|
||||||
usz m_largest_allocated_pool;
|
|
||||||
|
|
||||||
char* m_name;
|
char* m_name;
|
||||||
public:
|
public:
|
||||||
|
@ -65,8 +68,6 @@ public:
|
||||||
data_heap(const data_heap&) = delete;
|
data_heap(const data_heap&) = delete;
|
||||||
data_heap(data_heap&&) = delete;
|
data_heap(data_heap&&) = delete;
|
||||||
|
|
||||||
usz m_get_pos; // End of free space
|
|
||||||
|
|
||||||
void init(usz heap_size, const char* buffer_name = "unnamed", usz min_guard_size=0x10000)
|
void init(usz heap_size, const char* buffer_name = "unnamed", usz min_guard_size=0x10000)
|
||||||
{
|
{
|
||||||
m_name = const_cast<char*>(buffer_name);
|
m_name = const_cast<char*>(buffer_name);
|
||||||
|
@ -75,10 +76,8 @@ public:
|
||||||
m_put_pos = 0;
|
m_put_pos = 0;
|
||||||
m_get_pos = heap_size - 1;
|
m_get_pos = heap_size - 1;
|
||||||
|
|
||||||
//allocation stats
|
// Allocation stats
|
||||||
m_min_guard_size = min_guard_size;
|
m_min_guard_size = min_guard_size;
|
||||||
m_current_allocated_size = 0;
|
|
||||||
m_largest_allocated_pool = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int Alignment>
|
template<int Alignment>
|
||||||
|
@ -89,24 +88,45 @@ public:
|
||||||
|
|
||||||
if (!can_alloc<Alignment>(size) && !grow(alloc_size))
|
if (!can_alloc<Alignment>(size) && !grow(alloc_size))
|
||||||
{
|
{
|
||||||
fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d",
|
fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
|
||||||
m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
|
m_name, m_size, size, m_min_guard_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
const usz block_length = (aligned_put_pos - m_put_pos) + alloc_size;
|
const usz alloc_end = aligned_put_pos + alloc_size;
|
||||||
m_current_allocated_size += block_length;
|
if (alloc_end < m_size)
|
||||||
m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length);
|
|
||||||
|
|
||||||
if (aligned_put_pos + alloc_size < m_size)
|
|
||||||
{
|
{
|
||||||
m_put_pos = aligned_put_pos + alloc_size;
|
m_put_pos = alloc_end;
|
||||||
return aligned_put_pos;
|
return aligned_put_pos;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
m_put_pos = alloc_size;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For use in cases where we take a fixed amount each time
|
||||||
|
*/
|
||||||
|
template<int Alignment, usz Size = Alignment>
|
||||||
|
usz static_alloc()
|
||||||
|
{
|
||||||
|
static_assert((Size & (Alignment - 1)) == 0);
|
||||||
|
ensure((m_put_pos & (Alignment - 1)) == 0);
|
||||||
|
|
||||||
|
if (!can_alloc<Alignment>(Size) && !grow(Size))
|
||||||
{
|
{
|
||||||
m_put_pos = alloc_size;
|
fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d requested=%d guard=%d",
|
||||||
return 0;
|
m_name, m_size, Size, m_min_guard_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const usz alloc_end = m_put_pos + Size;
|
||||||
|
if (m_put_pos + Size < m_size)
|
||||||
|
{
|
||||||
|
m_put_pos = alloc_end;
|
||||||
|
return m_put_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_put_pos = Size;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -117,30 +137,25 @@ public:
|
||||||
return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
|
return (m_put_pos > 0) ? m_put_pos - 1 : m_size - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void set_get_pos(usz value)
|
||||||
|
{
|
||||||
|
m_get_pos = value;
|
||||||
|
}
|
||||||
|
|
||||||
virtual bool is_critical() const
|
virtual bool is_critical() const
|
||||||
{
|
{
|
||||||
const usz guard_length = std::max(m_min_guard_size, m_largest_allocated_pool);
|
return m_min_guard_size >= m_size;
|
||||||
return (m_current_allocated_size + guard_length) >= m_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset_allocation_stats()
|
void reset_allocation_stats()
|
||||||
{
|
{
|
||||||
m_current_allocated_size = 0;
|
|
||||||
m_largest_allocated_pool = 0;
|
|
||||||
m_get_pos = get_current_put_pos_minus_one();
|
m_get_pos = get_current_put_pos_minus_one();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Updates the current_allocated_size metrics
|
// Updates the current_allocated_size metrics
|
||||||
void notify()
|
inline void notify()
|
||||||
{
|
{
|
||||||
if (m_get_pos == umax)
|
// @unused
|
||||||
m_current_allocated_size = 0;
|
|
||||||
else if (m_get_pos < m_put_pos)
|
|
||||||
m_current_allocated_size = (m_put_pos - m_get_pos - 1);
|
|
||||||
else if (m_get_pos > m_put_pos)
|
|
||||||
m_current_allocated_size = (m_put_pos + (m_size - m_get_pos - 1));
|
|
||||||
else
|
|
||||||
fmt::throw_exception("m_put_pos == m_get_pos!");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
usz size() const
|
usz size() const
|
||||||
|
|
|
@ -41,7 +41,7 @@ namespace vk::data_heap_manager
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
heap->m_get_pos = found->second;
|
heap->set_get_pos(found->second);
|
||||||
heap->notify();
|
heap->notify();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1162,7 +1162,6 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||||
{
|
{
|
||||||
heap_critical = false;
|
heap_critical = false;
|
||||||
u32 test = 1u << std::countr_zero(flags);
|
u32 test = 1u << std::countr_zero(flags);
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
switch (flags & test)
|
switch (flags & test)
|
||||||
|
@ -2046,7 +2045,7 @@ void VKGSRender::load_program_env()
|
||||||
check_heap_status(VK_HEAP_CHECK_VERTEX_ENV_STORAGE);
|
check_heap_status(VK_HEAP_CHECK_VERTEX_ENV_STORAGE);
|
||||||
|
|
||||||
// Vertex state
|
// Vertex state
|
||||||
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
|
const auto mem = m_vertex_env_ring_info.static_alloc<256>();
|
||||||
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
|
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
|
||||||
|
|
||||||
m_draw_processor.fill_scale_offset_data(buf, false);
|
m_draw_processor.fill_scale_offset_data(buf, false);
|
||||||
|
@ -2134,7 +2133,7 @@ void VKGSRender::load_program_env()
|
||||||
{
|
{
|
||||||
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
||||||
|
|
||||||
auto mem = m_fragment_env_ring_info.alloc<256>(256);
|
auto mem = m_fragment_env_ring_info.static_alloc<256>();
|
||||||
auto buf = m_fragment_env_ring_info.map(mem, 32);
|
auto buf = m_fragment_env_ring_info.map(mem, 32);
|
||||||
|
|
||||||
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
|
m_draw_processor.fill_fragment_state_buffer(buf, current_fragment_program);
|
||||||
|
@ -2146,7 +2145,7 @@ void VKGSRender::load_program_env()
|
||||||
{
|
{
|
||||||
check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE);
|
check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE);
|
||||||
|
|
||||||
auto mem = m_fragment_texture_params_ring_info.alloc<256>(768);
|
auto mem = m_fragment_texture_params_ring_info.static_alloc<256, 768>();
|
||||||
auto buf = m_fragment_texture_params_ring_info.map(mem, 768);
|
auto buf = m_fragment_texture_params_ring_info.map(mem, 768);
|
||||||
|
|
||||||
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
|
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
|
||||||
|
@ -2158,7 +2157,7 @@ void VKGSRender::load_program_env()
|
||||||
{
|
{
|
||||||
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
check_heap_status(VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE);
|
||||||
|
|
||||||
auto mem = m_raster_env_ring_info.alloc<256>(256);
|
auto mem = m_raster_env_ring_info.static_alloc<256>();
|
||||||
auto buf = m_raster_env_ring_info.map(mem, 128);
|
auto buf = m_raster_env_ring_info.map(mem, 128);
|
||||||
|
|
||||||
std::memcpy(buf, rsx::method_registers.polygon_stipple_pattern(), 128);
|
std::memcpy(buf, rsx::method_registers.polygon_stipple_pattern(), 128);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue