Implemented writing swizzled textures

Optimized protected_region::combine
Implemented conditional async mode for nv3089::image_in & nv0039::buffer_notify
This commit is contained in:
DHrpcs3 2016-03-15 23:00:05 +03:00
parent 53e3833aa1
commit 82a32fcb5a
6 changed files with 173 additions and 32 deletions

View file

@ -57,10 +57,11 @@ public:
m_end = m_begin + value; m_end = m_begin + value;
return *this; return *this;
} }
void extend(const range& other) void extend(const range& other)
{ {
m_begin = std::min(m_begin, other.m_begin); m_begin = std::min(m_begin, other.m_begin);
m_end = std::min(m_end, other.m_end); m_end = std::max(m_end, other.m_end);
} }
constexpr bool valid() const constexpr bool valid() const

View file

@ -1059,7 +1059,6 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
return false; return false;
} }
u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
@ -1204,6 +1203,15 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
dst_info.swizzled = context_surface == CELL_GCM_CONTEXT_SWIZZLE2D; dst_info.swizzled = context_surface == CELL_GCM_CONTEXT_SWIZZLE2D;
if (dst_info.swizzled)
{
u8 sw_width_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 24;
dst_info.log2_width = sw_width_log2 ? sw_width_log2 : 1;
dst_info.log2_height = sw_height_log2 ? sw_height_log2 : 1;
}
switch (dst_color_format) switch (dst_color_format)
{ {
case CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5: case CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5:
@ -1443,6 +1451,7 @@ gl::texture_info surface_info(rsx::thread &rsx, rsx::surface_color_format format
{ {
gl::texture_info info{}; gl::texture_info info{};
info.format = gl::get_texture_format(surface_format_to_texture_format(format)); info.format = gl::get_texture_format(surface_format_to_texture_format(format));
info.format.flags &= gl::texture_flags::allow_swizzle;
rsx::tiled_region region = rsx.get_tiled_address(offset, location); rsx::tiled_region region = rsx.get_tiled_address(offset, location);
@ -1510,24 +1519,29 @@ void GLGSRender::init_buffers(bool skip_reading)
m_surface.width = clip_width * m_surface.width_mult + clip_x; m_surface.width = clip_width * m_surface.width_mult + clip_x;
m_surface.height = clip_height * m_surface.height_mult + clip_y; m_surface.height = clip_height * m_surface.height_mult + clip_y;
bool swizzled_surface = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
rsx::for_each_active_color_surface([&](int index) rsx::for_each_active_color_surface([&](int index)
{ {
u32 offset = rsx::method_registers[mr_color_offset[index]]; u32 offset = rsx::method_registers[mr_color_offset[index]];
u32 location = rsx::method_registers[mr_color_dma[index]]; u32 location = rsx::method_registers[mr_color_dma[index]];
u32 pitch = rsx::method_registers[mr_color_pitch[index]]; u32 pitch = rsx::method_registers[mr_color_pitch[index]];
bool swizzled = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
gl::texture_info info = surface_info(*this, m_surface.color_format, offset, location, m_surface.width, m_surface.height, pitch); gl::texture_info info = surface_info(*this, m_surface.color_format, offset, location, m_surface.width, m_surface.height, pitch);
info.swizzled = swizzled; info.swizzled = swizzled_surface;
if (swizzled_surface)
{
info.log2_width = m_surface.log2width;
info.log2_height = m_surface.log2height;
}
cached_color_buffers[index] = &texture_cache.entry(info, skip_reading ? gl::cache_buffers::none : gl::cache_buffers::local); cached_color_buffers[index] = &texture_cache.entry(info, skip_reading ? gl::cache_buffers::none : gl::cache_buffers::local);
draw_fbo.color[index] = cached_color_buffers[index]->view(); draw_fbo.color[index] = cached_color_buffers[index]->view();
}); });
{ {
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z] & ~63; u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z] & ~63;
int bpp; int bpp;
@ -1542,13 +1556,16 @@ void GLGSRender::init_buffers(bool skip_reading)
break; break;
} }
if (pitch && pitch < bpp * m_surface.width) if (swizzled_surface || (pitch && pitch < bpp * m_surface.width))
{ {
__glcheck draw_fbo.depth_stencil = null_texture; __glcheck draw_fbo.depth_stencil = null_texture;
cached_depth_buffer = nullptr; cached_depth_buffer = nullptr;
} }
else else
{ {
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
if (!pitch) if (!pitch)
{ {
pitch = m_surface.width * bpp; pitch = m_surface.width * bpp;

View file

@ -333,19 +333,59 @@ namespace gl
} }
else else
{ {
if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
{
//TODO
LOG_ERROR(RSX, "writing swizzled texture[0x%x] to host buffer", info.start_address);
}
gl::pixel_pack_settings{} gl::pixel_pack_settings{}
.row_length(info.pitch / info.format.bpp) .row_length(info.pitch / info.format.bpp)
.aligment(1) .aligment(1)
.swap_bytes((info.format.flags & gl::texture_flags::swap_bytes) != gl::texture_flags::none) .swap_bytes((info.format.flags & gl::texture_flags::swap_bytes) != gl::texture_flags::none)
.apply(); .apply();
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address)); if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
{
std::unique_ptr<u8[]> linear_pixels(new u8[info.size()]);
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, linear_pixels.get());
u16 sw_width = 1 << info.log2_width;
u16 sw_height = 1 << info.log2_height;
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != info.width || sw_height != info.height)
{
std::unique_ptr<u8[]> sw_temp(new u8[info.format.bpp * sw_width * sw_height]);
switch (info.format.bpp)
{
case 1:
rsx::pad_texture<u8>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
case 2:
rsx::pad_texture<u16>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
case 4:
rsx::pad_texture<u32>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
}
linear_pixels = std::move(sw_temp);
}
switch (info.format.bpp)
{
case 1:
rsx::convert_linear_swizzle<u8>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
case 2:
rsx::convert_linear_swizzle<u16>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
case 4:
rsx::convert_linear_swizzle<u32>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
}
}
else
{
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address));
}
} }
ignore(gl::cache_buffers::all); ignore(gl::cache_buffers::all);
@ -590,20 +630,93 @@ namespace gl
//TODO //TODO
} }
// Updates the page flags for the memory span described by `range` by
// forwarding to vm::page_protect(address, size, 0, <4th>, <5th>):
//   - 4th argument: whichever of vm::page_readable / vm::page_writable is NOT
//     present in `protect`;
//   - 5th argument: `protect` itself.
// NOTE(review): presumably the 4th argument is the set of flags to grant and
// the 5th the set of flags to revoke, i.e. the bits in `protect` name the
// accesses being taken away -- confirm against vm::page_protect's declaration.
// The return value of vm::page_protect (if any) is not inspected here.
void set_page_protection(range<u32> range, u8 protect)
{
vm::page_protect(range.begin(), range.size(), 0, ~protect & (vm::page_readable | vm::page_writable), protect);
}
void protected_region::combine(protected_region& region) void protected_region::combine(protected_region& region)
{ {
region.unprotect(); cache_access new_protection = region.requires_protection();
unprotect();
for (auto &texture : region.m_textures) for (auto &texture : region.m_textures)
{ {
texture.second.parent(this); texture.second.parent(this);
if (!m_textures.emplace(texture).second) if (!m_textures.emplace(texture).second)
{ {
throw EXCEPTION(""); throw EXCEPTION("");
} }
new_protection |= texture.second.requires_protection();
} }
u8 new_protection_flags = 0;
if ((new_protection & cache_access::read) != cache_access::none)
{
new_protection_flags |= vm::page_readable;
}
if ((new_protection & cache_access::write) != cache_access::none)
{
new_protection_flags |= vm::page_writable;
}
if (m_current_protection != new_protection_flags && region.m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ begin(), region.end() }, new_protection_flags);
}
else
{
set_page_protection({ region.begin(), end() }, new_protection_flags);
}
}
else if (m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ begin(), region.begin() }, new_protection_flags);
}
else
{
set_page_protection({ region.end(), end() }, new_protection_flags);
}
}
else if (region.m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ end(), region.end() }, new_protection_flags);
}
else
{
set_page_protection({ region.begin(), begin() }, new_protection_flags);
}
}
else
{
if (begin() < region.begin())
{
if (u32 diff = region.begin() - end())
{
set_page_protection({ end(), end() + diff }, new_protection_flags);
}
}
else
{
if (u32 diff = begin() - region.end())
{
set_page_protection({ region.end(), region.end() + diff }, new_protection_flags);
}
set_page_protection({ region.begin(), begin() }, new_protection_flags);
}
}
m_current_protection = new_protection_flags;
extend(region); extend(region);
} }
@ -667,7 +780,7 @@ namespace gl
if (!aligned_size) if (!aligned_size)
{ {
aligned_range.begin(info.start_address & ~(vm::page_size - 1)); aligned_range.begin(info.start_address & ~(vm::page_size - 1));
aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size)); aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size));
} }
else else
{ {

View file

@ -70,6 +70,8 @@ namespace gl
bool swizzled; bool swizzled;
float lod_bias; float lod_bias;
u32 start_address; u32 start_address;
u32 log2_width;
u32 log2_height;
u32 size() const u32 size() const
{ {
@ -126,7 +128,7 @@ namespace gl
{ {
private: private:
std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures; std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures;
u32 m_current_protection = 0; u8 m_current_protection = 0;
public: public:
cache_access requires_protection() const; cache_access requires_protection() const;

View file

@ -186,6 +186,8 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
info.lod_bias = tex.bias(); info.lod_bias = tex.bias();
} }
gl::texture_flags flags = gl::texture_flags::none;
if (is_compressed) if (is_compressed)
{ {
info.format.type = gl::texture::type::ubyte; info.format.type = gl::texture::type::ubyte;
@ -226,15 +228,18 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
throw EXCEPTION("unimplemented texture format 0x%x", format); throw EXCEPTION("unimplemented texture format 0x%x", format);
} }
info.swizzled = info.swizzled && (found->second.flags & gl::texture_flags::allow_swizzle) != gl::texture_flags::none;
info.format = found->second; info.format = found->second;
info.pitch = std::max(info.width * info.format.bpp, tex.pitch()); info.pitch = std::max(info.width * info.format.bpp, tex.pitch());
flags = found->second.flags;
info.format.flags &= gl::texture_flags::allow_swizzle;
remap = info.format.remap.data(); remap = info.format.remap.data();
} }
__glcheck cache.entry(info, gl::cache_buffers::local).bind(tex.index()); __glcheck cache.entry(info, gl::cache_buffers::local).bind(tex.index());
if ((info.format.flags & gl::texture_flags::allow_remap) != gl::texture_flags::none) if ((flags & gl::texture_flags::allow_remap) != gl::texture_flags::none)
{ {
u8 remap_a = tex.remap() & 0x3; u8 remap_a = tex.remap() & 0x3;
u8 remap_r = (tex.remap() >> 2) & 0x3; u8 remap_r = (tex.remap() >> 2) & 0x3;

View file

@ -29,15 +29,22 @@ namespace rsx
} }
}; };
force_inline void async_operation(std::function<void()> function) force_inline void async_operation(bool call_async, std::function<void()> function)
{ {
++operations_in_progress; if (call_async)
{
std::thread([function = std::move(function)]() ++operations_in_progress;
std::thread([function = std::move(function)]()
{
scoped_operation operation;
function();
}).detach();
}
else
{ {
scoped_operation operation;
function(); function();
}).detach(); }
} }
std::vector<std::shared_ptr<thread_ctrl>> threads_storage; std::vector<std::shared_ptr<thread_ctrl>> threads_storage;
@ -493,7 +500,7 @@ namespace rsx
sw_height_log2 = 1; sw_height_log2 = 1;
} }
async_operation([=] async_operation(need_clip || need_convert || (in_w + in_h) > 128, [=]
{ {
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr; u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset); u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
@ -592,10 +599,8 @@ namespace rsx
u16 sw_width = 1 << sw_width_log2; u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2; u16 sw_height = 1 << sw_height_log2;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src; u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get(); u8* swizzled_pixels = pixels_dst;
std::unique_ptr<u8[]> sw_temp; std::unique_ptr<u8[]> sw_temp;
@ -632,8 +637,6 @@ namespace rsx
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break; break;
} }
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
} }
}); });
} }
@ -674,7 +677,7 @@ namespace rsx
u32 dst_offset = method_registers[NV0039_OFFSET_OUT]; u32 dst_offset = method_registers[NV0039_OFFSET_OUT];
u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT]; u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT];
async_operation([=] async_operation(line_count * line_length > 64, [=]
{ {
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma)); u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma)); const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));