mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-28 13:28:01 +03:00
SPU LLVM: New verification method
This commit is contained in:
parent
394fc8eb79
commit
62e72d0d32
1 changed files with 254 additions and 66 deletions
|
@ -20,6 +20,8 @@
|
||||||
#include "util/simd.hpp"
|
#include "util/simd.hpp"
|
||||||
#include "util/sysinfo.hpp"
|
#include "util/sysinfo.hpp"
|
||||||
|
|
||||||
|
#pragma optimize("", off)
|
||||||
|
|
||||||
const extern spu_decoder<spu_itype> g_spu_itype;
|
const extern spu_decoder<spu_itype> g_spu_itype;
|
||||||
const extern spu_decoder<spu_iname> g_spu_iname;
|
const extern spu_decoder<spu_iname> g_spu_iname;
|
||||||
const extern spu_decoder<spu_iflag> g_spu_iflag;
|
const extern spu_decoder<spu_iflag> g_spu_iflag;
|
||||||
|
@ -1594,10 +1596,11 @@ public:
|
||||||
init_luts();
|
init_luts();
|
||||||
|
|
||||||
// Start compilation
|
// Start compilation
|
||||||
const auto label_test = BasicBlock::Create(m_context, "", m_function);
|
usz label_test_count = 0;
|
||||||
const auto label_diff = BasicBlock::Create(m_context, "", m_function);
|
auto label_test = BasicBlock::Create(m_context, "label_test_0", m_function);
|
||||||
const auto label_body = BasicBlock::Create(m_context, "", m_function);
|
const auto label_diff = BasicBlock::Create(m_context, "label_diff", m_function);
|
||||||
const auto label_stop = BasicBlock::Create(m_context, "", m_function);
|
const auto label_body = BasicBlock::Create(m_context, "label_body", m_function);
|
||||||
|
const auto label_stop = BasicBlock::Create(m_context, "label_stop", m_function);
|
||||||
|
|
||||||
// Load PC, which will be the actual value of 'm_base'
|
// Load PC, which will be the actual value of 'm_base'
|
||||||
m_base_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
|
m_base_pc = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::pc));
|
||||||
|
@ -1618,18 +1621,21 @@ public:
|
||||||
{
|
{
|
||||||
// Disable check (unsafe)
|
// Disable check (unsafe)
|
||||||
m_ir->CreateBr(label_body);
|
m_ir->CreateBr(label_body);
|
||||||
|
m_ir->SetInsertPoint(label_body);
|
||||||
}
|
}
|
||||||
else if (func.data.size() == 1)
|
else if (func.data.size() == 1)
|
||||||
{
|
{
|
||||||
const auto pu32 = m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc);
|
const auto pu32 = m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc);
|
||||||
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), pu32), m_ir->getInt32(func.data[0]));
|
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u32>(), pu32), m_ir->getInt32(func.data[0]));
|
||||||
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
||||||
|
m_ir->SetInsertPoint(label_body);
|
||||||
}
|
}
|
||||||
else if (func.data.size() == 2)
|
else if (func.data.size() == 2)
|
||||||
{
|
{
|
||||||
const auto pu64 = m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc);
|
const auto pu64 = m_ir->CreateGEP(get_type<u8>(), m_lsptr, m_base_pc);
|
||||||
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u64>(), pu64), m_ir->getInt64(static_cast<u64>(func.data[1]) << 32 | func.data[0]));
|
const auto cond = m_ir->CreateICmpNE(m_ir->CreateLoad(get_type<u64>(), pu64), m_ir->getInt64(static_cast<u64>(func.data[1]) << 32 | func.data[0]));
|
||||||
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
||||||
|
m_ir->SetInsertPoint(label_body);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1648,9 +1654,9 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 stride;
|
u32 stride{};
|
||||||
u32 elements;
|
u32 elements{};
|
||||||
u32 dwords;
|
u32 dwords{};
|
||||||
|
|
||||||
if (m_use_avx512)
|
if (m_use_avx512)
|
||||||
{
|
{
|
||||||
|
@ -1672,29 +1678,157 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get actual pc corresponding to the found beginning of the data
|
// Get actual pc corresponding to the found beginning of the data
|
||||||
llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), 0x3fffc);
|
llvm::Value* starta_pc = m_ir->CreateAnd(get_pc(starta), SPU_LS_SIZE - 4);
|
||||||
llvm::Value* data_addr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, starta_pc);
|
llvm::Value* data_addr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, starta_pc);
|
||||||
|
|
||||||
llvm::Value* acc = nullptr;
|
llvm::Value* acc = nullptr;
|
||||||
|
|
||||||
// Use 512bit xorsum to verify integrity if size is atleast 512b * 3
|
// Exempt interrupt handler
|
||||||
// This code uses a 512bit vector for all hardware to ensure behavior matches.
|
bool has_sync_or_channel_sync = func.entry_point == 0;
|
||||||
|
|
||||||
|
for (u32 i = 0; !has_sync_or_channel_sync && i < func.data.size(); i++)
|
||||||
|
{
|
||||||
|
const spu_opcode_t op{std::bit_cast<be_t<u32>>(func.data[i])};
|
||||||
|
|
||||||
|
switch (g_spu_itype.decode(op.opcode))
|
||||||
|
{
|
||||||
|
case spu_itype::RDCH:
|
||||||
|
{
|
||||||
|
if (op.ra == MFC_RdTagStat)
|
||||||
|
{
|
||||||
|
//has_sync_or_channel_sync = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case spu_itype::RCHCNT:
|
||||||
|
{
|
||||||
|
if (op.ra == MFC_RdTagStat || op.ra == SPU_WrOutIntrMbox || op.ra == SPU_WrOutMbox)
|
||||||
|
{
|
||||||
|
// Actually this is fine and is quite common
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
has_sync_or_channel_sync = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case spu_itype::SYNC:
|
||||||
|
case spu_itype::IRET:
|
||||||
|
case spu_itype::BISLED:
|
||||||
|
{
|
||||||
|
// Disable optimization if there is a SYNC or interrupt instruction
|
||||||
|
has_sync_or_channel_sync = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case spu_itype::BI:
|
||||||
|
{
|
||||||
|
// Interrupt enabling instruction
|
||||||
|
has_sync_or_channel_sync = !!op.e;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case spu_itype::STQR:
|
||||||
|
{
|
||||||
|
// Self-modifying code is likely
|
||||||
|
has_sync_or_channel_sync = (op.si16 >= 0 && op.i16 < func.data.size() - i) || (op.si16 < 0 && (0 - op.si16) + 0u < i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
//case spu_itype::STQA:
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use 512bit xorsum to verify integrity if size is atleast (stride * 2)
|
||||||
// The xorsum path is still faster even on narrow hardware.
|
// The xorsum path is still faster even on narrow hardware.
|
||||||
if ((end - starta) >= 192 && !g_cfg.core.precise_spu_verification)
|
if (!has_sync_or_channel_sync && (end - starta) >= std::max<u32>(stride, 32) * 2 && !g_cfg.core.precise_spu_verification)
|
||||||
{
|
{
|
||||||
for (u32 j = starta; j < end; j += 64)
|
if (stride < 32)
|
||||||
{
|
{
|
||||||
int indices[16];
|
stride = 32;
|
||||||
|
elements = 8;
|
||||||
|
dwords = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum accum_type : usz
|
||||||
|
{
|
||||||
|
//_sub,
|
||||||
|
//_xor,
|
||||||
|
_add,
|
||||||
|
//_sub_reversed,
|
||||||
|
_end,
|
||||||
|
};
|
||||||
|
|
||||||
|
llvm::Value* acc_local = nullptr;
|
||||||
|
|
||||||
|
for (u32 j = starta, iter = 0, max_itrer = m_size / stride, reset_loop_at = 0; j < end; j += stride, iter++)
|
||||||
|
{
|
||||||
|
int indices[16]{};
|
||||||
bool holes = false;
|
bool holes = false;
|
||||||
bool data = false;
|
bool data = false;
|
||||||
|
|
||||||
for (u32 i = 0; i < 16; i++)
|
const bool is_last_iteration = j + stride >= end;
|
||||||
|
|
||||||
|
auto shuffle_access = [](usz index, usz size, usz element_size) -> u32
|
||||||
|
{
|
||||||
|
ensure(index < size);
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
return index;
|
||||||
|
|
||||||
|
if (size == element_size)
|
||||||
|
{
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
const usz index_rem = index % element_size;
|
||||||
|
|
||||||
|
const usz size_fixed = (size + element_size - 1) / element_size;
|
||||||
|
const usz index_fixed = (index) / element_size;
|
||||||
|
|
||||||
|
// Square root estimation via bitwise ops
|
||||||
|
const usz bit_mask_count = (std::bit_width(size_fixed) - 1);
|
||||||
|
const usz sqrt_est_bit = (bit_mask_count / 2);
|
||||||
|
const usz sqrt_est = 1ull << sqrt_est_bit;
|
||||||
|
const usz sqrt_est_moduleo_mask = sqrt_est - 1;
|
||||||
|
const usz partitioner = size_fixed >> sqrt_est_bit;
|
||||||
|
const usz max_regular = (sqrt_est * partitioner);
|
||||||
|
const usz error_partition = size_fixed - max_regular;
|
||||||
|
|
||||||
|
// Fallback for a special case (unaligned access at the end)
|
||||||
|
if (size % element_size && index_fixed == size_fixed - 1)
|
||||||
|
{
|
||||||
|
return ensure(index_fixed * element_size + index_rem, FN(x < size));
|
||||||
|
}
|
||||||
|
|
||||||
|
const usz tmp_partition_index1 = std::min(index_fixed & sqrt_est_moduleo_mask, error_partition);
|
||||||
|
const usz partitioned_index_first = tmp_partition_index1 * (partitioner + 1);
|
||||||
|
const usz tmp_partition_index2 = (index_fixed & sqrt_est_moduleo_mask) - tmp_partition_index1;
|
||||||
|
const usz partitioned_index_second = tmp_partition_index2 * (partitioner + 0);
|
||||||
|
const usz partitioned_index_third = index_fixed / partitioner;
|
||||||
|
|
||||||
|
const usz result_tmp = partitioned_index_first + partitioned_index_second + partitioned_index_third;
|
||||||
|
|
||||||
|
// Fallback for a special case (unaligned access at the end)
|
||||||
|
if (size % element_size && result_tmp == size_fixed - 1)
|
||||||
|
{
|
||||||
|
return ensure(index_fixed * element_size + index_rem, FN(x < size));
|
||||||
|
}
|
||||||
|
|
||||||
|
const usz result = result_tmp * element_size + index_rem;
|
||||||
|
|
||||||
|
ensure(result < size);
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u32 i = 0; i < elements; i++)
|
||||||
{
|
{
|
||||||
const u32 k = j + i * 4;
|
const u32 k = j + i * 4;
|
||||||
|
|
||||||
if (k < start || k >= end || !func.data[(k - start) / 4])
|
if (k >= end || !func.data[shuffle_access((k - start) / 4, func.data.size(), elements)])
|
||||||
{
|
{
|
||||||
indices[i] = 16;
|
indices[i] = elements;
|
||||||
holes = true;
|
holes = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1704,65 +1838,119 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!data)
|
|
||||||
{
|
|
||||||
// Skip full-sized holes
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::Value* vls = nullptr;
|
llvm::Value* vls = nullptr;
|
||||||
|
|
||||||
// Load unaligned code block from LS
|
// Load unaligned code block from LS
|
||||||
vls = m_ir->CreateAlignedLoad(get_type<u32[16]>(), _ptr<u32[16]>(data_addr, j - starta), llvm::MaybeAlign{4});
|
vls = !data ? nullptr : stride == 64 ? m_ir->CreateAlignedLoad(get_type<u32[16]>(), _ptr<u32[16]>(data_addr, shuffle_access(j - starta, m_size, stride)), llvm::MaybeAlign{4}) :
|
||||||
|
stride == 32 ? m_ir->CreateAlignedLoad(get_type<u32[8]>(), _ptr<u32[8]>(data_addr, shuffle_access(j - starta, m_size, stride)), llvm::MaybeAlign{4})
|
||||||
|
: ensure(vls, FN(false), "Unreachable");
|
||||||
|
|
||||||
// Mask if necessary
|
// Mask if necessary
|
||||||
if (holes)
|
if (holes && data)
|
||||||
{
|
{
|
||||||
vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::ArrayRef(indices, 16));
|
vls = m_ir->CreateShuffleVector(vls, ConstantAggregateZero::get(vls->getType()), llvm::ArrayRef(indices, elements));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!data)
|
||||||
|
{
|
||||||
|
//
|
||||||
|
}
|
||||||
|
else if (reset_loop_at == iter)
|
||||||
|
{
|
||||||
|
acc_local = vls;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (accum_type{(iter - 1) % accum_type::_end})
|
||||||
|
{
|
||||||
|
//case accum_type::_xor: acc_local = m_ir->CreateXor(acc_local, vls); break;
|
||||||
|
case accum_type::_add: acc_local = m_ir->CreateAdd(acc_local, vls); break;
|
||||||
|
//case accum_type::_sub: acc_local = m_ir->CreateSub(acc_local, vls); break;
|
||||||
|
//case accum_type::_sub_reversed: acc_local = m_ir->CreateSub(vls, acc_local); break;
|
||||||
|
default: ensure(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
acc = acc ? m_ir->CreateXor(acc, vls) : vls;
|
|
||||||
check_iterations++;
|
check_iterations++;
|
||||||
}
|
|
||||||
|
|
||||||
|
constexpr usz num_of_full_repetitions_per_check = 2;
|
||||||
|
constexpr usz num_of_full_repetitions_per_jump = 8;
|
||||||
|
|
||||||
|
const usz iter_since_reset = iter - reset_loop_at;
|
||||||
|
|
||||||
|
if ((iter_since_reset && (iter_since_reset + 1) % (accum_type::_end * num_of_full_repetitions_per_check) == 0) || is_last_iteration)
|
||||||
|
{
|
||||||
// Create the Xorsum
|
// Create the Xorsum
|
||||||
u32 xorsum[16] = {0};
|
u32 checksum[512 / 32]{};
|
||||||
|
|
||||||
for (u32 j = 0; j < func.data.size(); j += 16) // Process 16 elements per iteration
|
for (usz i = reset_loop_at * elements, end_it = std::min<usz>(func.data.size(), (iter + 1) * elements); i < end_it; i++) // Process 16 elements per iteration
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < 16; i++)
|
const usz sub_index = i % elements;
|
||||||
|
const usz group_index = (i - reset_loop_at) / elements;
|
||||||
|
const u32 fdata = func.data[shuffle_access(i, func.data.size(), elements)];
|
||||||
|
|
||||||
|
if (!group_index)
|
||||||
{
|
{
|
||||||
if (j + i < func.data.size())
|
checksum[sub_index] = fdata;
|
||||||
{
|
continue;
|
||||||
xorsum[i] ^= func.data[j + i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (accum_type{(group_index - 1) % accum_type::_end})
|
||||||
|
{
|
||||||
|
// case accum_type::_xor: checksum[sub_index] ^= fdata; break;
|
||||||
|
case accum_type::_add: checksum[sub_index] += fdata; break;
|
||||||
|
//case accum_type::_sub: checksum[sub_index] -= fdata; break;
|
||||||
|
//case accum_type::_sub_reversed: checksum[sub_index] = fdata - checksum[sub_index]; break;
|
||||||
|
default: ensure(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto* const_vector = ConstantDataVector::get(m_context, llvm::ArrayRef(xorsum, 16));
|
auto* const_vector = ConstantDataVector::get(m_context, llvm::ArrayRef(checksum, elements));
|
||||||
acc = m_ir->CreateXor(acc, const_vector);
|
|
||||||
|
acc_local = data ? acc_local : const_vector;
|
||||||
|
|
||||||
|
const auto xor_res = m_ir->CreateXor(acc_local, const_vector);
|
||||||
|
|
||||||
|
if ((iter_since_reset + 1) % (accum_type::_end * num_of_full_repetitions_per_jump) != 0 && !is_last_iteration)
|
||||||
|
{
|
||||||
|
reset_loop_at = iter + 1;
|
||||||
|
acc = acc ? m_ir->CreateOr(xor_res, acc) : xor_res;
|
||||||
|
acc_local = nullptr;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
acc = acc ? m_ir->CreateOr(xor_res, acc) : xor_res;
|
||||||
|
|
||||||
// Pattern for PTEST
|
// Pattern for PTEST
|
||||||
acc = m_ir->CreateBitCast(acc, get_type<u64[8]>());
|
acc = stride == 64 ? m_ir->CreateBitCast(acc, get_type<u64[8]>()) :
|
||||||
|
stride == 32 ? m_ir->CreateBitCast(acc, get_type<u64[4]>())
|
||||||
|
: ensure(acc, FN(false), "Unreachable");
|
||||||
|
|
||||||
llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0});
|
llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0});
|
||||||
|
|
||||||
for (u32 i = 1; i < 8; i++)
|
for (u64 i = 1; i < (stride / sizeof(u64)); i++)
|
||||||
{
|
{
|
||||||
elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i));
|
elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
spu_log.error("end");
|
spu_log.warning("Using checksum verification!");
|
||||||
|
|
||||||
// Compare result with zero
|
// Compare result with zero
|
||||||
const auto cond = m_ir->CreateICmpNE(elem, m_ir->getInt64(0));
|
const auto cond = m_ir->CreateICmpNE(elem, m_ir->getInt64(0));
|
||||||
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
const auto label_check_next_or_body = is_last_iteration ? label_body : BasicBlock::Create(m_context, fmt::format("label_test_%d", ++label_test_count), m_function);
|
||||||
|
m_ir->CreateCondBr(cond, label_diff, label_check_next_or_body, m_md_unlikely);
|
||||||
|
m_ir->SetInsertPoint(label_check_next_or_body);
|
||||||
|
acc_local = nullptr;
|
||||||
|
acc = nullptr;
|
||||||
|
reset_loop_at = iter + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (u32 j = starta; j < end; j += stride)
|
for (u32 j = starta; j < end; j += stride)
|
||||||
{
|
{
|
||||||
int indices[16];
|
int indices[16]{};
|
||||||
bool holes = false;
|
bool holes = false;
|
||||||
bool data = false;
|
bool data = false;
|
||||||
|
|
||||||
|
@ -1770,7 +1958,7 @@ public:
|
||||||
{
|
{
|
||||||
const u32 k = j + i * 4;
|
const u32 k = j + i * 4;
|
||||||
|
|
||||||
if (k < start || k >= end || !func.data[(k - start) / 4])
|
if (k >= end || !func.data[(k - start) / 4])
|
||||||
{
|
{
|
||||||
indices[i] = elements;
|
indices[i] = elements;
|
||||||
holes = true;
|
holes = true;
|
||||||
|
@ -1811,7 +1999,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform bitwise comparison and accumulate
|
// Perform bitwise comparison and accumulate
|
||||||
u32 words[16];
|
u32 words[16]{};
|
||||||
|
|
||||||
for (u32 i = 0; i < elements; i++)
|
for (u32 i = 0; i < elements; i++)
|
||||||
{
|
{
|
||||||
|
@ -1839,7 +2027,7 @@ public:
|
||||||
|
|
||||||
llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0});
|
llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0});
|
||||||
|
|
||||||
for (u32 i = 1; i < dwords; i++)
|
for (u64 i = 1; i < dwords; i++)
|
||||||
{
|
{
|
||||||
elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i));
|
elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i));
|
||||||
}
|
}
|
||||||
|
@ -1847,11 +2035,11 @@ public:
|
||||||
// Compare result with zero
|
// Compare result with zero
|
||||||
const auto cond = m_ir->CreateICmpNE(elem, m_ir->getInt64(0));
|
const auto cond = m_ir->CreateICmpNE(elem, m_ir->getInt64(0));
|
||||||
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
m_ir->CreateCondBr(cond, label_diff, label_body, m_md_unlikely);
|
||||||
|
m_ir->SetInsertPoint(label_body);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Increase block counter with statistics
|
// Increase block counter with statistics
|
||||||
m_ir->SetInsertPoint(label_body);
|
|
||||||
const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter);
|
const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter);
|
||||||
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbcount), m_ir->getInt64(check_iterations)), pbcount);
|
m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(get_type<u64>(), pbcount), m_ir->getInt64(check_iterations)), pbcount);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue