mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-28 21:38:01 +03:00
SPU LLVM: AVX-512 optimization for CFLTU (#14384)
- Takes advantage of vrangeps and the new float-to-uint instructions from AVX-512
- Down from 6 to 3 instructions

TODO: Somehow ensure that this is what LLVM outputs when using CreateFPToUI?
This commit is contained in:
parent
dabb2cc9a0
commit
fd6829f757
1 changed file with 9 additions and 0 deletions
|
@@ -9871,6 +9871,15 @@ public:
|
||||||
a = eval(a * s);
|
a = eval(a * s);
|
||||||
|
|
||||||
value_t<s32[4]> r;
|
value_t<s32[4]> r;
|
||||||
|
|
||||||
|
if (m_use_avx512)
|
||||||
|
{
|
||||||
|
const auto sc = clamp_smax(a);
|
||||||
|
r.value = m_ir->CreateFPToUI(sc.value, get_type<s32[4]>());
|
||||||
|
set_vr(op.rt, r);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
|
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
|
||||||
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
|
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue