#include "stdafx.h"
#include "Utilities/JIT.h"
#include "Utilities/date_time.h"
#include "Emu/Memory/vm.h"
#include "Emu/Memory/vm_ptr.h"
#include "Emu/Memory/vm_reservation.h"

#include "Loader/ELF.h"
#include "Emu/VFS.h"
#include "Emu/IdManager.h"
#include "Emu/perf_meter.hpp"
#include "Emu/RSX/RSXThread.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/lv2/sys_spu.h"
#include "Emu/Cell/lv2/sys_event_flag.h"
#include "Emu/Cell/lv2/sys_event.h"
#include "Emu/Cell/lv2/sys_interrupt.h"

#include "Emu/Cell/SPUDisAsm.h"
#include "Emu/Cell/SPUAnalyser.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/SPURecompiler.h"
#include "Emu/Cell/timers.hpp"

#include <cmath>
#include <cfenv>
#include <thread>
#include <shared_mutex>

#include "util/vm.hpp"
#include "util/asm.hpp"
#include "util/v128.hpp"
#include "util/v128sse.hpp"
#include "util/sysinfo.hpp"

using spu_rdata_t = decltype(spu_thread::rdata);
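// rdata is the SPU reservation data: one 128-byte block, the granule tracked by
// the GETLLAR/PUTLLC/PUTLLUC atomic commands implemented below.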

template <>
void fmt_class_string<mfc_atomic_status>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](mfc_atomic_status arg)
	{
		switch (arg)
		{
		case MFC_PUTLLC_SUCCESS: return "PUTLLC";
		case MFC_PUTLLC_FAILURE: return "PUTLLC-FAIL";
		case MFC_PUTLLUC_SUCCESS: return "PUTLLUC";
		case MFC_GETLLAR_SUCCESS: return "GETLLAR";
		}

		return unknown;
	});
}

template <>
void fmt_class_string<mfc_tag_update>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](mfc_tag_update arg)
	{
		switch (arg)
		{
		case MFC_TAG_UPDATE_IMMEDIATE: return "empty";
		case MFC_TAG_UPDATE_ANY: return "ANY";
		case MFC_TAG_UPDATE_ALL: return "ALL";
		}

		return unknown;
	});
}

template <>
void fmt_class_string<spu_type>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](spu_type arg)
	{
		switch (arg)
		{
		case spu_type::threaded: return "Threaded";
		case spu_type::raw: return "Raw";
		case spu_type::isolated: return "Isolated";
		}

		return unknown;
	});
}

// Verify AVX availability for TSX transactions
static const bool s_tsx_avx = utils::has_avx();

// Special case: detect Haswell-level TSX (RTM present but MPX absent)
static const bool s_tsx_haswell = utils::has_rtm() && !utils::has_mpx();

static FORCE_INLINE bool cmp_rdata_avx(const __m256i* lhs, const __m256i* rhs)
{
#if defined(_MSC_VER) || defined(__AVX__)
	const __m256 x0 = _mm256_xor_ps(_mm256_castsi256_ps(_mm256_loadu_si256(lhs + 0)), _mm256_castsi256_ps(_mm256_loadu_si256(rhs + 0)));
	const __m256 x1 = _mm256_xor_ps(_mm256_castsi256_ps(_mm256_loadu_si256(lhs + 1)), _mm256_castsi256_ps(_mm256_loadu_si256(rhs + 1)));
	const __m256 x2 = _mm256_xor_ps(_mm256_castsi256_ps(_mm256_loadu_si256(lhs + 2)), _mm256_castsi256_ps(_mm256_loadu_si256(rhs + 2)));
	const __m256 x3 = _mm256_xor_ps(_mm256_castsi256_ps(_mm256_loadu_si256(lhs + 3)), _mm256_castsi256_ps(_mm256_loadu_si256(rhs + 3)));
	const __m256 c0 = _mm256_or_ps(x0, x1);
	const __m256 c1 = _mm256_or_ps(x2, x3);
	const __m256 c2 = _mm256_or_ps(c0, c1);
	return _mm256_testz_si256(_mm256_castps_si256(c2), _mm256_castps_si256(c2)) != 0;
#else
	bool result = false;
	__asm__(
		"vmovups 0*32(%[lhs]), %%ymm0;" // load
		"vmovups 1*32(%[lhs]), %%ymm1;"
		"vmovups 2*32(%[lhs]), %%ymm2;"
		"vmovups 3*32(%[lhs]), %%ymm3;"
		"vxorps 0*32(%[rhs]), %%ymm0, %%ymm0;" // compare
		"vxorps 1*32(%[rhs]), %%ymm1, %%ymm1;"
		"vxorps 2*32(%[rhs]), %%ymm2, %%ymm2;"
		"vxorps 3*32(%[rhs]), %%ymm3, %%ymm3;"
		"vorps %%ymm0, %%ymm1, %%ymm0;" // merge
		"vorps %%ymm2, %%ymm3, %%ymm2;"
		"vorps %%ymm0, %%ymm2, %%ymm0;"
		"vptest %%ymm0, %%ymm0;" // test
		"vzeroupper"
		: "=@ccz" (result)
		: [lhs] "r" (lhs)
		, [rhs] "r" (rhs)
		: "cc" // Clobber flags
		, "xmm0" // Clobber registers ymm0-ymm3 (see mov_rdata_avx)
		, "xmm1"
		, "xmm2"
		, "xmm3"
	);

	return result;
#endif
}
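
// Compare two 128-byte reservation blocks for equality: differences are
// XOR-accumulated and OR-merged into a single vector, then tested for zero, so
// the whole line is compared branchlessly.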
#ifdef _MSC_VER
__forceinline
#endif
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs)
{
#ifndef __AVX__
	if (s_tsx_avx) [[likely]]
#endif
	{
		return cmp_rdata_avx(reinterpret_cast<const __m256i*>(_lhs), reinterpret_cast<const __m256i*>(_rhs));
	}

	const auto lhs = reinterpret_cast<const v128*>(_lhs);
	const auto rhs = reinterpret_cast<const v128*>(_rhs);
	const v128 a = (lhs[0] ^ rhs[0]) | (lhs[1] ^ rhs[1]);
	const v128 b = (lhs[2] ^ rhs[2]) | (lhs[3] ^ rhs[3]);
	const v128 c = (lhs[4] ^ rhs[4]) | (lhs[5] ^ rhs[5]);
	const v128 d = (lhs[6] ^ rhs[6]) | (lhs[7] ^ rhs[7]);
	const v128 r = (a | b) | (c | d);
	return r == v128{};
}

static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
{
#ifdef _MSC_VER
	_mm256_storeu_si256(dst + 0, _mm256_loadu_si256(src + 0));
	_mm256_storeu_si256(dst + 1, _mm256_loadu_si256(src + 1));
	_mm256_storeu_si256(dst + 2, _mm256_loadu_si256(src + 2));
	_mm256_storeu_si256(dst + 3, _mm256_loadu_si256(src + 3));
#else
	__asm__(
		"vmovdqu 0*32(%[src]), %%ymm0;" // load
		"vmovdqu %%ymm0, 0*32(%[dst]);" // store
		"vmovdqu 1*32(%[src]), %%ymm0;"
		"vmovdqu %%ymm0, 1*32(%[dst]);"
		"vmovdqu 2*32(%[src]), %%ymm0;"
		"vmovdqu %%ymm0, 2*32(%[dst]);"
		"vmovdqu 3*32(%[src]), %%ymm0;"
		"vmovdqu %%ymm0, 3*32(%[dst]);"
#ifndef __AVX__
		"vzeroupper" // Not needed in AVX mode (should be emitted automatically)
#endif
		:
		: [src] "r" (src)
		, [dst] "r" (dst)
#ifdef __AVX__
		: "ymm0" // Clobber ymm0 register (acknowledge its modification)
#else
		: "xmm0" // ymm0 is "unknown" if not compiled in AVX mode, so clobber xmm0 only
#endif
	);
#endif
}

#ifdef _MSC_VER
__forceinline
#endif
extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
{
#ifndef __AVX__
	if (s_tsx_avx) [[likely]]
#endif
	{
		mov_rdata_avx(reinterpret_cast<__m256i*>(_dst), reinterpret_cast<const __m256i*>(_src));
		return;
	}

	{
		const __m128i v0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 0));
		const __m128i v1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 16));
		const __m128i v2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 32));
		const __m128i v3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 48));
		_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 0), v0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 16), v1);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 32), v2);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 48), v3);
	}

	const __m128i v0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 64));
	const __m128i v1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 80));
	const __m128i v2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 96));
	const __m128i v3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 112));
	_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 64), v0);
	_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 80), v1);
	_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 96), v2);
	_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 112), v3);
}
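
// Non-temporal variant of mov_rdata: streaming stores bypass the cache
// hierarchy, which helps when the destination line will not be re-read soon
// (e.g. bulk transfers).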
static FORCE_INLINE void mov_rdata_nt_avx(__m256i* dst, const __m256i* src)
{
#ifdef _MSC_VER
	_mm256_stream_si256(dst + 0, _mm256_load_si256(src + 0));
	_mm256_stream_si256(dst + 1, _mm256_load_si256(src + 1));
	_mm256_stream_si256(dst + 2, _mm256_load_si256(src + 2));
	_mm256_stream_si256(dst + 3, _mm256_load_si256(src + 3));
#else
	__asm__(
		"vmovdqa 0*32(%[src]), %%ymm0;" // load
		"vmovntdq %%ymm0, 0*32(%[dst]);" // store
		"vmovdqa 1*32(%[src]), %%ymm0;"
		"vmovntdq %%ymm0, 1*32(%[dst]);"
		"vmovdqa 2*32(%[src]), %%ymm0;"
		"vmovntdq %%ymm0, 2*32(%[dst]);"
		"vmovdqa 3*32(%[src]), %%ymm0;"
		"vmovntdq %%ymm0, 3*32(%[dst]);"
#ifndef __AVX__
		"vzeroupper" // Not needed in AVX mode (should be emitted automatically)
#endif
		:
		: [src] "r" (src)
		, [dst] "r" (dst)
#ifdef __AVX__
		: "ymm0" // Clobber ymm0 register (acknowledge its modification)
#else
		: "xmm0" // ymm0 is "unknown" if not compiled in AVX mode, so clobber xmm0 only
#endif
	);
#endif
}

extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
{
#ifndef __AVX__
	if (s_tsx_avx) [[likely]]
#endif
	{
		mov_rdata_nt_avx(reinterpret_cast<__m256i*>(_dst), reinterpret_cast<const __m256i*>(_src));
		return;
	}

	{
		const __m128i v0 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 0));
		const __m128i v1 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 16));
		const __m128i v2 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 32));
		const __m128i v3 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 48));
		_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 0), v0);
		_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 16), v1);
		_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 32), v2);
		_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 48), v3);
	}

	const __m128i v0 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 64));
	const __m128i v1 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 80));
	const __m128i v2 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 96));
	const __m128i v3 = _mm_load_si128(reinterpret_cast<const __m128i*>(_src + 112));
	_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 64), v0);
	_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 80), v1);
	_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 96), v2);
	_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 112), v3);
}

void do_cell_atomic_128_store(u32 addr, const void* to_write);

extern thread_local u64 g_tls_fault_spu;

const spu_decoder<spu_itype> s_spu_itype;
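
// SPU instruction-level scheduler: caps how many SPU threads may sit on the
// same instruction address at once (the "preferred SPU threads" setting) and
// throttles the excess threads with sleeps or busy-waits.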
namespace spu
{
	namespace scheduler
	{
		std::array<atomic_t<u8>, 65536> atomic_instruction_table = {};
		constexpr u32 native_jiffy_duration_us = 1500; // About 1ms resolution with a half offset

		void acquire_pc_address(spu_thread& spu, u32 pc, u32 timeout_ms, u32 max_concurrent_instructions)
		{
			const u32 pc_offset = pc >> 2;

			if (atomic_instruction_table[pc_offset].observe() >= max_concurrent_instructions)
			{
				spu.state += cpu_flag::wait + cpu_flag::temp;

				if (timeout_ms > 0)
				{
					const u64 timeout = timeout_ms * 1000u; // Convert to microseconds
					const u64 start = get_system_time();
					auto remaining = timeout;

					while (atomic_instruction_table[pc_offset].observe() >= max_concurrent_instructions)
					{
						if (remaining >= native_jiffy_duration_us)
							std::this_thread::sleep_for(1ms);
						else
							std::this_thread::yield();

						const auto now = get_system_time();
						const auto elapsed = now - start;

						if (elapsed > timeout) break;
						remaining = timeout - elapsed;
					}
				}
				else
				{
					// Slight pause if function is overburdened
					const auto count = atomic_instruction_table[pc_offset].observe() * 100ull;
					busy_wait(count);
				}

				ensure(!spu.check_state());
			}

			atomic_instruction_table[pc_offset]++;
		}

		void release_pc_address(u32 pc)
		{
			const u32 pc_offset = pc >> 2;

			atomic_instruction_table[pc_offset]--;
		}

		struct concurrent_execution_watchdog
		{
			u32 pc = 0;
			bool active = false;

			concurrent_execution_watchdog(spu_thread& spu)
				: pc(spu.pc)
			{
				if (const u32 max_concurrent_instructions = g_cfg.core.preferred_spu_threads)
				{
					acquire_pc_address(spu, pc, g_cfg.core.spu_delay_penalty, max_concurrent_instructions);
					active = true;
				}
			}

			~concurrent_execution_watchdog()
			{
				if (active)
					release_pc_address(pc);
			}
		};
	}
}
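
// Compute the possible successor addresses of an instruction: res[0] holds the
// fall-through (or unconditional) target, res[1] the taken target of a
// conditional branch; UINT32_MAX marks a slot as unknown or none.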
std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
{
	std::array<u32, 2> res{spu_branch_target(pc + 4), UINT32_MAX};

	switch (const auto type = s_spu_itype.decode(op.opcode))
	{
	case spu_itype::BR:
	case spu_itype::BRA:
	case spu_itype::BRNZ:
	case spu_itype::BRZ:
	case spu_itype::BRHNZ:
	case spu_itype::BRHZ:
	case spu_itype::BRSL:
	case spu_itype::BRASL:
	{
		const int index = (type == spu_itype::BR || type == spu_itype::BRA || type == spu_itype::BRSL || type == spu_itype::BRASL ? 0 : 1);
		res[index] = spu_branch_target(type == spu_itype::BRASL || type == spu_itype::BRA ? 0 : pc, op.i16);
		break;
	}
	case spu_itype::IRET:
	case spu_itype::BI:
	case spu_itype::BISLED:
	case spu_itype::BISL:
	case spu_itype::BIZ:
	case spu_itype::BINZ:
	case spu_itype::BIHZ:
	case spu_itype::BIHNZ: // TODO (detect constant address branches, such as for interrupts enable/disable pattern)
	case spu_itype::UNK:
	{
		res[0] = UINT32_MAX;
		break;
	}
	default: break;
	}

	return res;
}
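
// JIT-assembled PUTLLC core: attempts the 128-byte compare-and-swap inside a
// hardware (RTM) transaction, falling back to a "lightened" transaction that
// first locks the reservation word. Judging from the code paths below, the
// return value is the TSC tick delta on success, 0 on reservation/data
// mismatch, and u64{-1} when the caller must take the non-TSX fallback.
// A hypothetical call site might look like this (names illustrative only):
//
//   switch (u64 count = spu_putllc_tx(addr, rtime, rdata, to_write))
//   {
//   case 0: /* store failed, reservation lost */ break;
//   case umax: /* both transaction tiers failed, use the fallback path */ break;
//   default: /* success, count = ticks spent inside the transaction */ break;
//   }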
const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fall = c.newLabel();
	Label fail = c.newLabel();
	Label _ret = c.newLabel();
	Label skip = c.newLabel();
	Label next = c.newLabel();
	Label load = c.newLabel();

	//if (utils::has_avx() && !s_tsx_avx)
	//{
	//	c.vzeroupper();
	//}

	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
	c.push(x86::rbp);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.sub(x86::rsp, 168);
#ifdef _WIN32
	if (s_tsx_avx)
	{
		c.vmovups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
		c.vmovups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
	}
	else
	{
		c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
		c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
		c.movups(x86::oword_ptr(x86::rsp, 32), x86::xmm8);
		c.movups(x86::oword_ptr(x86::rsp, 48), x86::xmm9);
		c.movups(x86::oword_ptr(x86::rsp, 64), x86::xmm10);
		c.movups(x86::oword_ptr(x86::rsp, 80), x86::xmm11);
		c.movups(x86::oword_ptr(x86::rsp, 96), x86::xmm12);
		c.movups(x86::oword_ptr(x86::rsp, 112), x86::xmm13);
		c.movups(x86::oword_ptr(x86::rsp, 128), x86::xmm14);
		c.movups(x86::oword_ptr(x86::rsp, 144), x86::xmm15);
	}
#endif

	// Prepare registers
	build_swap_rdx_with(c, args, x86::r12);
	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
	c.prefetchw(x86::byte_ptr(x86::rbp, 64));
	c.and_(args[0].r32(), 0xff80);
	c.shr(args[0].r32(), 1);
	c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
	c.prefetchw(x86::byte_ptr(x86::rbx));
	c.mov(x86::r13, args[1]);

	// Prepare data
	if (s_tsx_avx)
	{
		c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
		c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
		c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
		c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
		c.vmovups(x86::ymm4, x86::yword_ptr(args[3], 0));
		c.vmovups(x86::ymm5, x86::yword_ptr(args[3], 32));
		c.vmovups(x86::ymm6, x86::yword_ptr(args[3], 64));
		c.vmovups(x86::ymm7, x86::yword_ptr(args[3], 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(args[2], 0));
		c.movaps(x86::xmm1, x86::oword_ptr(args[2], 16));
		c.movaps(x86::xmm2, x86::oword_ptr(args[2], 32));
		c.movaps(x86::xmm3, x86::oword_ptr(args[2], 48));
		c.movaps(x86::xmm4, x86::oword_ptr(args[2], 64));
		c.movaps(x86::xmm5, x86::oword_ptr(args[2], 80));
		c.movaps(x86::xmm6, x86::oword_ptr(args[2], 96));
		c.movaps(x86::xmm7, x86::oword_ptr(args[2], 112));
		c.movaps(x86::xmm8, x86::oword_ptr(args[3], 0));
		c.movaps(x86::xmm9, x86::oword_ptr(args[3], 16));
		c.movaps(x86::xmm10, x86::oword_ptr(args[3], 32));
		c.movaps(x86::xmm11, x86::oword_ptr(args[3], 48));
		c.movaps(x86::xmm12, x86::oword_ptr(args[3], 64));
		c.movaps(x86::xmm13, x86::oword_ptr(args[3], 80));
		c.movaps(x86::xmm14, x86::oword_ptr(args[3], 96));
		c.movaps(x86::xmm15, x86::oword_ptr(args[3], 112));
	}

	// Alloc args[0] to stamp0
	const auto stamp0 = args[0];
	const auto stamp1 = args[1];
	build_get_tsc(c, stamp0);

	// Begin transaction
	Label tx0 = build_transaction_enter(c, fall, [&]()
	{
		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
		build_get_tsc(c, stamp1);
		c.sub(stamp1, stamp0);
		c.xor_(x86::eax, x86::eax);
		c.cmp(stamp1, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
		c.jae(fall);
	});

	c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
	c.jc(fall);
	c.xbegin(tx0);
	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
	c.test(x86::eax, vm::rsrv_unique_lock);
	c.jnz(skip);
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, x86::r13);
	c.jne(fail);

	if (s_tsx_avx)
	{
		c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
		c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
		c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
		c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
		c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
		c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
		c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
		c.vptest(x86::ymm0, x86::ymm0);
	}
	else
	{
		c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
		c.orps(x86::xmm0, x86::xmm1);
		c.orps(x86::xmm2, x86::xmm3);
		c.orps(x86::xmm4, x86::xmm5);
		c.orps(x86::xmm6, x86::xmm7);
		c.orps(x86::xmm0, x86::xmm2);
		c.orps(x86::xmm4, x86::xmm6);
		c.orps(x86::xmm0, x86::xmm4);
		c.ptest(x86::xmm0, x86::xmm0);
	}

	c.jnz(fail);

	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm4);
		c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm5);
		c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm6);
		c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm7);
	}
	else
	{
		c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm8);
		c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm9);
		c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm10);
		c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm11);
		c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm12);
		c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm13);
		c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm14);
		c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm15);
	}

	c.sub(x86::qword_ptr(x86::rbx), -128);
	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);
	c.jmp(_ret);

	// XABORT is expensive so finish with xend instead
	c.bind(fail);

	// Load old data to store back in rdata
	if (s_tsx_avx)
	{
		c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
		c.vmovaps(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
		c.vmovaps(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
		c.vmovaps(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
	}

	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
	c.jmp(load);

	c.bind(skip);
	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
	build_get_tsc(c, stamp1);
	//c.jmp(fall);

	c.bind(fall);

	Label fall2 = c.newLabel();
	Label fail2 = c.newLabel();
	Label fail3 = c.newLabel();

	// Lightened transaction: only compare and swap data
	c.bind(next);

	// Try to "lock" reservation
	c.mov(x86::eax, 1);
	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
	c.test(x86::eax, vm::rsrv_unique_lock);
	c.jnz(fail2);

	// Check if already updated
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, x86::r13);
	c.jne(fail2);

	// Exclude some time spent on touching memory: stamp1 contains the last success or failure
	c.mov(x86::rax, stamp1);
	c.sub(x86::rax, stamp0);
	build_get_tsc(c, stamp1);
	c.sub(stamp1, x86::rax);
	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
	c.jae(fall2);

	Label tx1 = build_transaction_enter(c, fall2, [&]()
	{
		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
		build_get_tsc(c);
		c.sub(x86::rax, stamp1);
		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
		c.jae(fall2);
		c.test(x86::qword_ptr(x86::rbx), 127 - 1);
		c.jnz(fall2);
	});

	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
	c.prefetchw(x86::byte_ptr(x86::rbp, 64));

	// Check pause flag
	c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
	c.jc(fall2);
	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, x86::r13);
	c.jne(fail2);
	c.xbegin(tx1);

	if (s_tsx_avx)
	{
		c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
		c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
		c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
		c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
		c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
		c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
		c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
		c.vptest(x86::ymm0, x86::ymm0);
	}
	else
	{
		c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
		c.orps(x86::xmm0, x86::xmm1);
		c.orps(x86::xmm2, x86::xmm3);
		c.orps(x86::xmm4, x86::xmm5);
		c.orps(x86::xmm6, x86::xmm7);
		c.orps(x86::xmm0, x86::xmm2);
		c.orps(x86::xmm4, x86::xmm6);
		c.orps(x86::xmm0, x86::xmm4);
		c.ptest(x86::xmm0, x86::xmm0);
	}

	c.jnz(fail3);

	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm4);
		c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm5);
		c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm6);
		c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm7);
	}
	else
	{
		c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm8);
		c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm9);
		c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm10);
		c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm11);
		c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm12);
		c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm13);
		c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm14);
		c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm15);
	}

	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
	c.lock().add(x86::qword_ptr(x86::rbx), 127);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);
	c.jmp(_ret);

	// XABORT is expensive so try to finish with xend instead
	c.bind(fail3);

	// Load previous data to store back to rdata
	if (s_tsx_avx)
	{
		c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
		c.vmovaps(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
		c.vmovaps(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
		c.vmovaps(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
	}

	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
	c.jmp(fail2);

	c.bind(fall2);
	c.mov(x86::rax, -1);
	c.jmp(_ret);

	c.bind(fail2);
	c.lock().sub(x86::qword_ptr(x86::rbx), 1);
	c.bind(load);

	// Store previous data back to rdata
	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
		c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
		c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
		c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
	}
	else
	{
		c.movaps(x86::oword_ptr(args[2], 0), x86::xmm0);
		c.movaps(x86::oword_ptr(args[2], 16), x86::xmm1);
		c.movaps(x86::oword_ptr(args[2], 32), x86::xmm2);
		c.movaps(x86::oword_ptr(args[2], 48), x86::xmm3);
		c.movaps(x86::oword_ptr(args[2], 64), x86::xmm4);
		c.movaps(x86::oword_ptr(args[2], 80), x86::xmm5);
		c.movaps(x86::oword_ptr(args[2], 96), x86::xmm6);
		c.movaps(x86::oword_ptr(args[2], 112), x86::xmm7);
	}

	c.mov(x86::rax, -1);
	c.mov(x86::qword_ptr(args[2], ::offset32(&spu_thread::last_ftime) - ::offset32(&spu_thread::rdata)), x86::rax);
	c.xor_(x86::eax, x86::eax);
	//c.jmp(_ret);

	c.bind(_ret);
#ifdef _WIN32
	if (s_tsx_avx)
	{
		c.vmovups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
		c.vmovups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
	}
	else
	{
		c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
		c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
		c.movups(x86::xmm8, x86::oword_ptr(x86::rsp, 32));
		c.movups(x86::xmm9, x86::oword_ptr(x86::rsp, 48));
		c.movups(x86::xmm10, x86::oword_ptr(x86::rsp, 64));
		c.movups(x86::xmm11, x86::oword_ptr(x86::rsp, 80));
		c.movups(x86::xmm12, x86::oword_ptr(x86::rsp, 96));
		c.movups(x86::xmm13, x86::oword_ptr(x86::rsp, 112));
		c.movups(x86::xmm14, x86::oword_ptr(x86::rsp, 128));
		c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
	}
#endif

	if (s_tsx_avx)
	{
		c.vzeroupper();
	}

	c.add(x86::rsp, 168);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::rbp);
	c.ret();
});
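
// JIT-assembled PUTLLUC core: unconditional 128-byte store under TSX. Same
// two-tier structure as spu_putllc_tx above, minus the data comparison; returns
// 0 when both transaction attempts fail and the caller must store another way.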
const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rdata, cpu_thread* _spu)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fall = c.newLabel();
	Label _ret = c.newLabel();
	Label skip = c.newLabel();
	Label next = c.newLabel();

	//if (utils::has_avx() && !s_tsx_avx)
	//{
	//	c.vzeroupper();
	//}

	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
	c.push(x86::rbp);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.sub(x86::rsp, 40);
#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
		c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
	}
#endif

	// Prepare registers
	build_swap_rdx_with(c, args, x86::r12);
	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
	c.prefetchw(x86::byte_ptr(x86::rbp, 64));
	c.and_(args[0].r32(), 0xff80);
	c.shr(args[0].r32(), 1);
	c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
	c.prefetchw(x86::byte_ptr(x86::rbx));
	c.mov(x86::r13, args[1]);

	// Prepare data
	if (s_tsx_avx)
	{
		c.vmovups(x86::ymm0, x86::yword_ptr(args[1], 0));
		c.vmovups(x86::ymm1, x86::yword_ptr(args[1], 32));
		c.vmovups(x86::ymm2, x86::yword_ptr(args[1], 64));
		c.vmovups(x86::ymm3, x86::yword_ptr(args[1], 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(args[1], 0));
		c.movaps(x86::xmm1, x86::oword_ptr(args[1], 16));
		c.movaps(x86::xmm2, x86::oword_ptr(args[1], 32));
		c.movaps(x86::xmm3, x86::oword_ptr(args[1], 48));
		c.movaps(x86::xmm4, x86::oword_ptr(args[1], 64));
		c.movaps(x86::xmm5, x86::oword_ptr(args[1], 80));
		c.movaps(x86::xmm6, x86::oword_ptr(args[1], 96));
		c.movaps(x86::xmm7, x86::oword_ptr(args[1], 112));
	}

	// Alloc args[0] to stamp0
	const auto stamp0 = args[0];
	const auto stamp1 = args[1];
	build_get_tsc(c, stamp0);

	// Begin transaction
	Label tx0 = build_transaction_enter(c, fall, [&]()
	{
		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
		build_get_tsc(c, stamp1);
		c.sub(stamp1, stamp0);
		c.xor_(x86::eax, x86::eax);
		c.cmp(stamp1, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
		c.jae(fall);
	});

	c.xbegin(tx0);
	c.test(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock);
	c.jnz(skip);

	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm0);
		c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm1);
		c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm2);
		c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm3);
	}
	else
	{
		c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm0);
		c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm1);
		c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm2);
		c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm3);
		c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm4);
		c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm5);
		c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm6);
		c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm7);
	}

	c.sub(x86::qword_ptr(x86::rbx), -128);
	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);
	c.jmp(_ret);

	c.bind(skip);
	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
	build_get_tsc(c, stamp1);
	//c.jmp(fall);

	c.bind(fall);

	Label fall2 = c.newLabel();

	// Lightened transaction
	c.bind(next);

	// Lock reservation
	c.mov(x86::eax, 1);
	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
	c.test(x86::eax, 127 - 1);
	c.jnz(fall2);

	// Exclude some time spent on touching memory: stamp1 contains the last success or failure
	c.mov(x86::rax, stamp1);
	c.sub(x86::rax, stamp0);
	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
	c.jae(fall2);
	build_get_tsc(c, stamp1);
	c.sub(stamp1, x86::rax);

	Label tx1 = build_transaction_enter(c, fall2, [&]()
	{
		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
		build_get_tsc(c);
		c.sub(x86::rax, stamp1);
		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
		c.jae(fall2);
	});

	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
	c.prefetchw(x86::byte_ptr(x86::rbp, 64));

	// Check pause flag
	c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
	c.jc(fall2);

	// Check contention (test clears CF, so jnz is the working form of this check)
	c.test(x86::qword_ptr(x86::rbx), 127 - 1);
	c.jnz(fall2);

	c.xbegin(tx1);

	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm0);
		c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm1);
		c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm2);
		c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm3);
	}
	else
	{
		c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm0);
		c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm1);
		c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm2);
		c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm3);
		c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm4);
		c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm5);
		c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm6);
		c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm7);
	}

	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
	c.lock().add(x86::qword_ptr(x86::rbx), 127);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);
	c.jmp(_ret);

	c.bind(fall2);
	c.xor_(x86::eax, x86::eax);
	//c.jmp(_ret);

	c.bind(_ret);
#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
		c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
	}
#endif

	if (s_tsx_avx)
	{
		c.vzeroupper();
	}

	c.add(x86::rsp, 40);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::rbp);
	c.ret();
});
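
// JIT-assembled GETLLAR core: snapshots the 128-byte line into rdata inside a
// TSX transaction, but only if the reservation time still matches rtime;
// returns 0 when the transaction cannot complete and the caller must fall back.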
const extern auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fall = c.newLabel();
	Label _ret = c.newLabel();

	//if (utils::has_avx() && !s_tsx_avx)
	//{
	//	c.vzeroupper();
	//}

	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
	c.push(x86::rbp);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.sub(x86::rsp, 40);
#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
		c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
	}
#endif

	// Prepare registers
	build_swap_rdx_with(c, args, x86::r12);
	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
	c.and_(args[0].r32(), 0xff80);
	c.shr(args[0].r32(), 1);
	c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
	c.mov(x86::r13, args[1]);

	// Alloc args[0] to stamp0
	const auto stamp0 = args[0];
	build_get_tsc(c, stamp0);

	// Begin transaction
	Label tx0 = build_transaction_enter(c, fall, [&]()
	{
		c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
		build_get_tsc(c);
		c.sub(x86::rax, stamp0);
		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit1)));
		c.jae(fall);
	});

	// Check pause flag
	c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
	c.jc(fall);
	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
	c.and_(x86::rax, ~vm::rsrv_shared_mask);
	c.cmp(x86::rax, args[3]);
	c.jne(fall);
	c.xbegin(tx0);

	// Just read data to registers
	if (s_tsx_avx)
	{
		c.vmovups(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
		c.vmovups(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
		c.vmovups(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
		c.vmovups(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
	}

	c.xend();
	c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);

	// Store data
	if (s_tsx_avx)
	{
		c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm0);
		c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm1);
		c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm2);
		c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm3);
	}
	else
	{
		c.movaps(x86::oword_ptr(args[1], 0), x86::xmm0);
		c.movaps(x86::oword_ptr(args[1], 16), x86::xmm1);
		c.movaps(x86::oword_ptr(args[1], 32), x86::xmm2);
		c.movaps(x86::oword_ptr(args[1], 48), x86::xmm3);
		c.movaps(x86::oword_ptr(args[1], 64), x86::xmm4);
		c.movaps(x86::oword_ptr(args[1], 80), x86::xmm5);
		c.movaps(x86::oword_ptr(args[1], 96), x86::xmm6);
		c.movaps(x86::oword_ptr(args[1], 112), x86::xmm7);
	}

	c.jmp(_ret);

	c.bind(fall);
	c.xor_(x86::eax, x86::eax);
	//c.jmp(_ret);

	c.bind(_ret);
#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
		c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
	}
#endif

	if (s_tsx_avx)
	{
		c.vzeroupper();
	}

	c.add(x86::rsp, 40);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::rbp);
	c.ret();
});
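
// Raise interrupt status bits (masked by the enabled set) and, on a 0 to 1
// transition, run the handler attached to the interrupt tag, if any.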
void spu_int_ctrl_t::set(u64 ints)
{
	// Leave only enabled interrupts
	ints &= mask;

	// Notify if at least 1 bit was set
	if (ints && ~stat.fetch_or(ints) & ints)
	{
		std::shared_lock rlock(id_manager::g_mutex);

		if (const auto tag_ptr = tag.lock())
		{
			if (auto handler = tag_ptr->handler.lock())
			{
				rlock.unlock();
				handler->exec();
			}
		}
	}
}

const spu_imm_table_t g_spu_imm;
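
// Lookup table of 2^i scale factors for i in [-155, 173], presumably consumed
// by the scaled int/float conversion instructions (the CSFLT/CFLTS family);
// the pshufb tables built below drive byte shift/rotate quadword operations.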
spu_imm_table_t::scale_table_t::scale_table_t()
{
	for (s32 i = -155; i < 174; i++)
	{
		m_data[i + 155].vf = _mm_set1_ps(static_cast<float>(std::exp2(i)));
	}
}

spu_imm_table_t::spu_imm_table_t()
{
	for (u32 i = 0; i < std::size(sldq_pshufb); i++)
	{
		for (u32 j = 0; j < 16; j++)
		{
			sldq_pshufb[i]._u8[j] = static_cast<u8>(j - i);
		}
	}

	for (u32 i = 0; i < std::size(srdq_pshufb); i++)
	{
		const u32 im = (0u - i) & 0x1f;

		for (u32 j = 0; j < 16; j++)
		{
			srdq_pshufb[i]._u8[j] = (j + im > 15) ? 0xff : static_cast<u8>(j + im);
		}
	}

	for (u32 i = 0; i < std::size(rldq_pshufb); i++)
	{
		for (u32 j = 0; j < 16; j++)
		{
			rldq_pshufb[i]._u8[j] = static_cast<u8>((j - i) & 0xf);
		}
	}
}

std::string spu_thread::dump_regs() const
{
	std::string ret;

	const bool floats_only = debugger_float_mode.load();

	for (u32 i = 0; i < 128; i++, ret += '\n')
	{
		fmt::append(ret, "%s: ", spu_reg_name[i]);

		const auto r = gpr[i];

		if (auto [size, dst, src] = SPUDisAsm::try_get_insert_mask_info(r); size)
		{
			// Special: insertion masks
			const std::string_view type =
				size == 1 ? "byte" :
				size == 2 ? "half" :
				size == 4 ? "word" :
				size == 8 ? "dword" : "error";

			if ((size >= 4u && !src) || (size == 2u && src == 1u) || (size == 1u && src == 3u))
			{
				fmt::append(ret, "insert -> %s[%u]", type, dst);
				continue;
			}
		}
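
		// Decode the raw bits as an SPU float: SPU single precision has an
		// extended exponent range and no Inf/NaN, so bit-casting to a host float
		// would misrepresent large values; reconstruct the value as a double.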
		auto to_f64 = [](u32 bits)
		{
			const u32 abs = bits & 0x7fff'ffff;
			constexpr u32 scale = (1 << 23);
			return std::copysign(abs < scale ? 0 : std::ldexp((scale + (abs % scale)) / f64{scale}, static_cast<int>(abs >> 23) - 127), bits >> 31 ? -1 : 1);
		};

		const double array[]{to_f64(r.u32r[0]), to_f64(r.u32r[1]), to_f64(r.u32r[2]), to_f64(r.u32r[3])};

		const u32 i3 = r._u32[3];
		const bool is_packed = v128::from32p(i3) == r;

		if (floats_only)
		{
			fmt::append(ret, "%g, %g, %g, %g", array[0], array[1], array[2], array[3]);
			continue;
		}

		if (is_packed)
		{
			// Shorthand formatting
			fmt::append(ret, "%08x", i3);
		}
		else
		{
			fmt::append(ret, "%08x %08x %08x %08x", r.u32r[0], r.u32r[1], r.u32r[2], r.u32r[3]);
		}

		if (i3 >= 0x80 && is_exec_code(i3))
		{
			SPUDisAsm dis_asm(cpu_disasm_mode::normal, ls);
			dis_asm.disasm(i3);
			fmt::append(ret, " -> %s", dis_asm.last_opcode);
		}

		if (std::any_of(std::begin(array), std::end(array), [](f64 v){ return v != 0; }))
		{
			if (is_packed)
			{
				fmt::append(ret, " (%g)", array[0]);
			}
			else
			{
				fmt::append(ret, " (%g, %g, %g, %g)", array[0], array[1], array[2], array[3]);
			}
		}
	}

	const auto events = ch_events.load();

	fmt::append(ret, "\nEvent Stat: 0x%x\n", events.events);
	fmt::append(ret, "Event Mask: 0x%x\n", events.mask);
	fmt::append(ret, "Event Count: %u\n", events.count);
	fmt::append(ret, "SRR0: 0x%05x\n", srr0);
	fmt::append(ret, "Stall Stat: %s\n", ch_stall_stat);
	fmt::append(ret, "Stall Mask: 0x%x\n", ch_stall_mask);
	fmt::append(ret, "Tag Stat: %s\n", ch_tag_stat);
	fmt::append(ret, "Tag Update: %s\n", mfc_tag_update{ch_tag_upd});
	fmt::append(ret, "Atomic Stat: %s\n", ch_atomic_stat); // TODO: use mfc_atomic_status formatting
	fmt::append(ret, "Interrupts: %s\n", interrupts_enabled ? "Enabled" : "Disabled");
	fmt::append(ret, "Inbound Mailbox: %s\n", ch_in_mbox);
	fmt::append(ret, "Out Mailbox: %s\n", ch_out_mbox);
	fmt::append(ret, "Out Interrupts Mailbox: %s\n", ch_out_intr_mbox);
	fmt::append(ret, "SNR config: 0x%llx\n", snr_config);
	fmt::append(ret, "SNR1: %s\n", ch_snr1);
	fmt::append(ret, "SNR2: %s\n", ch_snr2);

	const u32 addr = raddr;
	fmt::append(ret, "Reservation Addr: %s\n", addr ? fmt::format("0x%x", addr) : "N/A");
	fmt::append(ret, "Reservation Data:\n");

	be_t<u32> data[32]{};
	std::memcpy(data, rdata, sizeof(rdata)); // Show the data even if the reservation was lost inside the atomic loop

	for (usz i = 0; i < std::size(data); i += 4)
	{
		fmt::append(ret, "[0x%02x] %08x %08x %08x %08x\n", i * sizeof(data[0])
			, data[i + 0], data[i + 1], data[i + 2], data[i + 3]);
	}

	return ret;
}
std : : string spu_thread : : dump_callstack ( ) const
{
2020-11-07 18:12:52 +02:00
std : : string ret ;
fmt : : append ( ret , " Call stack: \n ========= \n 0x%08x (0x0) called \n " , pc ) ;
for ( const auto & sp : dump_callstack_list ( ) )
{
// TODO: function addresses too
fmt : : append ( ret , " > from 0x%08x (sp=0x%08x) \n " , sp . first , sp . second ) ;
}
return ret ;
2020-03-31 03:11:37 +03:00
}
2020-07-03 07:56:55 +03:00
std : : vector < std : : pair < u32 , u32 > > spu_thread : : dump_callstack_list ( ) const
2020-03-31 03:11:37 +03:00
{
2020-11-07 18:12:52 +02:00
std : : vector < std : : pair < u32 , u32 > > call_stack_list ;
bool first = true ;
// Treat the first 128 bytes as invalid for the stack (common values such as 0 do not make sense here)
2020-11-08 15:42:20 +02:00
for ( u32 sp = gpr [ 1 ] . _u32 [ 3 ] ; ( sp & 0xF ) = = 0u & & sp > = 0x80u & & sp < = 0x3FFE0u ; sp = _ref < u32 > ( sp ) , first = false )
2020-11-07 18:12:52 +02:00
{
v128 lr = _ref < v128 > ( sp + 16 ) ;
2020-11-07 21:05:44 +02:00
auto is_invalid = [ this ] ( v128 v )
2020-11-07 18:12:52 +02:00
{
2020-11-07 21:05:44 +02:00
const u32 addr = v . _u32 [ 3 ] & 0x3FFFC ;
2020-11-07 18:12:52 +02:00
if ( v ! = v128 : : from32r ( addr ) )
{
2020-11-07 21:05:44 +02:00
// 1) Non-zero lower words are invalid (because BRSL-like instructions generate only zeroes)
// 2) Bits normally masked out by indirect branches are considered invalid
2020-11-07 18:12:52 +02:00
return true ;
}
2020-11-10 16:57:06 +02:00
return ! addr | | ! is_exec_code ( addr ) ;
2020-11-07 18:12:52 +02:00
} ;
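// Illustrative sketch (an assumption about the SPU ABI, not original code):
// BRSL-like instructions store the return address in the preferred slot
// (word 3) and zero the rest of the register, and indirect branches mask the
// target with 0x3fffc. A plausible saved LR for a call site at 0x1230:
#if 0
const v128 saved_lr = v128::from32r(0x1234); // {0, 0, 0, 0x1234}: BRSL at 0x1230
// is_invalid(saved_lr) is false iff 0x1234 != 0 and is_exec_code(0x1234) holds
#endif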
if ( is_invalid ( lr ) )
{
2020-11-08 15:42:20 +02:00
if ( first )
2020-11-07 18:12:52 +02:00
{
// Function hasn't saved LR, possibly because it's a leaf function
// Use LR directly instead
lr = gpr [ 0 ] ;
if ( is_invalid ( lr ) )
{
// Skip it, workaround
continue ;
}
}
else
{
break ;
}
}
// TODO: function addresses too
call_stack_list . emplace_back ( lr . _u32 [ 3 ] , sp ) ;
}
return call_stack_list ;
2020-03-31 03:11:37 +03:00
}
std : : string spu_thread : : dump_misc ( ) const
{
std : : string ret ;
2015-07-01 01:25:52 +03:00
2020-04-03 11:21:18 +03:00
fmt : : append ( ret , " Block Weight: %u (Retreats: %u) " , block_counter , block_failure ) ;
2020-03-17 20:10:49 +03:00
if ( g_cfg . core . spu_prof )
{
// Get short function hash
const u64 name = atomic_storage < u64 > : : load ( block_hash ) ;
fmt : : append ( ret , " \n Current block: %s " , fmt : : base57 ( be_t < u64 > { name } ) ) ;
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
ret . resize ( ret . size ( ) - 4 ) ;
// Print chunk address from lowest 16 bits
fmt : : append ( ret , " ...chunk-0x%05x " , ( name & 0xffff ) * 4 ) ;
}
2020-04-13 21:53:19 +03:00
2020-11-15 07:37:56 +03:00
const u32 offset = group ? SPU_FAKE_BASE_ADDR + ( id & 0xffffff ) * SPU_LS_SIZE : RAW_SPU_BASE_ADDR + index * RAW_SPU_OFFSET ;
2018-05-18 18:53:01 +03:00
fmt : : append ( ret , " \n [%s] " , ch_mfc_cmd ) ;
2020-01-21 15:08:45 +02:00
fmt : : append ( ret , " \n Local Storage: 0x%08x..0x%08x " , offset , offset + 0x3ffff ) ;
2020-04-13 21:53:19 +03:00
if ( const u64 _time = start_time )
{
if ( const auto func = current_func )
{
2020-04-16 21:16:40 +03:00
ret + = " \n In function: " ;
2020-04-13 21:53:19 +03:00
ret + = func ;
}
else
{
ret + = ' \n ' ;
}
2020-04-14 19:41:31 +03:00
2020-04-13 21:53:19 +03:00
fmt : : append ( ret , " \n Waiting: %fs " , ( get_system_time ( ) - _time ) / 1000000. ) ;
}
else
{
ret + = " \n \n " ;
}
2018-03-25 00:03:32 +03:00
fmt : : append ( ret , " \n Tag Mask: 0x%08x " , ch_tag_mask ) ;
fmt : : append ( ret , " \n MFC Queue Size: %u " , mfc_size ) ;
for ( u32 i = 0 ; i < 16 ; i + + )
{
if ( i < mfc_size )
{
2018-05-18 18:53:01 +03:00
fmt : : append ( ret , " \n %s " , mfc_queue [ i ] ) ;
2018-03-25 00:03:32 +03:00
}
else
{
2020-04-03 11:21:18 +03:00
break ;
2018-03-25 00:03:32 +03:00
}
}
2016-04-14 02:09:41 +03:00
return ret ;
2012-11-15 01:39:56 +02:00
}
2018-10-11 01:17:19 +03:00
void spu_thread : : cpu_init ( )
2012-11-15 01:39:56 +02:00
{
2020-01-11 21:36:03 +02:00
std : : memset ( gpr . data ( ) , 0 , gpr . size ( ) * sizeof ( gpr [ 0 ] ) ) ;
2015-08-26 05:54:06 +03:00
fpscr . Reset ( ) ;
2015-03-02 05:10:41 +03:00
2017-02-17 22:35:57 +03:00
ch_mfc_cmd = { } ;
2013-11-03 21:23:16 +02:00
2017-02-13 16:12:24 +03:00
srr0 = 0 ;
2018-03-25 00:03:32 +03:00
mfc_size = 0 ;
2018-04-03 22:42:47 +03:00
mfc_barrier = 0 ;
mfc_fence = 0 ;
2017-02-17 22:35:57 +03:00
ch_tag_upd = 0 ;
2015-03-13 04:59:25 +03:00
ch_tag_mask = 0 ;
2019-07-09 07:30:06 +03:00
ch_tag_stat . data . raw ( ) = { } ;
2017-02-17 22:35:57 +03:00
ch_stall_mask = 0 ;
2019-07-09 07:30:06 +03:00
ch_stall_stat . data . raw ( ) = { } ;
ch_atomic_stat . data . raw ( ) = { } ;
2013-12-22 21:40:50 +04:00
2019-07-09 07:30:06 +03:00
ch_out_mbox . data . raw ( ) = { } ;
ch_out_intr_mbox . data . raw ( ) = { } ;
2014-07-16 20:10:18 +04:00
2018-08-06 10:19:47 +03:00
ch_events . raw ( ) = { } ;
interrupts_enabled = false ;
2017-02-17 22:35:57 +03:00
raddr = 0 ;
2015-03-02 05:10:41 +03:00
2020-07-30 16:01:25 +03:00
ch_dec_start_timestamp = get_timebased_time ( ) ;
ch_dec_value = option & SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE ? ~ static_cast < u32 > ( ch_dec_start_timestamp ) : 0 ;
2015-03-02 05:10:41 +03:00
2020-07-17 11:18:04 +03:00
if ( get_type ( ) > = spu_type : : raw )
2018-11-19 08:07:44 +02:00
{
2019-10-24 21:03:17 +03:00
ch_in_mbox . clear ( ) ;
ch_snr1 . data . raw ( ) = { } ;
ch_snr2 . data . raw ( ) = { } ;
2018-11-19 08:07:44 +02:00
snr_config = 0 ;
2020-01-07 23:00:06 +02:00
mfc_prxy_mask . raw ( ) = 0 ;
mfc_prxy_write_state = { } ;
2018-11-19 08:07:44 +02:00
}
2020-07-17 11:18:04 +03:00
status_npc . raw ( ) = { get_type ( ) = = spu_type : : isolated ? SPU_STATUS_IS_ISOLATED : 0 , 0 } ;
2019-07-09 07:30:06 +03:00
run_ctrl . raw ( ) = 0 ;
2015-03-02 05:10:41 +03:00
2015-09-18 01:41:14 +03:00
int_ctrl [ 0 ] . clear ( ) ;
int_ctrl [ 1 ] . clear ( ) ;
int_ctrl [ 2 ] . clear ( ) ;
2015-03-02 05:10:41 +03:00
2015-08-26 05:54:06 +03:00
gpr [ 1 ] . _u32 [ 3 ] = 0x3FFF0 ; // initial stack frame pointer
2012-11-15 01:39:56 +02:00
}
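// Layout note with a small sketch (illustrative; the LS size of 0x40000 follows
// from the 0x0..0x3ffff range formatted in dump_misc above): the initial SP of
// 0x3FFF0 leaves exactly one 16-byte quadword of headroom below the top of LS.
#if 0
constexpr u32 ls_size = 0x40000;                // 256 KiB of local storage
constexpr u32 initial_sp = 0x3FFF0;             // as set in cpu_init()
constexpr u32 headroom = ls_size - initial_sp;  // == 0x10, one quadword
#endif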
2020-07-30 12:07:18 +03:00
void spu_thread : : cpu_return ( )
2015-02-01 16:52:34 +03:00
{
2020-07-17 11:18:04 +03:00
if ( get_type ( ) > = spu_type : : raw )
2018-11-05 12:38:35 +01:00
{
2020-02-19 22:25:53 +02:00
if ( status_npc . fetch_op ( [ this ] ( status_npc_sync_var & state )
2020-01-21 15:51:55 +03:00
{
2020-01-21 20:05:45 +02:00
if ( state . status & SPU_STATUS_RUNNING )
{
// Save next PC and current SPU Interrupt Status
// Used only by RunCtrl stop requests
state . status & = ~ SPU_STATUS_RUNNING ;
2020-02-19 22:25:53 +02:00
state . npc = pc | + interrupts_enabled ;
2020-01-21 20:05:45 +02:00
return true ;
}
return false ;
2020-02-19 22:25:53 +02:00
} ) . second )
{
status_npc . notify_one ( ) ;
}
2018-11-13 18:40:41 +03:00
}
2020-07-17 11:18:04 +03:00
else if ( is_stopped ( ) )
2018-11-13 18:40:41 +03:00
{
2019-10-29 21:32:34 +02:00
ch_in_mbox . clear ( ) ;
2020-12-09 10:47:45 +03:00
if ( ensure ( group - > running ) - - = = 1 )
2018-11-05 12:38:35 +01:00
{
2018-11-15 05:56:28 +02:00
{
std : : lock_guard lock ( group - > mutex ) ;
group - > run_state = SPU_THREAD_GROUP_STATUS_INITIALIZED ;
2018-12-25 20:23:03 +03:00
2019-02-08 19:23:49 +02:00
if ( ! group - > join_state )
{
group - > join_state = SYS_SPU_THREAD_GROUP_JOIN_ALL_THREADS_EXIT ;
}
2020-05-11 21:24:04 +03:00
for ( const auto & thread : group - > threads )
{
if ( thread & & thread . get ( ) ! = this & & thread - > status_npc . load ( ) . status > > 16 = = SYS_SPU_THREAD_STOP_THREAD_EXIT )
{
// Wait for all threads to have error codes if exited by sys_spu_thread_exit
2020-05-12 19:17:50 +03:00
for ( u32 status ; ! thread - > exit_status . try_read ( status )
| | status ! = thread - > last_exit_status ; )
2020-05-11 21:24:04 +03:00
{
2020-12-21 17:12:05 +03:00
utils : : pause ( ) ;
2020-05-27 18:41:17 +03:00
}
2020-05-11 21:24:04 +03:00
}
}
2020-05-09 19:49:12 +03:00
if ( status_npc . load ( ) . status > > 16 = = SYS_SPU_THREAD_STOP_THREAD_EXIT )
{
// Set exit status now, in conjunction with group state changes
exit_status . set_value ( last_exit_status ) ;
}
2020-05-12 04:46:26 +03:00
group - > stop_count + + ;
2018-12-25 20:23:03 +03:00
if ( const auto ppu = std : : exchange ( group - > waiter , nullptr ) )
{
// Send exit status directly to the joining thread
ppu - > gpr [ 4 ] = group - > join_state ;
ppu - > gpr [ 5 ] = group - > exit_status ;
2019-02-10 18:22:48 +03:00
group - > join_state . release ( 0 ) ;
2020-05-12 04:46:26 +03:00
lv2_obj : : awake ( ppu ) ;
2018-12-25 20:23:03 +03:00
}
2018-11-15 05:56:28 +02:00
}
2018-11-13 18:40:41 +03:00
// Notify on last thread stopped
2020-05-12 04:46:26 +03:00
group - > stop_count . notify_all ( ) ;
2018-11-05 12:38:35 +01:00
}
2020-05-09 19:49:12 +03:00
else if ( status_npc . load ( ) . status > > 16 = = SYS_SPU_THREAD_STOP_THREAD_EXIT )
{
exit_status . set_value ( last_exit_status ) ;
}
2018-11-13 18:40:41 +03:00
}
}
2018-11-05 12:38:35 +01:00
2018-11-13 18:40:41 +03:00
extern thread_local std : : string ( * g_tls_log_prefix ) ( ) ;
2018-11-05 12:38:35 +01:00
2018-11-13 18:40:41 +03:00
void spu_thread : : cpu_task ( )
{
2018-10-11 01:17:19 +03:00
// Get next PC and SPU Interrupt status
2020-01-21 20:05:45 +02:00
pc = status_npc . load ( ) . npc ;
2020-01-21 15:51:55 +03:00
2020-01-21 18:40:12 +02:00
// Note: works both on RawSPU and threaded SPU!
2018-10-11 01:17:19 +03:00
set_interrupt_status ( ( pc & 1 ) ! = 0 ) ;
pc & = 0x3fffc ;
2016-04-14 02:09:41 +03:00
std : : fesetround ( FE_TOWARDZERO ) ;
2017-12-09 17:57:43 +03:00
2016-04-27 01:27:24 +03:00
g_tls_log_prefix = [ ]
2015-03-20 19:53:54 +03:00
{
2018-10-11 01:17:19 +03:00
const auto cpu = static_cast < spu_thread * > ( get_current_cpu_thread ( ) ) ;
2020-02-28 10:43:37 +03:00
2020-11-26 12:30:51 +03:00
static thread_local shared_ptr < std : : string > name_cache ;
2020-02-28 10:43:37 +03:00
if ( ! cpu - > spu_tname . is_equal ( name_cache ) ) [[unlikely]]
{
2020-12-06 11:13:34 +03:00
cpu - > spu_tname . peek_op ( [ & ] ( const shared_ptr < std : : string > & ptr )
{
if ( ptr ! = name_cache )
{
name_cache = ptr ;
}
} ) ;
2020-02-28 10:43:37 +03:00
}
2020-07-17 11:18:04 +03:00
const auto type = cpu - > get_type ( ) ;
return fmt : : format ( " %sSPU[0x%07x] Thread (%s) [0x%05x] " , type > = spu_type : : raw ? type = = spu_type : : isolated ? " Iso " : " Raw " : " " , cpu - > lv2_id , * name_cache . get ( ) , cpu - > pc ) ;
2016-04-14 02:09:41 +03:00
} ;
2018-04-09 17:45:37 +03:00
if ( jit )
{
2019-03-18 23:01:16 +03:00
while ( true )
2018-04-09 17:45:37 +03:00
{
2020-02-05 10:00:08 +03:00
if ( state ) [[unlikely]]
2019-03-18 23:01:16 +03:00
{
if ( check_state ( ) )
break ;
}
2020-02-19 18:26:41 +03:00
if ( _ref < u32 > ( pc ) = = 0x0u )
2020-01-20 23:40:10 +03:00
{
2020-01-21 15:51:55 +03:00
if ( spu_thread : : stop_and_signal ( 0x0 ) )
pc + = 4 ;
2020-01-20 23:40:10 +03:00
continue ;
}
2020-07-15 21:57:39 +03:00
spu_runtime : : g_gateway ( * this , _ptr < u8 > ( 0 ) , nullptr ) ;
2018-04-09 17:45:37 +03:00
}
2018-06-10 15:46:01 +03:00
// Print some stats
2020-02-01 11:36:09 +03:00
spu_log . notice ( " Stats: Block Weight: %u (Retreats: %u); " , block_counter , block_failure ) ;
2018-04-09 17:45:37 +03:00
}
2019-07-15 16:16:30 +03:00
else
2019-03-25 21:31:16 +03:00
{
2020-12-09 18:04:52 +03:00
ensure ( spu_runtime : : g_interpreter ) ;
2019-07-15 16:16:30 +03:00
2019-03-25 21:31:16 +03:00
while ( true )
{
2020-02-05 10:00:08 +03:00
if ( state ) [[unlikely]]
2019-03-25 21:31:16 +03:00
{
if ( check_state ( ) )
break ;
}
2020-07-15 21:57:39 +03:00
spu_runtime : : g_interpreter ( * this , _ptr < u8 > ( 0 ) , nullptr ) ;
2019-03-25 21:31:16 +03:00
}
2015-03-20 19:53:54 +03:00
}
2012-11-15 01:39:56 +02:00
}
2021-01-21 18:29:37 +02:00
struct raw_spu_cleanup
{
2021-03-02 19:22:39 +03:00
raw_spu_cleanup ( ) = default ;
raw_spu_cleanup ( const raw_spu_cleanup & ) = delete ;
raw_spu_cleanup & operator = ( const raw_spu_cleanup & ) = delete ;
2021-01-21 18:29:37 +02:00
~ raw_spu_cleanup ( )
{
std : : memset ( spu_thread : : g_raw_spu_id , 0 , sizeof ( spu_thread : : g_raw_spu_id ) ) ;
spu_thread : : g_raw_spu_ctr = 0 ;
g_fxo - > get < raw_spu_cleanup > ( ) ; // Register destructor
}
} ;
2021-01-19 20:15:57 +02:00
void spu_thread : : cleanup ( )
2016-04-25 13:49:12 +03:00
{
2021-01-19 20:15:57 +02:00
// Deallocate local storage
2021-02-26 11:20:25 +02:00
ensure ( vm : : dealloc ( vm_offset ( ) , vm : : spu , & shm ) ) ;
if ( g_cfg . core . mfc_debug )
{
utils : : memory_decommit ( vm : : g_stat_addr + vm_offset ( ) , SPU_LS_SIZE ) ;
}
2018-10-11 01:17:19 +03:00
// Deallocate RawSPU ID
2020-07-17 11:18:04 +03:00
if ( get_type ( ) > = spu_type : : raw )
2018-10-11 01:17:19 +03:00
{
g_raw_spu_id [ index ] = 0 ;
g_raw_spu_ctr - - ;
}
2020-10-26 04:05:17 +03:00
2021-01-21 18:29:37 +02:00
// Free the range lock (also signals to the destructor that cleanup was called)
2021-01-22 10:11:54 +02:00
vm : : free_range_lock ( range_lock ) ;
2021-02-03 13:59:01 +02:00
// Signal the debugger about the termination
state + = cpu_flag : : exit ;
2016-04-25 13:49:12 +03:00
}
2021-01-19 20:15:57 +02:00
spu_thread : : ~ spu_thread ( )
{
2021-01-25 17:45:47 +03:00
// Unmap LS and its mirrors
shm - > unmap ( ls + SPU_LS_SIZE ) ;
shm - > unmap ( ls ) ;
shm - > unmap ( ls - SPU_LS_SIZE ) ;
2021-01-21 18:29:37 +02:00
perf_log . notice ( " Perf stats for transactions: success %u, failure %u " , stx , ftx ) ;
perf_log . notice ( " Perf stats for PUTLLC reload: success %u, failure %u " , last_succ , last_fail ) ;
2021-01-19 20:15:57 +02:00
}
2020-11-15 07:37:56 +03:00
spu_thread : : spu_thread ( lv2_spu_group * group , u32 index , std : : string_view name , u32 lv2_id , bool is_isolated , u32 option )
2017-01-25 20:50:30 +03:00
: cpu_thread ( idm : : last_id ( ) )
2021-02-26 11:20:25 +02:00
, group ( group )
2016-04-25 13:49:12 +03:00
, index ( index )
2020-11-15 07:37:56 +03:00
, shm ( std : : make_shared < utils : : shm > ( SPU_LS_SIZE ) )
2020-07-15 21:57:39 +03:00
, ls ( [ & ] ( )
{
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug )
{
utils : : memory_commit ( vm : : g_stat_addr + vm_offset ( ) , SPU_LS_SIZE ) ;
}
2020-11-15 07:37:56 +03:00
if ( ! group )
{
2021-03-19 20:07:35 +02:00
ensure ( vm : : get ( vm : : spu ) - > falloc ( vm_offset ( ) , SPU_LS_SIZE , & shm , 0x200 ) ) ;
2020-11-15 07:37:56 +03:00
}
else
{
2020-11-17 14:53:41 +03:00
// 0x1000 tells falloc to allocate the page with no access rights in base memory
2021-03-19 20:07:35 +02:00
ensure ( vm : : get ( vm : : spu ) - > falloc ( vm_offset ( ) , SPU_LS_SIZE , & shm , 0x1200 ) ) ;
2020-11-15 07:37:56 +03:00
}
2021-01-25 17:45:47 +03:00
// Try to guess a free area
const auto start = vm : : g_free_addr + SPU_LS_SIZE * ( cpu_thread : : id & 0xffffff ) * 12 ;
2021-01-19 20:15:57 +02:00
2021-01-25 17:45:47 +03:00
u32 total = 0 ;
2020-11-15 07:37:56 +03:00
2021-01-25 17:45:47 +03:00
// Map LS and its mirrors
for ( u64 addr = reinterpret_cast < u64 > ( start ) ; addr < 0x8000'0000'0000 ; )
2020-07-15 21:57:39 +03:00
{
2021-01-25 17:45:47 +03:00
if ( auto ptr = shm - > try_map ( reinterpret_cast < u8 * > ( addr ) ) )
{
if ( + + total = = 3 )
{
// Use the middle mirror
return ptr - SPU_LS_SIZE ;
}
addr + = SPU_LS_SIZE ;
}
else
{
// Reset, clean up and start again
for ( u32 i = 1 ; i < = total ; i + + )
{
shm - > unmap ( reinterpret_cast < u8 * > ( addr - i * SPU_LS_SIZE ) ) ;
}
total = 0 ;
addr + = 0x10000 ;
}
2020-07-15 21:57:39 +03:00
}
2021-01-25 17:45:47 +03:00
fmt : : throw_exception ( " Failed to map SPU LS memory " ) ;
2020-07-15 21:57:39 +03:00
} ( ) )
2020-07-17 11:18:04 +03:00
, thread_type ( group ? spu_type : : threaded : is_isolated ? spu_type : : isolated : spu_type : : raw )
2020-07-30 16:01:25 +03:00
, option ( option )
2019-11-03 01:44:02 +02:00
, lv2_id ( lv2_id )
2020-11-26 12:30:51 +03:00
, spu_tname ( make_single < std : : string > ( name ) )
2016-04-14 02:09:41 +03:00
{
2018-04-09 17:45:37 +03:00
if ( g_cfg . core . spu_decoder = = spu_decoder_type : : asmjit )
{
2018-05-03 15:55:45 +03:00
jit = spu_recompiler_base : : make_asmjit_recompiler ( ) ;
2018-04-09 17:45:37 +03:00
}
if ( g_cfg . core . spu_decoder = = spu_decoder_type : : llvm )
{
2019-05-17 23:54:47 +03:00
jit = spu_recompiler_base : : make_fast_llvm_recompiler ( ) ;
2018-04-09 17:45:37 +03:00
}
2018-05-03 15:55:45 +03:00
2018-06-04 00:20:14 +03:00
if ( g_cfg . core . spu_decoder ! = spu_decoder_type : : fast & & g_cfg . core . spu_decoder ! = spu_decoder_type : : precise )
{
if ( g_cfg . core . spu_block_size ! = spu_block_size_type : : safe )
{
// Initialize stack mirror
std : : memset ( stack_mirror . data ( ) , 0xff , sizeof ( stack_mirror ) ) ;
}
}
2018-10-11 01:17:19 +03:00
2020-07-17 11:18:04 +03:00
if ( get_type ( ) > = spu_type : : raw )
2018-10-11 01:17:19 +03:00
{
cpu_init ( ) ;
}
2020-10-26 04:05:17 +03:00
range_lock = vm : : alloc_range_lock ( ) ;
2016-04-14 02:09:41 +03:00
}
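// Sketch of the LS mirror trick used by the constructor above (illustrative,
// not original code): three consecutive mappings of the same 256 KiB shared
// memory are reserved and `ls` points at the middle one, so accesses slightly
// below offset 0 or above 0x40000 still hit valid LS bytes, and transfers that
// cross the LS boundary wrap around without explicit address masking.
#if 0
// host address space:  [mirror0][mirror1][mirror2]
//                               ^ ls
// Reading 16 bytes at LS offset 0x3fff8 runs over into mirror2, which maps the
// same pages as mirror1's start, i.e. it reads LS offsets 0x3fff8..0x7 wrapped.
#endif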
2014-09-24 22:44:26 +04:00
2018-10-11 01:17:19 +03:00
void spu_thread : : push_snr ( u32 number , u32 value )
2016-04-14 02:09:41 +03:00
{
2016-04-19 16:04:02 +03:00
// Get channel
const auto channel = number & 1 ? & ch_snr2 : & ch_snr1 ;
2014-09-24 22:44:26 +04:00
2020-10-12 17:19:06 +03:00
// Prepare some data
const u32 event_bit = SPU_EVENT_S1 > > ( number & 1 ) ;
const u32 bitor_bit = ( snr_config > > number ) & 1 ;
2020-10-31 01:52:24 +03:00
// Redundant, g_use_rtm is checked inside tx_start now.
2020-10-12 17:19:06 +03:00
if ( g_use_rtm )
{
bool channel_notify = false ;
bool thread_notify = false ;
const bool ok = utils : : tx_start ( [ & ]
{
channel_notify = ( channel - > data . raw ( ) & spu_channel : : bit_wait ) ! = 0 ;
thread_notify = ( channel - > data . raw ( ) & spu_channel : : bit_count ) = = 0 ;
if ( bitor_bit )
{
channel - > data . raw ( ) & = ~ spu_channel : : bit_wait ;
channel - > data . raw ( ) | = spu_channel : : bit_count | value ;
}
else
{
channel - > data . raw ( ) = spu_channel : : bit_count | value ;
}
if ( thread_notify )
{
ch_events . raw ( ) . events | = event_bit ;
if ( ch_events . raw ( ) . mask & event_bit )
{
ch_events . raw ( ) . count = 1 ;
thread_notify = ch_events . raw ( ) . waiting ! = 0 ;
}
else
{
thread_notify = false ;
}
}
} ) ;
if ( ok )
{
if ( channel_notify )
channel - > data . notify_one ( ) ;
if ( thread_notify )
this - > notify ( ) ;
return ;
}
}
// Lock event channel in case it needs event notification
ch_events . atomic_op ( [ ] ( ch_events_t & ev )
{
ev . locks + + ;
} ) ;
2016-04-19 16:04:02 +03:00
// Check corresponding SNR register settings
2020-10-12 17:19:06 +03:00
if ( bitor_bit )
2015-07-01 01:25:52 +03:00
{
2020-10-12 17:19:06 +03:00
if ( channel - > push_or ( value ) )
set_events ( event_bit ) ;
2015-07-01 01:25:52 +03:00
}
2016-04-14 02:09:41 +03:00
else
2015-07-01 01:25:52 +03:00
{
2020-10-12 17:19:06 +03:00
if ( channel - > push ( value ) )
set_events ( event_bit ) ;
2016-04-14 02:09:41 +03:00
}
2020-10-12 17:19:06 +03:00
ch_events . atomic_op ( [ ] ( ch_events_t & ev )
{
ev . locks - - ;
} ) ;
2014-10-02 14:29:20 +04:00
}
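// Behavior sketch (restates the code above; the values are made up): bit
// `number` of snr_config selects logical-OR mode for that signal notification
// register, otherwise a new write overwrites the pending value.
#if 0
// snr_config = 0b01: SNR1 in OR mode, SNR2 in overwrite mode
// push_snr(0, 0x1); push_snr(0, 0x2);  -> SNR1 reads back 0x3 (values OR'd)
// push_snr(1, 0x1); push_snr(1, 0x2);  -> SNR2 reads back 0x2 (last write wins)
#endif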
2020-10-30 02:07:20 +03:00
void spu_thread : : do_dma_transfer ( spu_thread * _this , const spu_mfc_cmd & args , u8 * ls )
2014-08-23 01:15:02 +04:00
{
2020-11-08 17:21:19 +03:00
perf_meter < " DMA " _u32 > perf_ ;
2018-03-10 06:16:22 +02:00
const bool is_get = ( args . cmd & ~ ( MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK ) ) = = MFC_GET_CMD ;
2014-08-23 01:15:02 +04:00
2017-02-17 22:35:57 +03:00
u32 eal = args . eal ;
u32 lsa = args . lsa & 0x3ffff ;
2015-02-15 20:13:06 +03:00
2020-11-23 20:02:05 +02:00
// Keep src pointing to const data
u8 * dst = nullptr ;
const u8 * src = nullptr ;
std : : tie ( dst , src ) = [ & ] ( ) - > std : : pair < u8 * , const u8 * >
{
u8 * dst = vm : : _ptr < u8 > ( eal ) ;
u8 * src = ls + lsa ;
if ( is_get )
{
std : : swap ( src , dst ) ;
}
return { dst , src } ;
} ( ) ;
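// Direction sketch (restating the swap above with hypothetical addresses):
// PUT-class commands copy LS -> main memory, GET-class the reverse.
#if 0
const bool is_get_example = true;
u8* a = vm::_ptr<u8>(0x10000);  // hypothetical effective address
u8* b = ls + 0x100;             // hypothetical LS offset
if (is_get_example)
	std::swap(a, b);            // GET: main memory -> LS; PUT keeps LS -> memory
std::memcpy(a, b, 16);          // dst = a, src = b
#endif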
2018-04-01 21:48:58 +03:00
// SPU Thread Group MMIO (LS and SNR) and RawSPU MMIO
2020-10-30 02:07:20 +03:00
if ( _this & & eal > = RAW_SPU_BASE_ADDR )
2014-08-23 01:15:02 +04:00
{
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug & & _this )
{
// TODO
}
2015-03-05 00:51:14 +03:00
const u32 index = ( eal - SYS_SPU_THREAD_BASE_LOW ) / SYS_SPU_THREAD_OFFSET ; // thread number in group
const u32 offset = ( eal - SYS_SPU_THREAD_BASE_LOW ) % SYS_SPU_THREAD_OFFSET ; // LS offset or MMIO register
2015-02-15 20:13:06 +03:00
2018-04-01 21:48:58 +03:00
if ( eal < SYS_SPU_THREAD_BASE_LOW )
{
// RawSPU MMIO
2018-10-11 01:17:19 +03:00
auto thread = idm : : get < named_thread < spu_thread > > ( find_raw_spu ( ( eal - RAW_SPU_BASE_ADDR ) / RAW_SPU_OFFSET ) ) ;
2018-04-01 21:48:58 +03:00
if ( ! thread )
{
2020-10-01 19:52:39 +03:00
// Access Violation
2018-04-01 21:48:58 +03:00
}
2020-10-01 19:52:39 +03:00
else if ( ( eal - RAW_SPU_BASE_ADDR ) % RAW_SPU_OFFSET + args . size - 1 < SPU_LS_SIZE ) // LS access
2018-04-01 21:48:58 +03:00
{
}
2020-10-01 19:52:39 +03:00
else if ( u32 value ; args . size = = 4 & & is_get & & thread - > read_reg ( eal , value ) )
2018-04-01 21:48:58 +03:00
{
2020-10-30 02:07:20 +03:00
_this - > _ref < u32 > ( lsa ) = value ;
2018-04-01 21:48:58 +03:00
return ;
}
2020-10-30 02:07:20 +03:00
else if ( args . size = = 4 & & ! is_get & & thread - > write_reg ( eal , args . cmd ! = MFC_SDCRZ_CMD ? + _this - > _ref < u32 > ( lsa ) : 0 ) )
2018-04-01 21:48:58 +03:00
{
return ;
}
else
{
2020-12-09 18:04:52 +03:00
fmt : : throw_exception ( " Invalid RawSPU MMIO offset (cmd=[%s]) " , args ) ;
2018-04-01 21:48:58 +03:00
}
}
2020-10-30 02:07:20 +03:00
else if ( _this - > get_type ( ) > = spu_type : : raw )
2018-04-01 21:48:58 +03:00
{
2020-10-01 19:52:39 +03:00
// Access Violation
2018-04-01 21:48:58 +03:00
}
2020-10-30 02:07:20 +03:00
else if ( _this - > group & & _this - > group - > threads_map [ index ] ! = - 1 )
2015-02-15 20:13:06 +03:00
{
2020-10-30 02:07:20 +03:00
auto & spu = static_cast < spu_thread & > ( * _this - > group - > threads [ _this - > group - > threads_map [ index ] ] ) ;
2014-09-19 04:19:22 +04:00
2020-11-23 20:02:05 +02:00
if ( offset + args . size < = SPU_LS_SIZE ) // LS access
2014-08-23 01:15:02 +04:00
{
2020-11-23 20:02:05 +02:00
// Redirect the access to the target SPU's LS
if ( auto ptr = spu . ls + offset ; is_get )
src = ptr ;
else
dst = ptr ;
2014-08-23 01:15:02 +04:00
}
2017-02-17 22:35:57 +03:00
else if ( ! is_get & & args . size = = 4 & & ( offset = = SYS_SPU_THREAD_SNR1 | | offset = = SYS_SPU_THREAD_SNR2 ) )
2014-08-23 01:15:02 +04:00
{
2020-10-30 02:07:20 +03:00
spu . push_snr ( SYS_SPU_THREAD_SNR2 = = offset , args . cmd ! = MFC_SDCRZ_CMD ? + _this - > _ref < u32 > ( lsa ) : 0 ) ;
2014-08-23 01:15:02 +04:00
return ;
}
else
{
2020-12-09 18:04:52 +03:00
fmt : : throw_exception ( " Invalid MMIO offset (cmd=[%s]) " , args ) ;
2014-08-23 01:15:02 +04:00
}
}
else
{
2020-10-01 19:52:39 +03:00
// Access Violation
2014-08-23 01:15:02 +04:00
}
}
2020-10-29 05:35:09 +03:00
// Cleanup: if PUT or GET happens after PUTLLC failure, it's too complicated and it's easier to just give up
2020-10-30 02:07:20 +03:00
if ( _this )
{
_this - > last_faddr = 0 ;
}
2020-10-29 05:35:09 +03:00
2020-05-16 20:03:27 +03:00
// This case is so rare that optimizations are not implemented (TODO)
alignas ( 64 ) static constexpr u8 zero_buf [ 0x10000 ] { } ;
if ( args . cmd = = MFC_SDCRZ_CMD )
2019-06-11 22:45:11 +03:00
{
2020-05-16 20:03:27 +03:00
src = zero_buf ;
2019-06-11 22:45:11 +03:00
}
2020-09-04 09:52:56 +03:00
if ( ( ! g_use_rtm & & ! is_get ) | | g_cfg . core . spu_accurate_dma ) [[unlikely]]
2018-05-21 20:25:05 +03:00
{
2020-11-08 17:21:19 +03:00
perf_meter < " ADMA_GET " _u64 > perf_get = perf_ ;
perf_meter < " ADMA_PUT " _u64 > perf_put = perf_ ;
2020-10-27 21:34:08 +03:00
2020-10-30 02:07:20 +03:00
cpu_thread * _cpu = _this ? _this : get_current_cpu_thread ( ) ;
atomic_t < u64 , 64 > * range_lock = nullptr ;
if ( ! _this ) [[unlikely]]
{
if ( _cpu - > id_type ( ) = = 2 )
{
// Use range_lock of current SPU thread for range locks
range_lock = static_cast < spu_thread * > ( _cpu ) - > range_lock ;
}
else
{
goto plain_access ;
}
}
else
{
range_lock = _this - > range_lock ;
}
2020-11-24 08:18:31 +03:00
utils : : prefetch_write ( range_lock ) ;
2020-11-08 17:21:19 +03:00
2020-10-30 02:07:20 +03:00
for ( u32 size = args . size , size0 ; is_get ; size - = size0 , dst + = size0 , src + = size0 , eal + = size0 )
2020-05-08 20:41:15 +03:00
{
2020-09-03 00:58:29 +03:00
size0 = std : : min < u32 > ( 128 - ( eal & 127 ) , std : : min < u32 > ( size , 128 ) ) ;
2020-05-08 20:41:15 +03:00
for ( u64 i = 0 ; ; [ & ] ( )
{
2020-10-30 02:07:20 +03:00
if ( _cpu - > state )
2020-09-03 00:58:29 +03:00
{
2020-10-30 02:07:20 +03:00
_cpu - > check_state ( ) ;
2020-09-03 00:58:29 +03:00
}
else if ( + + i < 25 ) [[likely]]
2020-05-08 20:41:15 +03:00
{
busy_wait ( 300 ) ;
}
else
{
2020-10-30 02:07:20 +03:00
_cpu - > state + = cpu_flag : : wait + cpu_flag : : temp ;
2020-05-08 20:41:15 +03:00
std : : this_thread : : yield ( ) ;
2020-10-30 02:07:20 +03:00
_cpu - > check_state ( ) ;
2020-05-08 20:41:15 +03:00
}
} ( ) )
{
2021-03-05 22:05:37 +03:00
const u64 time0 = vm : : reservation_acquire ( eal ) ;
2020-05-08 20:41:15 +03:00
2020-09-05 20:16:03 +03:00
if ( time0 & 127 )
2020-05-08 20:41:15 +03:00
{
continue ;
}
2020-10-12 15:20:06 +03:00
const auto cpu = static_cast < spu_thread * > ( get_current_cpu_thread ( ) ) ;
alignas ( 64 ) u8 temp [ 128 ] ;
u8 * dst0 = cpu & & cpu - > id_type ( ) ! = 1 & & ( eal & - 128 ) = = cpu - > raddr ? temp : dst ;
if ( dst0 = = + temp & & time0 ! = cpu - > rtime )
{
// Validate rtime for read data
cpu - > set_events ( SPU_EVENT_LR ) ;
cpu - > raddr = 0 ;
}
2020-10-12 17:19:06 +03:00
2020-09-03 00:58:29 +03:00
switch ( size0 )
2020-05-08 20:41:15 +03:00
{
case 1 :
{
2020-10-12 15:20:06 +03:00
* reinterpret_cast < u8 * > ( dst0 ) = * reinterpret_cast < const u8 * > ( src ) ;
2020-05-08 20:41:15 +03:00
break ;
}
case 2 :
{
2020-10-12 15:20:06 +03:00
* reinterpret_cast < u16 * > ( dst0 ) = * reinterpret_cast < const u16 * > ( src ) ;
2020-05-08 20:41:15 +03:00
break ;
}
case 4 :
{
2020-10-12 15:20:06 +03:00
* reinterpret_cast < u32 * > ( dst0 ) = * reinterpret_cast < const u32 * > ( src ) ;
2020-05-08 20:41:15 +03:00
break ;
}
case 8 :
{
2020-10-12 15:20:06 +03:00
* reinterpret_cast < u64 * > ( dst0 ) = * reinterpret_cast < const u64 * > ( src ) ;
2020-05-08 20:41:15 +03:00
break ;
}
2020-09-03 00:58:29 +03:00
case 128 :
{
2020-10-12 15:20:06 +03:00
mov_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst0 ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ;
2020-09-03 00:58:29 +03:00
break ;
}
2020-05-08 20:41:15 +03:00
default :
{
2020-10-12 15:20:06 +03:00
auto dst1 = dst0 ;
auto src1 = src ;
auto size1 = size0 ;
2020-05-08 20:41:15 +03:00
2020-10-12 15:20:06 +03:00
while ( size1 )
2020-05-08 20:41:15 +03:00
{
2020-10-12 15:20:06 +03:00
* reinterpret_cast < v128 * > ( dst1 ) = * reinterpret_cast < const v128 * > ( src1 ) ;
2020-05-08 20:41:15 +03:00
2020-10-12 15:20:06 +03:00
dst1 + = 16 ;
src1 + = 16 ;
size1 - = 16 ;
2020-05-08 20:41:15 +03:00
}
break ;
}
}
2021-03-05 22:05:37 +03:00
if ( time0 ! = vm : : reservation_acquire ( eal ) | | ( size0 = = 128 & & ! cmp_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst0 ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ) )
2020-05-08 20:41:15 +03:00
{
continue ;
}
2020-10-12 15:20:06 +03:00
if ( dst0 = = + temp )
{
// Write to LS
std : : memcpy ( dst , dst0 , size0 ) ;
// Validate data
if ( std : : memcmp ( dst0 , & cpu - > rdata [ eal & 127 ] , size0 ) ! = 0 )
{
cpu - > set_events ( SPU_EVENT_LR ) ;
cpu - > raddr = 0 ;
}
}
2020-05-08 20:41:15 +03:00
2020-09-03 00:58:29 +03:00
break ;
}
if ( size = = size0 )
{
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug & & _this )
{
auto & dump = reinterpret_cast < mfc_cmd_dump * > ( vm : : g_stat_addr + _this - > vm_offset ( ) ) [ _this - > mfc_dump_idx + + % spu_thread : : max_mfc_dump_idx ] ;
dump . cmd = args ;
dump . cmd . eah = _this - > pc ;
std : : memcpy ( dump . data , is_get ? dst : src , std : : min < u32 > ( args . size , 128 ) ) ;
}
2020-05-08 20:41:15 +03:00
return ;
}
}
2020-10-26 04:05:17 +03:00
if ( g_cfg . core . spu_accurate_dma ) [[unlikely]]
{
for ( u32 size0 , size = args . size ; ; size - = size0 , dst + = size0 , src + = size0 , eal + = size0 )
{
size0 = std : : min < u32 > ( 128 - ( eal & 127 ) , std : : min < u32 > ( size , 128 ) ) ;
if ( size0 = = 128u & & g_cfg . core . accurate_cache_line_stores )
{
// As atomic as PUTLLUC
do_cell_atomic_128_store ( eal , src ) ;
if ( size = = size0 )
{
break ;
}
continue ;
}
2020-10-27 21:34:08 +03:00
// Lock each cache line
2021-03-05 22:05:37 +03:00
auto & res = vm : : reservation_acquire ( eal ) ;
2020-10-27 21:34:08 +03:00
// Lock each bit corresponding to a byte being written, using some free space in reservation memory
2021-03-10 15:54:32 +03:00
auto * bits = utils : : bless < atomic_t < u128 > > ( vm : : g_reservations + ( ( eal & 0xff80 ) / 2 + 16 ) ) ;
2020-10-27 21:34:08 +03:00
// Get writing mask
const u128 wmask = ( ~ u128 { } < < ( eal & 127 ) ) & ( ~ u128 { } > > ( 127 - ( ( eal + size0 - 1 ) & 127 ) ) ) ;
//const u64 start = (eal & 127) / 2;
//const u64 _end_ = ((eal + size0 - 1) & 127) / 2;
//const u64 wmask = (UINT64_MAX << start) & (UINT64_MAX >> (63 - _end_));
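// Worked example for the mask above (illustrative numbers): a 16-byte store at
// eal = 0x10030 occupies bytes 0x30..0x3f of its 128-byte line, so wmask gets
// bits 48..63 set:
#if 0
const u32 eal = 0x10030, size0 = 16;
const u128 wmask = (~u128{} << (eal & 127)) & (~u128{} >> (127 - ((eal + size0 - 1) & 127)));
// (eal & 127) == 48, ((eal + size0 - 1) & 127) == 63 -> bits 48..63 of wmask set
#endif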
u128 old = 0 ;
for ( u64 i = 0 ; i ! = umax ; [ & ] ( )
{
2020-10-30 02:07:20 +03:00
if ( _cpu - > state & cpu_flag : : pause )
2020-10-27 21:34:08 +03:00
{
2020-11-09 22:53:32 +03:00
const bool ok = cpu_thread : : if_suspended < 0 > ( _cpu , { dst , dst + 64 , & res } , [ & ]
2020-10-27 21:34:08 +03:00
{
std : : memcpy ( dst , src , size0 ) ;
res + = 128 ;
} ) ;
2020-11-09 22:53:32 +03:00
if ( ok )
{
// Exit loop and function
i = - 1 ;
bits = nullptr ;
return ;
}
2020-10-27 21:34:08 +03:00
}
2020-11-09 22:53:32 +03:00
if ( + + i < 10 )
2020-10-27 21:34:08 +03:00
{
busy_wait ( 500 ) ;
}
else
{
// Wait
2020-10-30 02:07:20 +03:00
_cpu - > state + = cpu_flag : : wait + cpu_flag : : temp ;
2020-10-27 21:34:08 +03:00
bits - > wait ( old , wmask ) ;
2020-10-30 02:07:20 +03:00
_cpu - > check_state ( ) ;
2020-10-27 21:34:08 +03:00
}
} ( ) )
{
// Completed in suspend_all()
if ( ! bits )
{
break ;
}
bool ok = false ;
std : : tie ( old , ok ) = bits - > fetch_op ( [ & ] ( auto & v )
{
if ( v & wmask )
{
return false ;
}
v | = wmask ;
return true ;
} ) ;
if ( ok ) [[likely]]
{
break ;
}
}
if ( ! bits )
{
if ( size = = size0 )
{
break ;
}
continue ;
}
// Lock reservation (shared)
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
{
if ( r & vm : : rsrv_unique_lock )
{
return false ;
}
r + = 1 ;
return true ;
} ) ;
if ( ! _ok )
{
vm : : reservation_shared_lock_internal ( res ) ;
}
// Obtain range lock as normal store
2020-11-01 23:29:33 +03:00
vm : : range_lock ( range_lock , eal , size0 ) ;
2020-10-26 04:05:17 +03:00
switch ( size0 )
{
case 1 :
{
* reinterpret_cast < u8 * > ( dst ) = * reinterpret_cast < const u8 * > ( src ) ;
break ;
}
case 2 :
{
* reinterpret_cast < u16 * > ( dst ) = * reinterpret_cast < const u16 * > ( src ) ;
break ;
}
case 4 :
{
* reinterpret_cast < u32 * > ( dst ) = * reinterpret_cast < const u32 * > ( src ) ;
break ;
}
case 8 :
{
* reinterpret_cast < u64 * > ( dst ) = * reinterpret_cast < const u64 * > ( src ) ;
break ;
}
case 128 :
{
mov_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ;
break ;
}
default :
{
auto _dst = dst ;
auto _src = src ;
auto _size = size0 ;
while ( _size )
{
* reinterpret_cast < v128 * > ( _dst ) = * reinterpret_cast < const v128 * > ( _src ) ;
_dst + = 16 ;
_src + = 16 ;
_size - = 16 ;
}
break ;
}
}
2020-10-27 21:34:08 +03:00
range_lock - > release ( 0 ) ;
res + = 127 ;
// Release bits and notify
bits - > atomic_op ( [ & ] ( auto & v )
{
v & = ~ wmask ;
} ) ;
2020-11-04 17:19:35 +03:00
bits - > notify_all ( wmask ) ;
2020-10-26 04:05:17 +03:00
if ( size = = size0 )
{
break ;
}
}
2020-12-06 12:10:00 +03:00
//atomic_fence_seq_cst();
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug & & _this )
{
auto & dump = reinterpret_cast < mfc_cmd_dump * > ( vm : : g_stat_addr + _this - > vm_offset ( ) ) [ _this - > mfc_dump_idx + + % spu_thread : : max_mfc_dump_idx ] ;
dump . cmd = args ;
dump . cmd . eah = _this - > pc ;
std : : memcpy ( dump . data , is_get ? dst : src , std : : min < u32 > ( args . size , 128 ) ) ;
}
2020-10-26 04:05:17 +03:00
return ;
}
2020-11-08 17:21:19 +03:00
else
{
perf_put . reset ( ) ;
perf_get . reset ( ) ;
}
2020-10-26 04:05:17 +03:00
2020-11-08 17:21:19 +03:00
perf_meter < " DMA_PUT " _u64 > perf2 = perf_ ;
2020-10-27 21:34:08 +03:00
2018-05-21 20:25:05 +03:00
switch ( u32 size = args . size )
{
case 1 :
{
2020-11-08 08:01:35 +03:00
vm : : range_lock < 1 > ( range_lock , eal , 1 ) ;
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u8 * > ( dst ) = * reinterpret_cast < const u8 * > ( src ) ;
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2018-05-21 20:25:05 +03:00
break ;
}
case 2 :
{
2020-11-08 08:01:35 +03:00
vm : : range_lock < 2 > ( range_lock , eal , 2 ) ;
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u16 * > ( dst ) = * reinterpret_cast < const u16 * > ( src ) ;
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2018-05-21 20:25:05 +03:00
break ;
}
case 4 :
{
2020-11-08 08:01:35 +03:00
vm : : range_lock < 4 > ( range_lock , eal , 4 ) ;
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u32 * > ( dst ) = * reinterpret_cast < const u32 * > ( src ) ;
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2018-05-21 20:25:05 +03:00
break ;
}
case 8 :
{
2020-11-08 08:01:35 +03:00
vm : : range_lock < 8 > ( range_lock , eal , 8 ) ;
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u64 * > ( dst ) = * reinterpret_cast < const u64 * > ( src ) ;
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2018-05-21 20:25:05 +03:00
break ;
}
default :
{
2019-01-15 17:31:21 +02:00
if ( ( ( eal & 127 ) + size ) < = 128 )
2018-05-21 20:25:05 +03:00
{
2020-11-01 23:29:33 +03:00
vm : : range_lock ( range_lock , eal , size ) ;
2018-05-21 20:25:05 +03:00
2019-01-15 17:31:21 +02:00
while ( size )
2018-05-21 20:25:05 +03:00
{
2019-06-06 21:32:35 +03:00
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
2018-05-21 20:25:05 +03:00
2019-01-15 17:31:21 +02:00
dst + = 16 ;
src + = 16 ;
size - = 16 ;
2018-05-21 20:25:05 +03:00
}
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2019-01-15 17:31:21 +02:00
break ;
2018-05-21 20:25:05 +03:00
}
2020-04-11 11:16:28 +03:00
u32 range_addr = eal & - 128 ;
2020-12-18 17:43:34 +03:00
u32 range_end = utils : : align ( eal + size , 128 ) ;
2020-04-11 11:16:28 +03:00
2020-10-26 04:05:17 +03:00
// Handle the case of crossing 64K page borders (TODO: maybe split in 4K fragments?)
2020-04-11 11:16:28 +03:00
if ( range_addr > > 16 ! = ( range_end - 1 ) > > 16 )
{
u32 nexta = range_end & - 65536 ;
u32 size0 = nexta - eal ;
size - = size0 ;
// Split locking + transfer in two parts (before 64K border, and after it)
2020-11-01 23:29:33 +03:00
vm : : range_lock ( range_lock , range_addr , size0 ) ;
2020-04-26 19:46:19 +03:00
2020-04-27 17:46:15 +03:00
// Avoid unaligned stores in mov_rdata_avx
if ( reinterpret_cast < u64 > ( dst ) & 0x10 )
{
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
dst + = 16 ;
src + = 16 ;
size0 - = 16 ;
}
2020-04-11 11:16:28 +03:00
while ( size0 > = 128 )
{
2020-04-07 20:29:11 +03:00
mov_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ;
2020-04-11 11:16:28 +03:00
dst + = 128 ;
src + = 128 ;
size0 - = 128 ;
}
while ( size0 )
{
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
dst + = 16 ;
src + = 16 ;
size0 - = 16 ;
}
2020-04-26 19:46:19 +03:00
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2020-04-11 11:16:28 +03:00
range_addr = nexta ;
}
2020-11-01 23:29:33 +03:00
vm : : range_lock ( range_lock , range_addr , range_end - range_addr ) ;
2019-01-15 17:31:21 +02:00
2020-04-27 17:46:15 +03:00
// Avoid unaligned stores in mov_rdata_avx
if ( reinterpret_cast < u64 > ( dst ) & 0x10 )
{
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
dst + = 16 ;
src + = 16 ;
size - = 16 ;
}
2019-01-15 17:31:21 +02:00
while ( size > = 128 )
{
2020-04-07 20:29:11 +03:00
mov_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ;
2019-01-15 17:31:21 +02:00
dst + = 128 ;
src + = 128 ;
size - = 128 ;
}
while ( size )
{
2019-06-06 21:32:35 +03:00
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
2019-01-15 17:31:21 +02:00
dst + = 16 ;
src + = 16 ;
size - = 16 ;
}
2020-10-26 04:05:17 +03:00
range_lock - > release ( 0 ) ;
2018-05-21 20:25:05 +03:00
break ;
}
}
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug & & _this )
{
auto & dump = reinterpret_cast < mfc_cmd_dump * > ( vm : : g_stat_addr + _this - > vm_offset ( ) ) [ _this - > mfc_dump_idx + + % spu_thread : : max_mfc_dump_idx ] ;
dump . cmd = args ;
dump . cmd . eah = _this - > pc ;
std : : memcpy ( dump . data , is_get ? dst : src , std : : min < u32 > ( args . size , 128 ) ) ;
}
2018-05-21 20:25:05 +03:00
return ;
}
2020-10-30 02:07:20 +03:00
plain_access :
2017-02-17 22:35:57 +03:00
switch ( u32 size = args . size )
2014-08-23 01:15:02 +04:00
{
2017-02-17 22:35:57 +03:00
case 1 :
{
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u8 * > ( dst ) = * reinterpret_cast < const u8 * > ( src ) ;
2017-02-17 22:35:57 +03:00
break ;
2014-08-23 01:15:02 +04:00
}
2017-02-17 22:35:57 +03:00
case 2 :
{
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u16 * > ( dst ) = * reinterpret_cast < const u16 * > ( src ) ;
2017-02-17 22:35:57 +03:00
break ;
}
case 4 :
{
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u32 * > ( dst ) = * reinterpret_cast < const u32 * > ( src ) ;
2017-02-17 22:35:57 +03:00
break ;
}
case 8 :
2014-08-23 01:15:02 +04:00
{
2019-01-15 17:31:21 +02:00
* reinterpret_cast < u64 * > ( dst ) = * reinterpret_cast < const u64 * > ( src ) ;
2017-02-17 22:35:57 +03:00
break ;
}
default :
{
2020-04-27 17:46:15 +03:00
// Avoid unaligned stores in mov_rdata_avx
if ( reinterpret_cast < u64 > ( dst ) & 0x10 )
{
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
dst + = 16 ;
src + = 16 ;
size - = 16 ;
}
2019-01-15 17:31:21 +02:00
while ( size > = 128 )
{
2020-04-07 20:29:11 +03:00
mov_rdata ( * reinterpret_cast < spu_rdata_t * > ( dst ) , * reinterpret_cast < const spu_rdata_t * > ( src ) ) ;
2017-02-17 22:35:57 +03:00
2019-01-15 17:31:21 +02:00
dst + = 128 ;
src + = 128 ;
size - = 128 ;
}
while ( size )
2017-02-17 22:35:57 +03:00
{
2019-06-06 21:32:35 +03:00
* reinterpret_cast < v128 * > ( dst ) = * reinterpret_cast < const v128 * > ( src ) ;
2019-01-15 17:31:21 +02:00
dst + = 16 ;
src + = 16 ;
size - = 16 ;
2017-02-17 22:35:57 +03:00
}
2019-01-15 17:31:21 +02:00
2018-05-21 20:25:05 +03:00
break ;
2014-08-23 01:15:02 +04:00
}
}
2021-02-26 11:20:25 +02:00
if ( g_cfg . core . mfc_debug & & _this )
{
auto & dump = reinterpret_cast < mfc_cmd_dump * > ( vm : : g_stat_addr + _this - > vm_offset ( ) ) [ _this - > mfc_dump_idx + + % spu_thread : : max_mfc_dump_idx ] ;
dump . cmd = args ;
dump . cmd . eah = _this - > pc ;
std : : memcpy ( dump . data , is_get ? dst : src , std : : min < u32 > ( args . size , 128 ) ) ;
}
2015-03-02 05:10:41 +03:00
}
2014-08-23 01:15:02 +04:00
2018-10-11 01:17:19 +03:00
bool spu_thread : : do_dma_check ( const spu_mfc_cmd & args )
2014-08-23 01:15:02 +04:00
{
2020-04-14 19:41:31 +03:00
const u32 mask = utils : : rol32 ( 1 , args . tag ) ;
2018-04-03 16:09:43 +03:00
2020-02-05 10:00:08 +03:00
if ( mfc_barrier & mask | | ( args . cmd & ( MFC_BARRIER_MASK | MFC_FENCE_MASK ) & & mfc_fence & mask ) ) [[unlikely]]
2018-03-25 00:03:32 +03:00
{
2018-04-03 16:09:43 +03:00
// Check for special value combination (normally impossible)
2018-04-28 20:11:16 +03:00
if ( false )
2018-03-25 00:03:32 +03:00
{
2018-04-03 16:09:43 +03:00
// Update barrier/fence masks if necessary
mfc_barrier = 0 ;
mfc_fence = 0 ;
2018-03-25 00:03:32 +03:00
2018-04-03 16:09:43 +03:00
for ( u32 i = 0 ; i < mfc_size ; i + + )
{
2018-04-03 22:42:47 +03:00
if ( ( mfc_queue [ i ] . cmd & ~ 0xc ) = = MFC_BARRIER_CMD )
2018-04-03 16:09:43 +03:00
{
mfc_barrier | = - 1 ;
2020-04-14 19:41:31 +03:00
mfc_fence | = utils : : rol32 ( 1 , mfc_queue [ i ] . tag ) ;
2018-04-03 16:09:43 +03:00
continue ;
}
2018-04-03 22:42:47 +03:00
if ( true )
2018-04-03 16:09:43 +03:00
{
2020-04-14 19:41:31 +03:00
const u32 _mask = utils : : rol32 ( 1u , mfc_queue [ i ] . tag ) ;
2018-04-03 16:09:43 +03:00
// A command with barrier hard blocks that tag until it's been dealt with
if ( mfc_queue [ i ] . cmd & MFC_BARRIER_MASK )
{
mfc_barrier | = _mask ;
}
// A new command that has a fence can't be executed until the stalled list has been dealt with
mfc_fence | = _mask ;
}
}
if ( mfc_barrier & mask | | ( args . cmd & MFC_FENCE_MASK & & mfc_fence & mask ) )
2018-03-25 00:03:32 +03:00
{
return false ;
}
2018-04-03 16:09:43 +03:00
return true ;
2018-03-25 00:03:32 +03:00
}
2018-04-03 16:09:43 +03:00
return false ;
2018-03-25 00:03:32 +03:00
}
2015-03-02 05:10:41 +03:00
2018-03-25 00:03:32 +03:00
return true ;
}
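// Tag-mask sketch (values illustrative): each of the 32 MFC tags maps to one
// bit via utils::rol32(1, tag), and a command is blocked when its bit is set
// in mfc_barrier, or in mfc_fence if the command carries a barrier/fence flag.
#if 0
const u32 tag = 5;
const u32 mask = utils::rol32(1, tag); // == 0x20
const bool blocked = (mfc_barrier & mask) || ((args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK)) && (mfc_fence & mask));
#endif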
2018-10-11 01:17:19 +03:00
bool spu_thread : : do_list_transfer ( spu_mfc_cmd & args )
2018-03-25 00:03:32 +03:00
{
2019-07-24 02:13:45 +04:00
// Amount of elements to fetch in one go
constexpr u32 fetch_size = 6 ;
struct alignas ( 8 ) list_element
2018-03-25 00:03:32 +03:00
{
be_t < u16 > sb ; // Stall-and-Notify bit (0x8000)
be_t < u16 > ts ; // List Transfer Size
be_t < u32 > ea ; // External Address Low
2019-07-24 02:13:45 +04:00
} ;
2014-08-23 01:15:02 +04:00
2019-07-24 02:13:45 +04:00
union
2014-08-23 01:15:02 +04:00
{
2019-07-24 02:13:45 +04:00
list_element items [ fetch_size ] ;
alignas ( v128 ) char bufitems [ sizeof ( items ) ] ;
} ;
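// Element layout sketch (matches the struct above; the sample value is made
// up): each list entry occupies 8 bytes in LS, a stall bit plus a 15-bit
// transfer size in the first halfword, then the low 32 bits of the EA.
#if 0
// raw big-endian element 0x8000'0010'1234'5600 decodes as:
//   sb & 0x8000 -> stall-and-notify bit set
//   ts & 0x7fff -> transfer size 0x10
//   ea          -> effective address 0x12345600
#endif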
2018-03-25 00:03:32 +03:00
2019-07-24 02:13:45 +04:00
spu_mfc_cmd transfer ;
transfer . eah = 0 ;
transfer . tag = args . tag ;
transfer . cmd = MFC ( args . cmd & ~ MFC_LIST_MASK ) ;
2014-08-23 01:15:02 +04:00
2019-07-24 02:13:45 +04:00
args . lsa & = 0x3fff0 ;
2020-07-15 21:57:39 +03:00
args . eal & = 0x3fff8 ;
2019-01-07 08:13:17 +02:00
2019-07-24 02:13:45 +04:00
u32 index = fetch_size ;
// Assume called with size greater than 0
while ( true )
{
// Check if fetching is needed
if ( index = = fetch_size )
{
// Reset to elements array head
index = 0 ;
2020-07-15 21:57:39 +03:00
const auto src = _ptr < const void > ( args . eal ) ;
2020-04-10 10:05:23 +03:00
const v128 data0 = v128 : : loadu ( src , 0 ) ;
const v128 data1 = v128 : : loadu ( src , 1 ) ;
const v128 data2 = v128 : : loadu ( src , 2 ) ;
2017-02-17 22:35:57 +03:00
2019-12-03 00:31:34 +03:00
reinterpret_cast < v128 * > ( bufitems ) [ 0 ] = data0 ;
reinterpret_cast < v128 * > ( bufitems ) [ 1 ] = data1 ;
reinterpret_cast < v128 * > ( bufitems ) [ 2 ] = data2 ;
2019-07-24 02:13:45 +04:00
}
2018-03-25 00:03:32 +03:00
2019-07-24 02:13:45 +04:00
const u32 size = items [ index ] . ts & 0x7fff ;
const u32 addr = items [ index ] . ea ;
2018-03-25 00:03:32 +03:00
2020-10-01 19:52:39 +03:00
spu_log . trace ( " LIST: item=0x%016x, lsa=0x%05x " , std : : bit_cast < be_t < u64 > > ( items [ index ] ) , args . lsa | ( addr & 0xf ) ) ;
2018-03-25 00:03:32 +03:00
if ( size )
{
transfer . eal = addr ;
transfer . lsa = args . lsa | ( addr & 0xf ) ;
transfer . size = size ;
2020-10-30 02:07:20 +03:00
do_dma_transfer ( this , transfer , ls ) ;
2018-03-25 00:03:32 +03:00
const u32 add_size = std : : max < u32 > ( size , 16 ) ;
args . lsa + = add_size ;
}
args . size - = 8 ;
2019-07-24 02:13:45 +04:00
if ( ! args . size )
{
// No more elements
break ;
}
args . eal + = 8 ;
2020-02-05 10:00:08 +03:00
if ( items [ index ] . sb & 0x8000 ) [[unlikely]]
2019-07-24 02:13:45 +04:00
{
2020-04-14 19:41:31 +03:00
ch_stall_mask | = utils : : rol32 ( 1 , args . tag ) ;
2019-07-24 02:13:45 +04:00
if ( ! ch_stall_stat . get_count ( ) )
{
2018-08-06 10:19:47 +03:00
set_events ( SPU_EVENT_SN ) ;
2019-07-24 02:13:45 +04:00
}
2020-04-14 19:41:31 +03:00
ch_stall_stat . set_value ( utils : : rol32 ( 1 , args . tag ) | ch_stall_stat . get_value ( ) ) ;
2019-07-24 02:13:45 +04:00
args . tag | = 0x80 ; // Set stalled status
return false ;
}
index + + ;
2018-03-25 00:03:32 +03:00
}
return true ;
}
2020-09-11 06:49:19 +03:00
bool spu_thread : : do_putllc ( const spu_mfc_cmd & args )
{
2020-10-18 15:00:10 +03:00
perf_meter < " PUTLLC- " _u64 > perf0 ;
perf_meter < " PUTLLC+ " _u64 > perf1 = perf0 ;
2020-09-11 06:49:19 +03:00
// Store conditionally
const u32 addr = args . eal & - 128 ;
if ( [ & ] ( )
{
2020-10-18 15:00:10 +03:00
perf_meter < " PUTLLC. " _u64 > perf2 = perf0 ;
2020-09-11 06:49:19 +03:00
if ( raddr ! = addr )
{
return false ;
}
2020-04-07 20:29:11 +03:00
const auto & to_write = _ref < spu_rdata_t > ( args . lsa & 0x3ff80 ) ;
2021-03-05 22:05:37 +03:00
auto & res = vm : : reservation_acquire ( addr ) ;
2020-09-11 06:49:19 +03:00
2020-09-14 23:38:17 +03:00
// TODO: Limit scope!!
rsx : : reservation_lock rsx_lock ( addr , 128 ) ;
2020-09-11 06:49:19 +03:00
if ( ! g_use_rtm & & rtime ! = res )
{
return false ;
}
if ( ! g_use_rtm & & cmp_rdata ( to_write , rdata ) )
{
// Writeback of unchanged data. Only check memory change
2020-04-07 20:29:11 +03:00
return cmp_rdata ( rdata , vm : : _ref < spu_rdata_t > ( addr ) ) & & res . compare_and_swap_test ( rtime , rtime + 128 ) ;
2020-09-11 06:49:19 +03:00
}
if ( g_use_rtm ) [[likely]]
{
2020-10-31 01:52:24 +03:00
switch ( u64 count = spu_putllc_tx ( addr , rtime , rdata , to_write ) )
2020-09-11 06:49:19 +03:00
{
2020-10-31 01:52:24 +03:00
case UINT64_MAX :
2020-09-11 06:49:19 +03:00
{
2020-10-30 05:17:00 +03:00
auto & data = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
2020-10-29 05:35:09 +03:00
2020-11-09 03:41:56 +03:00
const bool ok = cpu_thread : : suspend_all < + 3 > ( this , { data , data + 64 , & res } , [ & ] ( )
2020-10-30 05:17:00 +03:00
{
2020-10-09 20:33:12 +03:00
if ( ( res & - 128 ) = = rtime )
2020-10-08 16:13:55 +03:00
{
2020-10-09 20:33:12 +03:00
if ( cmp_rdata ( rdata , data ) )
{
mov_rdata ( data , to_write ) ;
2020-10-28 21:58:52 +03:00
res + = 127 ;
2020-10-09 20:33:12 +03:00
return true ;
}
2020-09-11 06:49:19 +03:00
}
2020-10-29 05:35:09 +03:00
// Save previous data
2020-10-30 05:17:00 +03:00
mov_rdata_nt ( rdata , data ) ;
2020-10-28 21:58:52 +03:00
res - = 1 ;
2020-10-09 20:33:12 +03:00
return false ;
} ) ;
2020-11-15 03:26:10 +03:00
const u64 count2 = __rdtsc ( ) - perf2 . get ( ) ;
if ( count2 > 20000 & & g_cfg . core . perf_report ) [[unlikely]]
{
perf_log . warning ( u8 " PUTLLC: took too long: %.3fµs (%u c) (addr=0x%x) (S) " , count2 / ( utils : : get_tsc_freq ( ) / 1000'000 . ) , count2 , addr ) ;
}
2020-10-29 05:35:09 +03:00
if ( ok )
{
break ;
}
2020-10-31 01:52:24 +03:00
last_ftime = - 1 ;
2020-10-29 05:35:09 +03:00
[[fallthrough]] ;
}
case 0 :
{
if ( addr = = last_faddr )
{
last_fail + + ;
}
2020-10-31 01:52:24 +03:00
if ( last_ftime ! = umax )
{
last_faddr = 0 ;
return false ;
}
2020-11-24 08:18:31 +03:00
utils : : prefetch_read ( rdata ) ;
utils : : prefetch_read ( rdata + 64 ) ;
2020-10-29 05:35:09 +03:00
last_faddr = addr ;
last_ftime = res . load ( ) & - 128 ;
last_ftsc = __rdtsc ( ) ;
return false ;
2020-09-11 06:49:19 +03:00
}
2020-10-19 15:31:10 +03:00
default :
{
2020-10-31 01:52:24 +03:00
if ( count > 20000 & & g_cfg . core . perf_report ) [[unlikely]]
2020-10-19 15:31:10 +03:00
{
2020-11-15 03:26:10 +03:00
perf_log . warning ( u8 " PUTLLC: took too long: %.3fµs (%u c) (addr = 0x%x) " , count / ( utils : : get_tsc_freq ( ) / 1000'000 . ) , count , addr ) ;
2020-10-19 15:31:10 +03:00
}
2020-10-29 05:35:09 +03:00
break ;
2020-10-19 15:31:10 +03:00
}
2020-09-11 06:49:19 +03:00
}
2020-10-29 05:35:09 +03:00
if ( addr = = last_faddr )
{
last_succ + + ;
}
last_faddr = 0 ;
return true ;
2020-09-11 06:49:19 +03:00
}
2020-10-15 12:40:53 +03:00
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
2020-09-11 06:49:19 +03:00
{
2020-10-15 12:40:53 +03:00
if ( ( r & - 128 ) ! = rtime | | ( r & 127 ) )
2020-10-08 16:13:55 +03:00
{
return false ;
}
2020-10-15 12:40:53 +03:00
r + = vm : : rsrv_unique_lock ;
return true ;
} ) ;
2020-10-08 16:13:55 +03:00
2020-10-15 12:40:53 +03:00
if ( ! _ok )
2020-10-08 16:13:55 +03:00
{
2020-10-15 12:40:53 +03:00
// Already locked or updated: give up
2020-09-11 06:49:19 +03:00
return false ;
}
vm : : _ref < atomic_t < u32 > > ( addr ) + = 0 ;
2020-04-07 20:29:11 +03:00
auto & super_data = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
2020-09-11 06:49:19 +03:00
const bool success = [ & ] ( )
{
// Full lock (heavyweight)
// TODO: vm::check_addr
vm : : writer_lock lock ( addr ) ;
if ( cmp_rdata ( rdata , super_data ) )
{
mov_rdata ( super_data , to_write ) ;
2020-10-13 11:07:43 +03:00
res + = 64 ;
2020-09-11 06:49:19 +03:00
return true ;
}
2020-10-13 11:07:43 +03:00
res - = 64 ;
2020-09-11 06:49:19 +03:00
return false ;
} ( ) ;
return success ;
} ( ) )
{
2021-03-05 22:05:37 +03:00
vm : : reservation_notifier ( addr ) . notify_all ( - 128 ) ;
2020-09-11 06:49:19 +03:00
raddr = 0 ;
2020-10-18 15:00:10 +03:00
perf0 . reset ( ) ;
2020-09-11 06:49:19 +03:00
return true ;
}
else
{
if ( raddr )
{
// Last check for event before we clear the reservation
2020-09-15 21:19:58 +03:00
if ( raddr = = addr )
2020-09-11 06:49:19 +03:00
{
2018-08-06 10:19:47 +03:00
set_events ( SPU_EVENT_LR ) ;
2020-09-11 06:49:19 +03:00
}
2020-09-15 21:19:58 +03:00
else
{
get_events ( SPU_EVENT_LR ) ;
}
2020-09-11 06:49:19 +03:00
}
2020-11-10 19:09:28 +02:00
if ( ! vm : : check_addr ( addr , vm : : page_writable ) )
2020-10-18 11:01:57 +03:00
{
vm : : _ref < atomic_t < u8 > > ( addr ) + = 0 ; // Raise an access violation
}
2020-09-11 06:49:19 +03:00
raddr = 0 ;
2020-10-18 15:00:10 +03:00
perf1 . reset ( ) ;
2020-09-11 06:49:19 +03:00
return false ;
}
}
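// Reservation-counter sketch (restates the arithmetic used above, values
// illustrative): the low 7 bits of the 64-bit reservation word act as lock
// bits, so a clean value is a multiple of 128. A successful store advances the
// version by 128 in total; lock/unlock pairs therefore sum to +128 (e.g. +1
// then +127, or +64 then +64), while a failed locked attempt undoes its own
// increment so the version is left unchanged.
#if 0
u64 res = 0x100; // unlocked, version 2
res += 1;        // shared-lock it (res & 127 != 0 -> readers retry)
res += 127;      // release + publish: res == 0x180, version 3
#endif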
2020-09-25 17:29:25 +03:00
void do_cell_atomic_128_store ( u32 addr , const void * to_write )
2018-03-25 00:03:32 +03:00
{
2020-10-18 15:00:10 +03:00
perf_meter < " STORE128 " _u64 > perf0 ;
2020-09-25 17:29:25 +03:00
const auto cpu = get_current_cpu_thread ( ) ;
2020-09-14 23:38:17 +03:00
rsx : : reservation_lock rsx_lock ( addr , 128 ) ;
2018-06-21 15:24:47 +03:00
2020-02-05 10:00:08 +03:00
if ( g_use_rtm ) [[likely]]
2018-03-25 00:03:32 +03:00
{
2020-11-15 03:26:10 +03:00
u64 result = spu_putlluc_tx ( addr , to_write , cpu ) ;
2018-06-11 02:30:43 +03:00
2020-10-08 16:13:55 +03:00
if ( result = = 0 )
2018-05-19 23:14:02 +03:00
{
2020-10-30 05:17:00 +03:00
auto & sdata = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
2021-03-05 22:05:37 +03:00
auto & res = vm : : reservation_acquire ( addr ) ;
2020-10-30 05:17:00 +03:00
2020-11-09 03:41:56 +03:00
cpu_thread : : suspend_all < + 2 > ( cpu , { & res } , [ & ]
2020-04-29 15:40:41 +03:00
{
2020-10-30 05:17:00 +03:00
mov_rdata_nt ( sdata , * static_cast < const spu_rdata_t * > ( to_write ) ) ;
res + = 127 ;
2020-10-09 20:33:12 +03:00
} ) ;
2020-04-29 15:40:41 +03:00
}
2020-11-15 03:26:10 +03:00
if ( ! result )
{
result = __rdtsc ( ) - perf0 . get ( ) ;
}
if ( result > 20000 & & g_cfg . core . perf_report ) [[unlikely]]
2020-10-19 15:31:10 +03:00
{
2020-11-15 03:26:10 +03:00
perf_log . warning ( u8 " STORE128: took too long: %.3fµs (%u c) (addr=0x%x) " , result / ( utils : : get_tsc_freq ( ) / 1000'000 . ) , result , addr ) ;
2020-10-19 15:31:10 +03:00
}
2020-05-17 08:59:49 +03:00
2020-09-25 17:29:25 +03:00
static_cast < void > ( cpu - > test_stopped ( ) ) ;
2018-05-21 20:25:05 +03:00
}
else
{
2020-04-07 20:29:11 +03:00
auto & data = vm : : _ref < spu_rdata_t > ( addr ) ;
2020-10-08 16:13:55 +03:00
auto [ res , time0 ] = vm : : reservation_lock ( addr ) ;
2018-06-02 13:43:22 +03:00
2019-01-15 17:31:21 +02:00
* reinterpret_cast < atomic_t < u32 > * > ( & data ) + = 0 ;
2018-06-02 13:43:22 +03:00
2020-04-07 20:29:11 +03:00
auto & super_data = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
2018-06-11 02:30:43 +03:00
{
2020-09-10 21:54:00 +03:00
// Full lock (heavyweight)
// TODO: vm::check_addr
vm : : writer_lock lock ( addr ) ;
2020-04-07 20:29:11 +03:00
mov_rdata ( super_data , * static_cast < const spu_rdata_t * > ( to_write ) ) ;
2020-10-13 11:07:43 +03:00
res + = 64 ;
2018-06-11 02:30:43 +03:00
}
2018-03-25 00:03:32 +03:00
}
2020-09-25 17:29:25 +03:00
}
void spu_thread : : do_putlluc ( const spu_mfc_cmd & args )
{
2020-10-18 15:00:10 +03:00
perf_meter < " PUTLLUC " _u64 > perf0 ;
2020-09-25 17:29:25 +03:00
const u32 addr = args . eal & - 128 ;
if ( raddr & & addr = = raddr )
{
// Try to process PUTLLUC using PUTLLC when a reservation is active:
// If it fails, the reservation is cleared, the LR event is set, and we fall back to the main implementation
// All of this is done atomically in PUTLLC
if ( do_putllc ( args ) )
{
// Success, return as our job was done here
return ;
}
// Failure, fall back to the main implementation
}
2018-03-25 00:03:32 +03:00
2020-04-07 20:29:11 +03:00
do_cell_atomic_128_store ( addr , _ptr < spu_rdata_t > ( args . lsa & 0x3ff80 ) ) ;
2021-03-05 22:05:37 +03:00
vm : : reservation_notifier ( addr ) . notify_all ( - 128 ) ;
2018-03-25 00:03:32 +03:00
}
void spu_thread::do_mfc(bool /*wait*/)
{
	u32 removed = 0;
	u32 barrier = 0;
	u32 fence = 0;

	// Process enqueued commands
	static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args)
	{
		// Select tag bit in the tag mask or the stall mask
		const u32 mask = utils::rol32(1, args.tag);

		if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
		{
			if (&args - mfc_queue <= removed)
			{
				// Remove barrier-class command if it's the first in the queue
				atomic_fence_seq_cst();
				removed++;
				return true;
			}

			// Block all tags
			barrier |= -1;
			fence |= mask;
			return false;
		}

		if (barrier & mask)
		{
			fence |= mask;
			return false;
		}

		if (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && fence & mask)
		{
			if (args.cmd & MFC_BARRIER_MASK)
			{
				barrier |= mask;
			}

			return false;
		}

		if (args.cmd & MFC_LIST_MASK)
		{
			if (!(args.tag & 0x80))
			{
				if (do_list_transfer(args))
				{
					removed++;
					return true;
				}
			}

			if (args.cmd & MFC_BARRIER_MASK)
			{
				barrier |= mask;
			}

			fence |= mask;
			return false;
		}

		if (args.cmd == MFC_PUTQLLUC_CMD)
		{
			if (fence & mask)
			{
				return false;
			}

			do_putlluc(args);
		}
		else if (args.size)
		{
			do_dma_transfer(this, args, ls);
		}

		removed++;
		return true;
	}));

	mfc_size -= removed;
	mfc_barrier = barrier;
	mfc_fence = fence;

	if (removed && ch_tag_upd)
	{
		const u32 completed = get_mfc_completed();

		if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
		{
			ch_tag_stat.set_value(completed);
			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
		}
		else if (completed == ch_tag_mask && ch_tag_upd == MFC_TAG_UPDATE_ALL)
		{
			ch_tag_stat.set_value(completed);
			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
		}
	}

	if (check_mfc_interrupts(pc + 4))
	{
		spu_runtime::g_escape(this);
	}
}
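// Fence/barrier bookkeeping sketch (illustrative, not from the original source): "fence"
// accumulates one bit per tag that still has a queued command, while "barrier" marks tags
// whose later commands must not overtake a barrier-class command. E.g. if a list transfer
// on tag 3 stays queued, fence gains bit 3, and any subsequent fenced or barriered command
// on tag 3 is held in the queue until that transfer retires.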
bool spu_thread::check_mfc_interrupts(u32 next_pc)
{
	if (ch_events.load().count && std::exchange(interrupts_enabled, false))
	{
		srr0 = next_pc;

		// Test for BR/BRA instructions (they are equivalent at zero pc)
		const u32 br = _ref<u32>(0);
		pc = (br & 0xfd80007f) == 0x30000000 ? (br >> 5) & 0x3fffc : 0;
		return true;
	}

	return false;
}
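// Branch-decode note (a hedged reading of the check above, not an ISA reference): in the
// I16 format BRA encodes as 0x30000000 and BR as 0x32000000; masking with 0xfd80007f
// ignores the relative/absolute bit (0x02000000) and requires the RT field (low 7 bits)
// to be zero, so both forms compare equal to 0x30000000. At pc = 0, relative and absolute
// targets coincide, which is why one comparison covers both. The word-aligned target is
// recovered from the 16-bit immediate in bits 7..22: (br >> 7) << 2, written above as
// (br >> 5) & 0x3fffc.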
bool spu_thread::is_exec_code(u32 addr) const
{
	if (addr & ~0x3FFFC)
	{
		return false;
	}

	for (u32 i = 0; i < 30; i++)
	{
		const u32 addr0 = addr + (i * 4);
		const u32 op = _ref<u32>(addr0);
		const auto type = s_spu_itype.decode(op);

		if (type == spu_itype::UNK || !op)
		{
			return false;
		}

		if (type & spu_itype::branch)
		{
			// TODO
			break;
		}
	}

	return true;
}
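// Range-check note (illustrative): 0x3FFFC is the highest word-aligned LS address
// (256 KiB - 4), so `addr & ~0x3FFFC` is non-zero exactly when addr is unaligned (low two
// bits set) or outside local storage. The scan itself is only a heuristic: it inspects up
// to 30 instructions and gives up at the first branch.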
u32 spu_thread::get_mfc_completed() const
{
	return ch_tag_mask & ~mfc_fence;
}
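// Worked example (illustrative, not from the original source): with ch_tag_mask = 0b0101
// (tags 0 and 2 selected) and mfc_fence = 0b0001 (tag 0 still has a queued command),
// get_mfc_completed() returns 0b0100 - only tag 2 is reported complete, because a set
// fence bit means at least one command for that tag is still pending in the queue.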
bool spu_thread::process_mfc_cmd()
{
	// Stall infinitely if MFC queue is full
	while (mfc_size >= 16) [[unlikely]]
	{
		auto old = state.add_fetch(cpu_flag::wait);

		if (is_stopped(old))
		{
			return false;
		}

		thread_ctrl::wait_on(state, old);
	}

	spu::scheduler::concurrent_execution_watchdog watchdog(*this);
	spu_log.trace("DMAC: (%s)", ch_mfc_cmd);

	switch (ch_mfc_cmd.cmd)
	{
	case MFC_SDCRT_CMD:
	case MFC_SDCRTST_CMD:
		return true;
	case MFC_GETLLAR_CMD:
	{
		perf_meter<"GETLLAR"_u64> perf0;

		const u32 addr = ch_mfc_cmd.eal & -128;
		const auto& data = vm::_ref<spu_rdata_t>(addr);

		if (addr == last_faddr)
		{
			// TODO: make this configurable and possible to disable
			spu_log.trace(u8"GETLLAR after fail: addr=0x%x, time=%u c", last_faddr, (perf0.get() - last_ftsc));
		}

		if (addr == last_faddr && perf0.get() - last_ftsc < 1000 && (vm::reservation_acquire(addr) & -128) == last_ftime)
		{
			rtime = last_ftime;
			raddr = last_faddr;
			last_ftime = 0;
			mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);

			ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
			return true;
		}
		else
		{
			// Silent failure
			last_faddr = 0;
		}

		if (addr == raddr && !g_use_rtm && g_cfg.core.spu_getllar_polling_detection && rtime == vm::reservation_acquire(addr) && cmp_rdata(rdata, data))
		{
			// Spinning, might as well yield cpu resources
			std::this_thread::yield();

			// Reset perf
			perf0.restart();
		}

		alignas(64) spu_rdata_t temp;
		u64 ntime;
		rsx::reservation_lock rsx_lock(addr, 128);

		if (raddr)
		{
			// Save rdata from previous reservation
			mov_rdata(temp, rdata);
		}

		for (u64 i = 0; i != umax; [&]()
		{
			if (state & cpu_flag::pause)
			{
				auto& sdata = *vm::get_super_ptr<spu_rdata_t>(addr);

				const bool ok = cpu_thread::if_suspended<0>(this, {&ntime}, [&]
				{
					// Guaranteed success
					ntime = vm::reservation_acquire(addr);
					mov_rdata_nt(rdata, sdata);
				});

				// Exit loop
				if (ok && (ntime & 127) == 0)
				{
					atomic_fence_seq_cst();
					i = -1;
					return;
				}
			}

			if (++i < 25) [[likely]]
			{
				busy_wait(300);
			}
			else
			{
				state += cpu_flag::wait + cpu_flag::temp;
				std::this_thread::yield();
				!check_state();
			}
		}())
		{
			ntime = vm::reservation_acquire(addr);

			if (ntime & vm::rsrv_unique_lock)
			{
				// There's an on-going reservation store, wait
				continue;
			}

			u64 test_mask = -1;

			if (ntime & 127)
			{
				// Try to use TSX to obtain data atomically
				if (!g_use_rtm || !spu_getllar_tx(addr, rdata, this, ntime & -128))
				{
					// See previous ntime check.
					continue;
				}
				else
				{
					// If succeeded, only need to check unique lock bit
					test_mask = ~vm::rsrv_shared_mask;
				}
			}
			else
			{
				mov_rdata(rdata, data);
			}

			if (u64 time0 = vm::reservation_acquire(addr); (ntime & test_mask) != (time0 & test_mask))
			{
				// Reservation data has been modified recently
				if (time0 & vm::rsrv_unique_lock) i += 12;
				continue;
			}

			if (g_cfg.core.spu_accurate_getllar && !cmp_rdata(rdata, data))
			{
				i += 2;
				continue;
			}

			if (i >= 15 && g_cfg.core.perf_report) [[unlikely]]
			{
				perf_log.warning("GETLLAR: took too long: %u", i);
			}

			break;
		}

		if (raddr && raddr != addr)
		{
			// Last check for event before we replace the reservation with a new one
			if (reservation_check(raddr, temp))
			{
				set_events(SPU_EVENT_LR);
			}
		}
		else if (raddr == addr)
		{
			// Lost previous reservation on polling
			if (ntime != rtime || !cmp_rdata(rdata, temp))
			{
				set_events(SPU_EVENT_LR);
			}
		}

		raddr = addr;
		rtime = ntime;
		mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);

		ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);

		if (g_cfg.core.mfc_debug)
		{
			auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
			dump.cmd = ch_mfc_cmd;
			dump.cmd.eah = pc;
			std::memcpy(dump.data, rdata, 128);
		}

		return true;
	}
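	// Reservation word layout assumed above (a hedged reading of the vm:: helpers, not a
	// spec): vm::reservation_acquire() returns a 64-bit value whose low 7 bits are
	// lock/flag bits (vm::rsrv_unique_lock among them) and whose upper bits form the
	// version "time". That is why timestamps are compared with `& -128` and a clean,
	// unlocked reservation satisfies (ntime & 127) == 0.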
	case MFC_PUTLLC_CMD:
	{
		// Avoid logging useless commands if there is no reservation
		const bool dump = g_cfg.core.mfc_debug && raddr;

		if (do_putllc(ch_mfc_cmd))
		{
			ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
		}
		else
		{
			ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
		}

		if (dump)
		{
			auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
			dump.cmd = ch_mfc_cmd;
			dump.cmd.eah = pc;
			dump.cmd.tag = static_cast<u32>(ch_atomic_stat.get_value()); // Use tag as atomic status
			std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
		}

		return !test_stopped();
	}
	case MFC_PUTLLUC_CMD:
	{
		if (g_cfg.core.mfc_debug)
		{
			auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
			dump.cmd = ch_mfc_cmd;
			dump.cmd.eah = pc;
			std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
		}

		do_putlluc(ch_mfc_cmd);
		ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
		return !test_stopped();
	}
	case MFC_PUTQLLUC_CMD:
	{
		if (g_cfg.core.mfc_debug)
		{
			auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
			dump.cmd = ch_mfc_cmd;
			dump.cmd.eah = pc;
			std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
		}

		const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);

		if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
		{
			mfc_queue[mfc_size++] = ch_mfc_cmd;
			mfc_fence |= mask;
		}
		else
		{
			do_putlluc(ch_mfc_cmd);
		}

		return true;
	}
	case MFC_SNDSIG_CMD:
	case MFC_SNDSIGB_CMD:
	case MFC_SNDSIGF_CMD:
	{
		if (ch_mfc_cmd.size != 4)
		{
			break;
		}

		[[fallthrough]];
	}
	case MFC_PUT_CMD:
	case MFC_PUTB_CMD:
	case MFC_PUTF_CMD:
	case MFC_PUTR_CMD:
	case MFC_PUTRB_CMD:
	case MFC_PUTRF_CMD:
	case MFC_GET_CMD:
	case MFC_GETB_CMD:
	case MFC_GETF_CMD:
	case MFC_SDCRZ_CMD:
	{
		if (ch_mfc_cmd.size <= 0x4000) [[likely]]
		{
			if (do_dma_check(ch_mfc_cmd)) [[likely]]
			{
				if (ch_mfc_cmd.size)
				{
					do_dma_transfer(this, ch_mfc_cmd, ls);
				}

				return true;
			}

			mfc_queue[mfc_size++] = ch_mfc_cmd;
			mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);

			if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
			{
				mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
			}

			return true;
		}

		break;
	}
	case MFC_PUTL_CMD:
	case MFC_PUTLB_CMD:
	case MFC_PUTLF_CMD:
	case MFC_PUTRL_CMD:
	case MFC_PUTRLB_CMD:
	case MFC_PUTRLF_CMD:
	case MFC_GETL_CMD:
	case MFC_GETLB_CMD:
	case MFC_GETLF_CMD:
	{
		if (ch_mfc_cmd.size <= 0x4000) [[likely]]
		{
			auto& cmd = mfc_queue[mfc_size];
			cmd = ch_mfc_cmd;

			//if (g_cfg.core.mfc_debug)
			//{
			//	// TODO: This needs a disambiguator with list elements dumping
			//	auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
			//	dump.cmd = ch_mfc_cmd;
			//	dump.cmd.eah = pc;
			//	std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.eah & 0x3fff0), std::min<u32>(ch_mfc_cmd.size, 128));
			//}

			if (do_dma_check(cmd)) [[likely]]
			{
				if (!cmd.size || do_list_transfer(cmd)) [[likely]]
				{
					return true;
				}
			}

			mfc_size++;
			mfc_fence |= utils::rol32(1, cmd.tag);

			if (cmd.cmd & MFC_BARRIER_MASK)
			{
				mfc_barrier |= utils::rol32(1, cmd.tag);
			}

			if (check_mfc_interrupts(pc + 4))
			{
				spu_runtime::g_escape(this);
			}

			return true;
		}

		break;
	}
	case MFC_BARRIER_CMD:
	case MFC_EIEIO_CMD:
	case MFC_SYNC_CMD:
	{
		if (mfc_size == 0)
		{
			atomic_fence_seq_cst();
		}
		else
		{
			mfc_queue[mfc_size++] = ch_mfc_cmd;
			mfc_barrier |= -1;
			mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
		}

		return true;
	}
	default:
	{
		break;
	}
	}

	fmt::throw_exception("Unknown command (cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)",
		ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
}
bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data) const
{
	if (!addr)
	{
		// No reservation to be lost in the first place
		return false;
	}

	if ((vm::reservation_acquire(addr) & -128) != rtime)
	{
		return true;
	}

	// Ensure data is allocated (HACK: would raise LR event if not)
	// Set range_lock first optimistically
	range_lock->store(u64{128} << 32 | addr);

	u64 lock_val = vm::g_range_lock;
	u64 old_lock = 0;

	while (lock_val != old_lock)
	{
		// Since we want to read data, let's check readability first
		if (!(lock_val & vm::range_readable))
		{
			// Only one abnormal operation is "unreadable"
			if ((lock_val >> vm::range_pos) == (vm::range_locked >> vm::range_pos))
			{
				// All page flags are untouched and can be read safely
				if (!vm::check_addr(addr))
				{
					// Assume our memory is being (de)allocated
					range_lock->release(0);
					break;
				}

				// g_shmem values are unchanged too
				const u64 is_shmem = vm::g_shmem[addr >> 16];
				const u64 test_addr = is_shmem ? (is_shmem | static_cast<u16>(addr)) / 128 : u64{addr} / 128;
				const u64 lock_addr = lock_val / 128;

				if (test_addr == lock_addr)
				{
					// Our reservation is locked
					range_lock->release(0);
					break;
				}

				break;
			}
		}

		// Fallback to normal range check
		const u64 lock_addr = static_cast<u32>(lock_val);
		const u32 lock_size = static_cast<u32>(lock_val << 3 >> 35);

		if (lock_addr + lock_size <= addr || lock_addr >= addr + 128)
		{
			// We are outside locked range, so page flags are unaffected
			if (!vm::check_addr(addr))
			{
				range_lock->release(0);
				break;
			}
		}
		else if (!(lock_val & vm::range_readable))
		{
			range_lock->release(0);
			break;
		}

		old_lock = std::exchange(lock_val, vm::g_range_lock);
	}

	if (!range_lock->load()) [[unlikely]]
	{
		return true;
	}

	const bool res = cmp_rdata(data, vm::_ref<decltype(rdata)>(addr));

	range_lock->release(0);
	return !res;
}
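// Range-lock decode note (illustrative, hedged): g_range_lock appears to pack the locked
// address into the low 32 bits and the size into bits 32..60, with flag bits at the top.
// `lock_val << 3 >> 35` first discards the top 3 flag bits, then shifts the size field
// down - e.g. a lock word of (u64{128} << 32) | 0x10000 decodes to lock_addr = 0x10000 and
// lock_size = 128, matching the value this function stores into its own range_lock.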
spu_thread::ch_events_t spu_thread::get_events(u32 mask_hint, bool waiting, bool reading)
{
	if (auto mask1 = ch_events.load().mask; mask1 & ~SPU_EVENT_IMPLEMENTED)
	{
		fmt::throw_exception("SPU Events not implemented (mask=0x%x)", mask1);
	}

retry:
	u32 collect = 0;

	// Check reservation status and set SPU_EVENT_LR if lost
	if (mask_hint & SPU_EVENT_LR)
	{
		if (reservation_check(raddr, rdata))
		{
			collect |= SPU_EVENT_LR;
			raddr = 0;
		}
	}

	// SPU Decrementer Event on underflow (use the upper 32-bits to determine it)
	if (mask_hint & SPU_EVENT_TM)
	{
		if (const u64 res = (ch_dec_value - (get_timebased_time() - ch_dec_start_timestamp)) >> 32)
		{
			// Set next event to the next time the decrementer underflows
			ch_dec_start_timestamp -= res << 32;
			collect |= SPU_EVENT_TM;
		}
	}

	if (collect)
	{
		set_events(collect);
	}

	auto [res, ok] = ch_events.fetch_op([&](ch_events_t& events)
	{
		if (!reading)
			return false;
		if (waiting)
			events.waiting = !events.count;
		events.count = false;
		return true;
	});

	if (reading && res.locks && mask_hint & (SPU_EVENT_S1 | SPU_EVENT_S2))
	{
		busy_wait(100);
		goto retry;
	}

	return res;
}
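// Decrementer underflow detection (illustrative): ch_dec_value is a 32-bit start value, so
// while the counter has not yet underflowed, ch_dec_value - elapsed still fits in 32 bits
// and the upper half of the u64 difference is zero. Once elapsed exceeds ch_dec_value the
// subtraction wraps (e.g. u64{5} - u64{6} == 0xffff'ffff'ffff'ffff) and res becomes
// non-zero; subtracting res << 32 from the start timestamp then clears the upper half
// again, re-arming the check for the next wrap-around instead of reporting the same
// underflow forever.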
void spu_thread::set_events(u32 bits)
{
	if (ch_events.atomic_op([&](ch_events_t& events)
	{
		events.events |= bits;

		// If one masked event was fired, set the channel count (even if the event bit was already 1)
		if (events.mask & bits)
		{
			events.count = true;
			return !!events.waiting;
		}

		return false;
	}))
	{
		notify();
	}
}
void spu_thread::set_interrupt_status(bool enable)
{
	if (enable)
	{
		// Detect enabling interrupts with events masked
		if (auto mask = ch_events.load().mask; mask & ~SPU_EVENT_INTR_IMPLEMENTED)
		{
			fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x)", mask);
		}
	}

	interrupts_enabled = enable;
}
u32 spu_thread::get_ch_count(u32 ch)
{
	if (ch < 128) spu_log.trace("get_ch_count(ch=%s)", spu_ch_name[ch]);

	switch (ch)
	{
	case SPU_WrOutMbox: return ch_out_mbox.get_count() ^ 1;
	case SPU_WrOutIntrMbox: return ch_out_intr_mbox.get_count() ^ 1;
	case SPU_RdInMbox: return ch_in_mbox.get_count();
	case MFC_RdTagStat: return ch_tag_stat.get_count();
	case MFC_RdListStallStat: return ch_stall_stat.get_count();
	case MFC_WrTagUpdate: return 1;
	case SPU_RdSigNotify1: return ch_snr1.get_count();
	case SPU_RdSigNotify2: return ch_snr2.get_count();
	case MFC_RdAtomicStat: return ch_atomic_stat.get_count();
	case SPU_RdEventStat: return get_events().count;
	case MFC_Cmd: return 16 - mfc_size;

	// Channels with a constant count of 1:
	case SPU_WrEventMask:
	case SPU_WrEventAck:
	case SPU_WrDec:
	case SPU_RdDec:
	case SPU_RdEventMask:
	case SPU_RdMachStat:
	case SPU_WrSRR0:
	case SPU_RdSRR0:
	case SPU_Set_Bkmk_Tag:
	case SPU_PM_Start_Ev:
	case SPU_PM_Stop_Ev:
	case MFC_RdTagMask:
	case MFC_LSA:
	case MFC_EAH:
	case MFC_EAL:
	case MFC_Size:
	case MFC_TagID:
	case MFC_WrTagMask:
	case MFC_WrListStallAck:
		return 1;

	default: break;
	}

	ensure(ch < 128u);
	spu_log.error("Unknown/illegal channel in RCHCNT (ch=%s)", spu_ch_name[ch]);
	return 0; // Default count
}
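// Channel-count convention (illustrative): for write channels the count reports free
// slots, not occupancy. The outbound mailboxes hold at most one value, so
// get_count() ^ 1 flips 0 <-> 1: an empty mailbox (count 0) reports one writable slot,
// a full one reports none. MFC_Cmd similarly reports 16 - mfc_size free queue entries.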
s64 spu_thread::get_ch_value(u32 ch)
{
	if (ch < 128) spu_log.trace("get_ch_value(ch=%s)", spu_ch_name[ch]);

	auto read_channel = [&](spu_channel& channel) -> s64
	{
		if (channel.get_count() == 0)
		{
			state += cpu_flag::wait + cpu_flag::temp;
		}

		for (int i = 0; i < 10 && channel.get_count() == 0; i++)
		{
			busy_wait();
		}

		const s64 out = channel.pop_wait(*this);
		static_cast<void>(test_stopped());
		return out;
	};

	switch (ch)
	{
	case SPU_RdSRR0:
	{
		return srr0;
	}
	case SPU_RdInMbox:
	{
		if (ch_in_mbox.get_count() == 0)
		{
			state += cpu_flag::wait;
		}

		while (true)
		{
			for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
			{
				busy_wait();
			}

			u32 out = 0;

			if (const uint old_count = ch_in_mbox.try_pop(out))
			{
				if (old_count == 4 /* SPU_IN_MBOX_THRESHOLD */) // TODO: check this
				{
					int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT);
				}

				check_state();
				return out;
			}

			auto old = +state;

			if (is_stopped(old))
			{
				return -1;
			}

			thread_ctrl::wait_on(state, old);
		}
	}
	case MFC_RdTagStat:
	{
		if (u32 out; ch_tag_stat.try_read(out))
		{
			ch_tag_stat.set_value(0, false);
			return out;
		}

		// Will stall infinitely
		return read_channel(ch_tag_stat);
	}
	case MFC_RdTagMask:
	{
		return ch_tag_mask;
	}
	case SPU_RdSigNotify1:
	{
		return read_channel(ch_snr1);
	}
	case SPU_RdSigNotify2:
	{
		return read_channel(ch_snr2);
	}
	case MFC_RdAtomicStat:
	{
		if (u32 out; ch_atomic_stat.try_read(out))
		{
			ch_atomic_stat.set_value(0, false);
			return out;
		}

		// Will stall infinitely
		return read_channel(ch_atomic_stat);
	}
	case MFC_RdListStallStat:
	{
		if (u32 out; ch_stall_stat.try_read(out))
		{
			ch_stall_stat.set_value(0, false);
			return out;
		}

		// Will stall infinitely
		return read_channel(ch_stall_stat);
	}
	case SPU_RdDec:
	{
		u32 out = ch_dec_value - static_cast<u32>(get_timebased_time() - ch_dec_start_timestamp);

		// Polling: we might as well hint to the scheduler to slot in another thread since this one is counting down
		if (g_cfg.core.spu_loop_detection && out > spu::scheduler::native_jiffy_duration_us)
		{
			state += cpu_flag::wait;
			std::this_thread::yield();
		}

		return out;
	}
	case SPU_RdEventMask:
	{
		return ch_events.load().mask;
	}
	case SPU_RdEventStat:
	{
		const u32 mask1 = ch_events.load().mask;
		auto events = get_events(mask1, false, true);

		if (events.count)
		{
			return events.events & mask1;
		}

		spu_function_logger logger(*this, "MFC Events read");

		if (mask1 & SPU_EVENT_LR && raddr)
		{
			if (mask1 != SPU_EVENT_LR && mask1 != SPU_EVENT_LR + SPU_EVENT_TM)
			{
				// Combining LR with other flags needs another solution
				fmt::throw_exception("Not supported: event mask 0x%x", mask1);
			}

			for (; !events.count; events = get_events(mask1, false, true))
			{
				const auto old = state.add_fetch(cpu_flag::wait);

				if (is_stopped(old))
				{
					return -1;
				}

				if (is_paused(old))
				{
					// Ensure reservation data won't change while paused for debugging purposes
					check_state();
					continue;
				}

				vm::reservation_notifier(raddr).wait(rtime, -128, atomic_wait_timeout{100'000});
			}

			check_state();
			return events.events & mask1;
		}

		for (; !events.count; events = get_events(mask1, true, true))
		{
			const auto old = state.add_fetch(cpu_flag::wait);

			if (is_stopped(old))
			{
				return -1;
			}

			if (is_paused(old))
			{
				check_state();
				continue;
			}

			thread_ctrl::wait_on(state, old, 100);
		}

		check_state();
		return events.events & mask1;
	}
	case SPU_RdMachStat:
	{
		// Return SPU Interrupt status in LSB
		return u32{interrupts_enabled} | (u32{get_type() == spu_type::isolated} << 1);
	}
	}

	fmt::throw_exception("Unknown/illegal channel in RDCH (ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???");
}
bool spu_thread::set_ch_value(u32 ch, u32 value)
{
	if (ch < 128) spu_log.trace("set_ch_value(ch=%s, value=0x%x)", spu_ch_name[ch], value);

	switch (ch)
	{
	case SPU_WrSRR0:
	{
		srr0 = value & 0x3fffc;
		return true;
	}

	case SPU_WrOutIntrMbox:
	{
		if (get_type() >= spu_type::raw)
		{
			if (ch_out_intr_mbox.get_count())
			{
				state += cpu_flag::wait;
			}

			if (!ch_out_intr_mbox.push_wait(*this, value))
			{
				return false;
			}

			int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
			check_state();
			return true;
		}
		state += cpu_flag::wait;

		const u32 code = value >> 24;
		{
			if (code < 64)
			{
				/* ===== sys_spu_thread_send_event (used by spu_printf) ===== */

				u32 spup = code & 63;
				u32 data = 0;

				if (!ch_out_mbox.try_pop(data))
				{
					fmt::throw_exception("sys_spu_thread_send_event(value=0x%x, spup=%d): Out_MBox is empty", value, spup);
				}

				spu_log.trace("sys_spu_thread_send_event(spup=%d, data0=0x%x, data1=0x%x)", spup, value & 0x00ffffff, data);

				std::lock_guard lock(group->mutex);

				const auto queue = this->spup[spup].lock();

				const auto res = ch_in_mbox.get_count() ? CELL_EBUSY :
					!queue ? CELL_ENOTCONN :
					queue->send(SYS_SPU_THREAD_EVENT_USER_KEY, lv2_id, (u64{spup} << 32) | (value & 0x00ffffff), data);

				if (ch_in_mbox.get_count())
				{
					spu_log.warning("sys_spu_thread_send_event(spup=%d, data0=0x%x, data1=0x%x): In_MBox is not empty (%d)", spup, (value & 0x00ffffff), data, ch_in_mbox.get_count());
				}
				else if (res == CELL_ENOTCONN)
				{
					spu_log.warning("sys_spu_thread_send_event(spup=%d, data0=0x%x, data1=0x%x): error (%s)", spup, (value & 0x00ffffff), data, res);
				}

				ch_in_mbox.set_values(1, res);
				return true;
			}
			else if (code < 128)
			{
				/* ===== sys_spu_thread_throw_event ===== */

				u32 spup = code & 63;
				u32 data = 0;

				if (!ch_out_mbox.try_pop(data))
				{
					fmt::throw_exception("sys_spu_thread_throw_event(value=0x%x, spup=%d): Out_MBox is empty", value, spup);
				}

				spu_log.trace("sys_spu_thread_throw_event(spup=%d, data0=0x%x, data1=0x%x)", spup, value & 0x00ffffff, data);

				const auto queue = (std::lock_guard{group->mutex}, this->spup[spup].lock());

				// TODO: check passing spup value
				if (auto res = queue ? queue->send(SYS_SPU_THREAD_EVENT_USER_KEY, lv2_id, (u64{spup} << 32) | (value & 0x00ffffff), data) : CELL_ENOTCONN)
				{
					spu_log.warning("sys_spu_thread_throw_event(spup=%d, data0=0x%x, data1=0x%x) failed (error=%s)", spup, (value & 0x00ffffff), data, res);
				}

				return true;
			}
			else if (code == 128)
			{
				/* ===== sys_event_flag_set_bit ===== */

				u32 flag = value & 0xffffff;
				u32 data = 0;

				if (!ch_out_mbox.try_pop(data))
				{
					fmt::throw_exception("sys_event_flag_set_bit(value=0x%x (flag=%d)): Out_MBox is empty", value, flag);
				}

				spu_log.trace("sys_event_flag_set_bit(id=%d, value=0x%x (flag=%d))", data, value, flag);

				std::lock_guard lock(group->mutex);

				// Use the syscall to set flag
				const auto res = ch_in_mbox.get_count() ? CELL_EBUSY : 0u + sys_event_flag_set(*this, data, 1ull << flag);

				if (res == CELL_EBUSY)
				{
					spu_log.warning("sys_event_flag_set_bit(value=0x%x (flag=%d)): In_MBox is not empty (%d)", value, flag, ch_in_mbox.get_count());
				}

				ch_in_mbox.set_values(1, res);
				return true;
			}
			else if (code == 192)
			{
				/* ===== sys_event_flag_set_bit_impatient ===== */

				u32 flag = value & 0xffffff;
				u32 data = 0;

				if (!ch_out_mbox.try_pop(data))
				{
					fmt::throw_exception("sys_event_flag_set_bit_impatient(value=0x%x (flag=%d)): Out_MBox is empty", value, flag);
				}

				spu_log.trace("sys_event_flag_set_bit_impatient(id=%d, value=0x%x (flag=%d))", data, value, flag);

				// Use the syscall to set flag
				sys_event_flag_set(*this, data, 1ull << flag);
				return true;
			}
			else
			{
				fmt::throw_exception("SPU_WrOutIntrMbox: unknown data (value=0x%x, Out_MBox=%s)", value, ch_out_mbox);
			}
		}
	}
	case SPU_WrOutMbox:
	{
		if (ch_out_mbox.get_count())
		{
			state += cpu_flag::wait;
		}

		if (!ch_out_mbox.push_wait(*this, value))
		{
			return false;
		}

		check_state();
		return true;
	}
	case MFC_WrTagMask:
	{
		ch_tag_mask = value;

		if (ch_tag_upd)
		{
			const u32 completed = get_mfc_completed();

			if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
			{
				ch_tag_stat.set_value(completed);
				ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
			}
			else if (completed == value && ch_tag_upd == MFC_TAG_UPDATE_ALL)
			{
				ch_tag_stat.set_value(completed);
				ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
			}
		}

		return true;
	}
	case MFC_WrTagUpdate:
	{
		if (value > MFC_TAG_UPDATE_ALL)
		{
			break;
		}

		const u32 completed = get_mfc_completed();

		if (!value)
		{
			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
			ch_tag_stat.set_value(completed);
		}
		else if (completed && value == MFC_TAG_UPDATE_ANY)
		{
			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
			ch_tag_stat.set_value(completed);
		}
		else if (completed == ch_tag_mask && value == MFC_TAG_UPDATE_ALL)
		{
			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
			ch_tag_stat.set_value(completed);
		}
		else
		{
			ch_tag_upd = value;
		}

		return true;
	}
	case MFC_LSA:
	{
		ch_mfc_cmd.lsa = value;
		return true;
	}
	case MFC_EAH:
	{
		ch_mfc_cmd.eah = value;
		return true;
	}
	case MFC_EAL:
	{
		ch_mfc_cmd.eal = value;
		return true;
	}
	case MFC_Size:
	{
		ch_mfc_cmd.size = value & 0x7fff;
		return true;
	}
	case MFC_TagID:
	{
		ch_mfc_cmd.tag = value & 0x1f;
		return true;
	}
	case MFC_Cmd:
	{
		ch_mfc_cmd.cmd = MFC(value & 0xff);
		return process_mfc_cmd();
	}
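	// Tag-update protocol sketch (illustrative): MFC_WrTagUpdate requests completion
	// notification for the tags selected by MFC_WrTagMask. A request of 0
	// (MFC_TAG_UPDATE_IMMEDIATE) publishes the current completion set unconditionally;
	// ANY fires as soon as at least one selected tag is complete; ALL only when every
	// selected tag is. When the condition does not hold yet, the request is parked in
	// ch_tag_upd and re-evaluated as queued commands drain (see do_mfc above).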
	case MFC_WrListStallAck:
	{
		value &= 0x1f;

		// Reset stall status for specified tag
		const u32 tag_mask = utils::rol32(1, value);

		if (ch_stall_mask & tag_mask)
		{
			ch_stall_mask &= ~tag_mask;

			for (u32 i = 0; i < mfc_size; i++)
			{
				if (mfc_queue[i].tag == (value | 0x80))
				{
					// Unset stall bit
					mfc_queue[i].tag &= 0x7f;
				}
			}

			do_mfc(true);
		}

		return true;
	}
	case SPU_WrDec:
	{
		get_events(SPU_EVENT_TM); // Don't discard a possibly occurred old event
		ch_dec_start_timestamp = get_timebased_time();
		ch_dec_value = value;
		return true;
	}
	case SPU_WrEventMask:
	{
		get_events(value);

		if (ch_events.atomic_op([&](ch_events_t& events)
		{
			events.mask = value;

			if (events.events & events.mask)
			{
				events.count = true;
				return true;
			}

			return false;
		}))
		{
			// Check interrupts in case count is 1
			if (check_mfc_interrupts(pc + 4))
			{
				spu_runtime::g_escape(this);
			}
		}

		return true;
	}
	case SPU_WrEventAck:
	{
		// "Collect" events before final acknowledgment
		get_events(value);

		if (ch_events.atomic_op([&](ch_events_t& events)
		{
			events.events &= ~value;

			if (events.events & events.mask)
			{
				events.count = true;
				return true;
			}

			return false;
		}))
		{
			// Check interrupts in case count is 1
			if (check_mfc_interrupts(pc + 4))
			{
				spu_runtime::g_escape(this);
			}
		}

		return true;
	}

	case SPU_Set_Bkmk_Tag:
	case SPU_PM_Start_Ev:
	case SPU_PM_Stop_Ev:
	{
		return true;
	}
	}

	fmt::throw_exception("Unknown/illegal channel in WRCH (ch=%d [%s], value=0x%x)", ch, ch < 128 ? spu_ch_name[ch] : "???", value);
}
bool spu_thread::stop_and_signal(u32 code)
{
	spu_log.trace("stop_and_signal(code=0x%x)", code);

	auto set_status_npc = [&]()
	{
		status_npc.atomic_op([&](status_npc_sync_var& state)
		{
			state.status = (state.status & 0xffff) | (code << 16);
			state.status |= SPU_STATUS_STOPPED_BY_STOP;
			state.status &= ~SPU_STATUS_RUNNING;
			state.npc = (pc + 4) | +interrupts_enabled;
		});
	};

	if (get_type() >= spu_type::raw)
	{
		// Save next PC and current SPU Interrupt Status
		state += cpu_flag::stop + cpu_flag::wait + cpu_flag::ret;
		set_status_npc();

		status_npc.notify_one();

		int_ctrl[2].set(SPU_INT2_STAT_SPU_STOP_AND_SIGNAL_INT);
		check_state();
		return true;
	}

	switch (code)
	{
	case 0x001:
	{
		state += cpu_flag::wait;
		std::this_thread::sleep_for(1ms); // hack
		check_state();
		return true;
	}
	case 0x002:
	{
		state += cpu_flag::ret;
		return true;
	}
	case SYS_SPU_THREAD_STOP_RECEIVE_EVENT:
	{
		/* ===== sys_spu_thread_receive_event ===== */

		u32 spuq = 0;

		if (!ch_out_mbox.try_pop(spuq))
		{
			fmt::throw_exception("sys_spu_thread_receive_event(): Out_MBox is empty");
		}

		if (u32 count = ch_in_mbox.get_count())
		{
			spu_log.error("sys_spu_thread_receive_event(): In_MBox is not empty (%d)", count);
			return ch_in_mbox.set_values(1, CELL_EBUSY), true;
		}

		spu_log.trace("sys_spu_thread_receive_event(spuq=0x%x)", spuq);

		if (!group->has_scheduler_context /*|| group->type & 0xf00*/)
		{
			spu_log.error("sys_spu_thread_receive_event(): Incompatible group type = 0x%x", group->type);
			return ch_in_mbox.set_values(1, CELL_EINVAL), true;
		}

		std::shared_ptr<lv2_event_queue> queue;

		state += cpu_flag::wait;

		spu_function_logger logger(*this, "sys_spu_thread_receive_event");

		while (true)
		{
			queue.reset();

			// Check group status, wait if necessary
			for (auto _state = +group->run_state;
				_state >= SPU_THREAD_GROUP_STATUS_WAITING && _state <= SPU_THREAD_GROUP_STATUS_WAITING_AND_SUSPENDED;
				_state = group->run_state)
			{
				const auto old = state.load();

				if (is_stopped(old))
				{
					return false;
				}

				thread_ctrl::wait_on(state, old);
			}

			reader_lock rlock(id_manager::g_mutex);

			std::lock_guard lock(group->mutex);

			if (is_stopped())
			{
				return false;
			}

			if (group->run_state >= SPU_THREAD_GROUP_STATUS_WAITING && group->run_state <= SPU_THREAD_GROUP_STATUS_WAITING_AND_SUSPENDED)
			{
				// Try again
				continue;
			}

			for (auto& v : this->spuq)
			{
				if (spuq == v.first)
				{
					queue = v.second.lock();

					if (lv2_event_queue::check(queue))
					{
						break;
					}
				}
			}

			if (!lv2_event_queue::check(queue))
			{
				return ch_in_mbox.set_values(1, CELL_EINVAL), true;
			}

			std::lock_guard qlock(queue->mutex);

			if (!queue->exists)
			{
				return ch_in_mbox.set_values(1, CELL_EINVAL), true;
			}

			if (queue->events.empty())
			{
				queue->sq.emplace_back(this);
				group->run_state = SPU_THREAD_GROUP_STATUS_WAITING;

				for (auto& thread : group->threads)
				{
					if (thread)
					{
						thread->state += cpu_flag::suspend;
					}
				}

				// Wait
				break;
			}
			else
			{
				// Return the event immediately
				const auto event = queue->events.front();
				const auto data1 = static_cast<u32>(std::get<1>(event));
				const auto data2 = static_cast<u32>(std::get<2>(event));
				const auto data3 = static_cast<u32>(std::get<3>(event));
				ch_in_mbox.set_values(4, CELL_OK, data1, data2, data3);
				queue->events.pop_front();
				return true;
			}
		}

		while (auto old = state.fetch_sub(cpu_flag::signal))
		{
			if (is_stopped(old))
			{
				// The thread group cannot be stopped while waiting for an event
				ensure(!(old & cpu_flag::stop));
				return false;
			}

			if (old & cpu_flag::signal)
			{
				break;
			}

			thread_ctrl::wait_on(state, old);
		}

		std::lock_guard lock(group->mutex);

		if (group->run_state == SPU_THREAD_GROUP_STATUS_WAITING)
		{
			group->run_state = SPU_THREAD_GROUP_STATUS_RUNNING;
		}
		else if (group->run_state == SPU_THREAD_GROUP_STATUS_WAITING_AND_SUSPENDED)
		{
			group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED;
		}

		for (auto& thread : group->threads)
		{
			if (thread)
			{
				thread->state -= cpu_flag::suspend;

				if (thread.get() != this)
				{
					thread->state.notify_one(cpu_flag::suspend);
				}
			}
		}

		return true;
	}
	case SYS_SPU_THREAD_STOP_TRY_RECEIVE_EVENT:
	{
		/* ===== sys_spu_thread_tryreceive_event ===== */

		u32 spuq = 0;

		if (!ch_out_mbox.try_pop(spuq))
		{
			fmt::throw_exception("sys_spu_thread_tryreceive_event(): Out_MBox is empty");
		}

		if (u32 count = ch_in_mbox.get_count())
		{
			spu_log.error("sys_spu_thread_tryreceive_event(): In_MBox is not empty (%d)", count);
			return ch_in_mbox.set_values(1, CELL_EBUSY), true;
		}

		spu_log.trace("sys_spu_thread_tryreceive_event(spuq=0x%x)", spuq);

		std::lock_guard lock(group->mutex);

		std::shared_ptr<lv2_event_queue> queue;

		for (auto& v : this->spuq)
		{
			if (spuq == v.first)
			{
				if (queue = v.second.lock(); lv2_event_queue::check(queue))
				{
					break;
				}
			}
		}

		if (!lv2_event_queue::check(queue))
		{
			return ch_in_mbox.set_values(1, CELL_EINVAL), true;
		}

		std::lock_guard qlock(queue->mutex);

		if (!queue->exists)
		{
			return ch_in_mbox.set_values(1, CELL_EINVAL), true;
		}

		if (queue->events.empty())
		{
			return ch_in_mbox.set_values(1, CELL_EBUSY), true;
		}

		const auto event = queue->events.front();
		const auto data1 = static_cast<u32>(std::get<1>(event));
		const auto data2 = static_cast<u32>(std::get<2>(event));
		const auto data3 = static_cast<u32>(std::get<3>(event));
		ch_in_mbox.set_values(4, CELL_OK, data1, data2, data3);
		queue->events.pop_front();
		return true;
	}
	case SYS_SPU_THREAD_STOP_YIELD:
	{
		// SPU thread group yield (TODO)
		if (ch_out_mbox.get_count())
		{
			fmt::throw_exception("STOP code 0x100: Out_MBox is not empty");
		}

		atomic_fence_seq_cst();
		return true;
	}
	case SYS_SPU_THREAD_STOP_GROUP_EXIT:
	{
		/* ===== sys_spu_thread_group_exit ===== */

		state += cpu_flag::wait;

		u32 value = 0;

		if (!ch_out_mbox.try_pop(value))
		{
			fmt::throw_exception("sys_spu_thread_group_exit(): Out_MBox is empty");
		}

		spu_log.trace("sys_spu_thread_group_exit(status=0x%x)", value);

		while (true)
		{
			for (auto _state = +group->run_state;
				_state >= SPU_THREAD_GROUP_STATUS_WAITING && _state <= SPU_THREAD_GROUP_STATUS_WAITING_AND_SUSPENDED;
				_state = group->run_state)
			{
				const auto old = +state;

				if (is_stopped(old))
				{
					return false;
				}

				thread_ctrl::wait_on(state, old);
			}

			std::lock_guard lock(group->mutex);

			if (auto _state = +group->run_state;
				_state >= SPU_THREAD_GROUP_STATUS_WAITING && _state <= SPU_THREAD_GROUP_STATUS_WAITING_AND_SUSPENDED)
			{
				// We can't exit while we are waiting on an SPU event
				continue;
			}

			if (std::exchange(group->set_terminate, true))
			{
				// Whoever terminated first decides the error status + cause
				return true;
			}

			for (auto& thread : group->threads)
			{
				if (thread)
				{
					thread->state.fetch_op([](bs_t<cpu_flag>& flags)
					{
						if (flags & cpu_flag::stop)
						{
							// In case the thread raised the ret flag itself at some point do not raise it again
							return false;
						}

						flags += cpu_flag::stop + cpu_flag::ret;
						return true;
					});

					if (thread.get() != this)
						thread_ctrl::notify(*thread);
				}
			}

			group->exit_status = value;
			group->join_state = SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT;
			set_status_npc();
			break;
		}

		check_state();
		return true;
	}
	case SYS_SPU_THREAD_STOP_THREAD_EXIT:
	{
		/* ===== sys_spu_thread_exit ===== */

		state += cpu_flag::wait;

		u32 value;

		if (!ch_out_mbox.try_pop(value))
		{
			fmt::throw_exception("sys_spu_thread_exit(): Out_MBox is empty");
		}

		spu_log.trace("sys_spu_thread_exit(status=0x%x)", value);
		last_exit_status.release(value);
		set_status_npc();

		state += cpu_flag::stop + cpu_flag::ret;
		check_state();
		return true;
	}
	}

	fmt::throw_exception("Unknown STOP code: 0x%x (op=0x%x, Out_MBox=%s)", code, _ref<u32>(pc), ch_out_mbox);
}
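
// Raw SPUs report a halt through the status/NPC channel and a class 2 interrupt;
// a halt on a threaded SPU is unexpected here and is logged as fatal before escaping.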
void spu_thread::halt()
{
	spu_log.trace("halt()");

	if (get_type() >= spu_type::raw)
	{
		state += cpu_flag::stop + cpu_flag::wait;

		status_npc.atomic_op([this](status_npc_sync_var& state)
		{
			state.status |= SPU_STATUS_STOPPED_BY_HALT;
			state.status &= ~SPU_STATUS_RUNNING;
			state.npc = pc | +interrupts_enabled;
		});

		status_npc.notify_one();

		int_ctrl[2].set(SPU_INT2_STAT_SPU_HALT_OR_STEP_INT);

		spu_runtime::g_escape(this);
	}

	spu_log.fatal("Halt");
	spu_runtime::g_escape(this);
}
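
// Run code at ls_addr as a subroutine call on this thread: PC and the link/stack
// registers are saved, $LR is pointed at a STOP 2 stub at LS:0, and everything is
// restored once cpu_task() returns.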
void spu_thread::fast_call(u32 ls_addr)
{
	// LS:0x0: this is originally the entry point of the interrupt handler, but interrupts are not implemented
	_ref<u32>(0) = 0x00000002; // STOP 2

	auto old_pc = pc;
	auto old_lr = gpr[0]._u32[3];
	auto old_stack = gpr[1]._u32[3]; // only saved and restored (may be wrong)

	pc = ls_addr;
	gpr[0]._u32[3] = 0x0;

	cpu_task();

	state -= cpu_flag::ret;

	pc = old_pc;
	gpr[0]._u32[3] = old_lr;
	gpr[1]._u32[3] = old_stack;
}
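
// Dump the full 256 KiB of local storage as a single loadable SPU ELF segment so
// the capture can be inspected in external tools (e.g. Ghidra, per the comment below).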
bool spu_thread::capture_local_storage() const
{
	spu_exec_object spu_exec;

	// Save data as an executable segment, even the SPU stack
	// In the past, an optimization was made here to save only non-zero chunks of data
	// But Ghidra didn't like accessing memory out of chunks (pretty common)
	// So it has been reverted
	auto& prog = spu_exec.progs.emplace_back(SYS_SPU_SEGMENT_TYPE_COPY, 0x7, 0, SPU_LS_SIZE, 8, std::vector<uchar>(ls, ls + SPU_LS_SIZE));

	prog.p_paddr = prog.p_vaddr;
	spu_log.success("Segment: p_type=0x%x, p_vaddr=0x%x, p_filesz=0x%x, p_memsz=0x%x", prog.p_type, prog.p_vaddr, prog.p_filesz, prog.p_memsz);

	std::string name;

	if (get_type() == spu_type::threaded)
	{
		name = *spu_tname.load();

		if (name.empty())
		{
			// TODO: Maybe add thread group name here
			fmt::append(name, "SPU.0x%07x", lv2_id);
		}
	}
	else
	{
		fmt::append(name, "RawSPU.%u", lv2_id);
	}

	u32 pc0 = pc;

	for (; pc0; pc0 -= 4)
	{
		be_t<u32> op;
		std::memcpy(&op, prog.bin.data() + pc0 - 4, 4);

		// Try to find function entry (if they are placed sequentially search for BI $LR of previous function)
		if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK)
		{
			break;
		}
	}
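
	// 0x35000000 is the raw encoding of "bi $lr" (BI with RA = $0, the SPU ABI link
	// register); pc0 now holds the likely function entry, or 0 if none was found.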
	spu_exec.header.e_entry = pc0;

	name = vfs::escape(name, true);
	std::replace(name.begin(), name.end(), ' ', '_');

	auto get_filename = [&]() -> std::string
	{
		return fs::get_cache_dir() + "spu_progs/" + Emu.GetTitleID() + "_" + vfs::escape(name, true) + '_' + date_time::current_time_narrow() + "_capture.elf";
	};

	auto elf_path = get_filename();

	if (fs::exists(elf_path))
	{
		// Wait 1 second so current_time_narrow() will return a different string
		std::this_thread::sleep_for(1s);

		if (elf_path = get_filename(); fs::exists(elf_path))
		{
			spu_log.error("Failed to create '%s' (error=%s)", elf_path, fs::g_tls_error);
			return false;
		}
	}

	fs::pending_file temp(elf_path);

	if (!temp.file)
	{
		spu_log.error("Failed to create temporary file for '%s' (error=%s)", elf_path, fs::g_tls_error);
		return false;
	}

	temp.file.write(spu_exec.save());

	if (!temp.commit(false))
	{
		spu_log.error("Failed to rename temporary file to '%s' (error=%s)", elf_path, fs::g_tls_error);
		return false;
	}

	spu_log.success("SPU Local Storage image saved to '%s'", elf_path);
	return true;
}
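
// Tags the SPU thread with the function it is currently executing and the entry
// timestamp, so logging and debugger output can report where the thread is and
// for how long it has been there.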
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func)
	: spu(spu)
{
	spu.current_func = func;
	spu.start_time = get_system_time();
}
template <>
void fmt_class_string<spu_channel>::format(std::string& out, u64 arg)
{
	const auto& ch = get_object(arg);

	u32 data = 0;

	if (ch.try_read(data))
	{
		fmt::append(out, "0x%08x", data);
	}
	else
	{
		out += "empty";
	}
}

template <>
void fmt_class_string<spu_channel_4_t>::format(std::string& out, u64 arg)
{
	const auto& ch = get_object(arg);

	// TODO (use try_read)
	fmt::append(out, "count = %d", ch.get_count());
}
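
// A possible shape for the TODO above, as a sketch only (assumes a hypothetical
// try_read overload that copies out the queued values without consuming them):
//
//   u32 vals[4];
//   const uint n = ch.try_read(vals);
//   fmt::append(out, "count = %d", n);
//   for (uint i = 0; i < n; i++)
//       fmt::append(out, " 0x%08x", vals[i]);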

DECLARE(spu_thread::g_raw_spu_ctr){};
DECLARE(spu_thread::g_raw_spu_id){};