2009-09-07 19:23:46 +00:00
|
|
|
#include "VuBasicBlock.h"
|
|
|
|
#include "MA_VU.h"
|
|
|
|
#include "offsetof_def.h"
|
2017-07-06 23:35:22 -04:00
|
|
|
#include "MemoryUtils.h"
|
|
|
|
#include "Vpu.h"
|
2009-09-07 19:23:46 +00:00
|
|
|
|
2022-08-15 10:45:12 -04:00
|
|
|
//Constructs a VU basic block covering guest addresses [begin, end].
//All bookkeeping is delegated to the CBasicBlock base class; this subclass
//only specializes compilation (see CompileRange).
CVuBasicBlock::CVuBasicBlock(CMIPS& context, uint32 begin, uint32 end, BLOCK_CATEGORY category)
    : CBasicBlock(context, begin, end, category)
{
}
|
|
|
|
|
2020-05-29 13:27:39 -04:00
|
|
|
//Returns whether this block may be directly linked to its successor blocks.
//CompileRange clears m_isLinkable when the block ends with a conditional
//branch sitting in another branch's delay slot, because the link targets
//would then be wrong.
bool CVuBasicBlock::IsLinkable() const
{
	return m_isLinkable;
}
|
|
|
|
|
2010-09-03 21:13:57 +00:00
|
|
|
//Compiles every VU instruction pair in [m_begin, m_end] (inclusive) into the
//given jitter. Handles Q/P pipeline syncs, FMAC stall accounting, deferred
//XGKICK emission, the integer-branch-delay quirk, and the
//branch-in-delay-slot pattern. The emitted code is bracketed by
//CompileProlog/CompileEpilog.
void CVuBasicBlock::CompileRange(CMipsJitter* jitter)
{
	CompileProlog(jitter);

	//Blocks are made of 8-byte lower/upper instruction pairs; both bounds
	//must be pair-aligned (m_end points at the last upper word).
	assert((m_begin & 0x07) == 0);
	assert(((m_end + 4) & 0x07) == 0);
	auto arch = static_cast<CMA_VU*>(m_context.m_pArch);

	//XGKICK is deferred by one instruction pair; this tracks the pending one.
	bool hasPendingXgKick = false;
	const auto clearPendingXgKick =
	    [&]() {
		    assert(hasPendingXgKick);
		    EmitXgKick(jitter);
		    hasPendingXgKick = false;
	    };

	//Seed FMAC write times from the instructions just before this block (if
	//any), so stalls crossing the block boundary are accounted for.
	BlockFmacPipelineInfo prevBlockFmacPipelineInfo;
	auto prevBlockWindow = GetPreviousBlockWindow();
	if((prevBlockWindow.second - prevBlockWindow.first) != 0)
	{
		prevBlockFmacPipelineInfo = ComputeFmacStallDelays(prevBlockWindow.first, prevBlockWindow.second);
		OffsetFmacWriteTimes(prevBlockFmacPipelineInfo);
	}

	auto fmacPipelineInfo = ComputeFmacStallDelays(m_begin, m_end, prevBlockFmacPipelineInfo.regWriteTimes);

	auto integerBranchDelayInfo = ComputeIntegerBranchDelayInfo(fmacPipelineInfo.stallDelays);

	uint32 maxInstructions = ((m_end - m_begin) / 8) + 1;
	std::vector<uint32> hints;
	hints.resize(maxInstructions);

	ComputeSkipFlagsHints(fmacPipelineInfo.stallDelays, hints);

	uint32 relativePipeTime = 0;
	uint32 instructionIndex = 0;

	for(uint32 address = m_begin; address <= m_end; address += 8)
	{
		uint32 addressLo = address + 0;
		uint32 addressHi = address + 4;

		uint32 opcodeLo = m_context.m_pMemoryMap->GetInstruction(addressLo);
		uint32 opcodeHi = m_context.m_pMemoryMap->GetInstruction(addressHi);

		auto loOps = arch->GetAffectedOperands(&m_context, addressLo, opcodeLo);
		auto hiOps = arch->GetAffectedOperands(&m_context, addressHi, opcodeHi);

		//No upper instruction writes to Q
		assert(hiOps.syncQ == false);

		//No lower instruction reads Q
		assert(loOps.readQ == false);

		//No upper instruction writes to P
		assert(hiOps.syncP == false);

		//No upper instruction reads from P
		assert(hiOps.readP == false);

		//XGKICK opcode test, ignoring the source register field (bits 11-15).
		bool loIsXgKick = (opcodeLo & ~(0x1F << 11)) == 0x800006FC;

		if(loOps.syncQ)
		{
			VUShared::FlushPipeline(VUShared::g_pipeInfoQ, jitter);
		}
		if(loOps.syncP)
		{
			VUShared::FlushPipeline(VUShared::g_pipeInfoP, jitter);
		}

		//Account for FMAC hazards before this instruction executes.
		auto fmacStallDelay = fmacPipelineInfo.stallDelays[instructionIndex];
		relativePipeTime += fmacStallDelay;

		if(hiOps.readQ)
		{
			VUShared::CheckPipeline(VUShared::g_pipeInfoQ, jitter, relativePipeTime);
		}
		if(loOps.readP)
		{
			VUShared::CheckPipeline(VUShared::g_pipeInfoP, jitter, relativePipeTime);
		}

		//When the upper instruction writes a VF register that the lower
		//instruction reads, the lower one must see the pre-update value.
		//savedReg holds that register index (0 = no save needed).
		uint8 savedReg = 0;

		if(hiOps.writeF != 0)
		{
			assert(hiOps.writeF != loOps.writeF);
			if(
			    (hiOps.writeF == loOps.readF0) ||
			    (hiOps.writeF == loOps.readF1))
			{
				savedReg = hiOps.writeF;
				jitter->MD_PushRel(offsetof(CMIPS, m_State.nCOP2[savedReg]));
				jitter->MD_PullRel(offsetof(CMIPS, m_State.nCOP2VF_PreUp));
			}
		}

		if(address == integerBranchDelayInfo.saveRegAddress)
		{
			// grab the value of the delayed reg to use in the conditional branch later
			jitter->PushRel(offsetof(CMIPS, m_State.nCOP2VI[integerBranchDelayInfo.regIndex]));
			jitter->PullRel(offsetof(CMIPS, m_State.savedIntReg));
		}

		uint32 compileHints = hints[instructionIndex];
		arch->SetRelativePipeTime(relativePipeTime, compileHints);
		//Upper instruction compiles first, then the lower one below.
		arch->CompileInstruction(addressHi, jitter, &m_context, addressHi - m_begin);

		if(savedReg != 0)
		{
			//Stash the upper instruction's result, restore the pre-update
			//value so the lower instruction reads the old register contents.
			jitter->MD_PushRel(offsetof(CMIPS, m_State.nCOP2[savedReg]));
			jitter->MD_PullRel(offsetof(CMIPS, m_State.nCOP2VF_UpRes));

			jitter->MD_PushRel(offsetof(CMIPS, m_State.nCOP2VF_PreUp));
			jitter->MD_PullRel(offsetof(CMIPS, m_State.nCOP2[savedReg]));
		}

		if(address == integerBranchDelayInfo.useRegAddress)
		{
			// set the target from the saved value
			jitter->PushRel(offsetof(CMIPS, m_State.nCOP2VI[integerBranchDelayInfo.regIndex]));
			jitter->PullRel(offsetof(CMIPS, m_State.savedIntRegTemp));

			jitter->PushRel(offsetof(CMIPS, m_State.savedIntReg));
			jitter->PullRel(offsetof(CMIPS, m_State.nCOP2VI[integerBranchDelayInfo.regIndex]));
		}

		//If there's a pending XGKICK and the current lower instruction is
		//an XGKICK, make sure we flush the pending one first
		if(loIsXgKick && hasPendingXgKick)
		{
			clearPendingXgKick();
		}

		arch->CompileInstruction(addressLo, jitter, &m_context, addressLo - m_begin);

		if(address == integerBranchDelayInfo.useRegAddress)
		{
			// put the target value back
			jitter->PushRel(offsetof(CMIPS, m_State.savedIntRegTemp));
			jitter->PullRel(offsetof(CMIPS, m_State.nCOP2VI[integerBranchDelayInfo.regIndex]));
		}

		if(savedReg != 0)
		{
			//Lower instruction is done; commit the upper instruction's result.
			jitter->MD_PushRel(offsetof(CMIPS, m_State.nCOP2VF_UpRes));
			jitter->MD_PullRel(offsetof(CMIPS, m_State.nCOP2[savedReg]));
		}

		if(hasPendingXgKick)
		{
			clearPendingXgKick();
		}

		if(loIsXgKick)
		{
			assert(!hasPendingXgKick);
			hasPendingXgKick = true;
		}

		//Adjust pipeTime
		relativePipeTime++;
		instructionIndex++;

		//Handle some branch in delay slot situation (Star Ocean 3):
		//B $label1
		//Bxx $label2
		if((address == (m_end - 4)) && IsConditionalBranch(opcodeLo))
		{
			//Disable block linking because targets will be wrong
			m_isLinkable = false;

			uint32 branchOpcodeAddr = address - 8;
			assert(branchOpcodeAddr >= m_begin);
			uint32 branchOpcodeLo = m_context.m_pMemoryMap->GetInstruction(branchOpcodeAddr);
			if(IsNonConditionalBranch(branchOpcodeLo))
			{
				//We need to compile the instruction at the branch target because it will be executed
				//before the branch is taken
				uint32 branchTgtAddress = branchOpcodeAddr + VUShared::GetBranch(branchOpcodeLo & 0x7FF) + 8;
				arch->CompileInstruction(branchTgtAddress, jitter, &m_context, address - m_begin);
			}
		}

		//Sanity check
		assert(jitter->IsStackEmpty());
	}

	if(hasPendingXgKick)
	{
		clearPendingXgKick();
	}

	assert(!hasPendingXgKick);

	//Increment pipeTime
	{
		jitter->PushRel(offsetof(CMIPS, m_State.pipeTime));
		jitter->PushCst(relativePipeTime);
		jitter->Add();
		jitter->PullRel(offsetof(CMIPS, m_State.pipeTime));
	}

	CompileEpilog(jitter);
}
|
2016-08-06 22:14:44 -04:00
|
|
|
|
|
|
|
//Tells whether the given lower-pipe opcode is a VU conditional branch.
bool CVuBasicBlock::IsConditionalBranch(uint32 opcodeLo)
{
	//Conditional branches are in the contiguous opcode range 0x28 -> 0x2F inclusive,
	//so masking off the low 3 bits of the 7-bit opcode id must leave 0x28.
	const uint32 opcodeId = (opcodeLo >> 25) & 0x7F;
	return (opcodeId & ~0x07U) == 0x28;
}
|
|
|
|
|
2020-05-29 13:27:39 -04:00
|
|
|
//Tells whether the given lower-pipe opcode is the unconditional branch
//instruction (opcode id 0x20).
bool CVuBasicBlock::IsNonConditionalBranch(uint32 opcodeLo)
{
	return ((opcodeLo >> 25) & 0x7F) == 0x20;
}
|
|
|
|
|
2022-08-19 12:14:52 -04:00
|
|
|
//Detects the "integer branch delay" hazard: a conditional branch whose
//condition register is written by the immediately preceding instruction.
//Returns the register index plus the addresses where CompileRange must save
//and substitute the earlier register value.
CVuBasicBlock::INTEGER_BRANCH_DELAY_INFO CVuBasicBlock::ComputeIntegerBranchDelayInfo(const std::vector<uint32>& fmacStallDelays) const
{
	// Test if the block ends with a conditional branch instruction where the condition variable has been
	// set in the prior instruction.
	// In this case, the pipeline shortcut fails and we need to use the value from 4 instructions previous.
	// If the relevant set instruction is not part of this block, use initial value of the integer register.

	//NOTE(review): 'result' is returned as-is when no hazard is found —
	//presumably INTEGER_BRANCH_DELAY_INFO has in-class initializers that mark
	//it inactive; verify against the header.
	INTEGER_BRANCH_DELAY_INFO result;
	auto arch = static_cast<CMA_VU*>(m_context.m_pArch);
	//m_end points at the last upper word; adjustedEnd is one past the last pair.
	uint32 adjustedEnd = m_end - 4;

	// Check if we have a conditional branch instruction.
	uint32 branchOpcodeAddr = adjustedEnd - 8;
	uint32 branchOpcodeLo = m_context.m_pMemoryMap->GetInstruction(branchOpcodeAddr);
	if(IsConditionalBranch(branchOpcodeLo))
	{
		//Stall delay of the branch instruction itself (second-to-last entry).
		uint32 fmacDelayOnBranch = fmacStallDelays[fmacStallDelays.size() - 2];

		// We have a conditional branch instruction. Now we need to check that the condition register is not written
		// by the previous instruction.
		uint32 priorOpcodeAddr = adjustedEnd - 16;
		uint32 priorOpcodeLo = m_context.m_pMemoryMap->GetInstruction(priorOpcodeAddr);

		auto priorLoOps = arch->GetAffectedOperands(&m_context, priorOpcodeAddr, priorOpcodeLo);
		//Only applies when the prior instruction writes an integer register,
		//is not itself a branch, and the branch suffered no FMAC stall
		//(a stall already gives the write time to settle).
		if((priorLoOps.writeI != 0) && !priorLoOps.branchValue && (fmacDelayOnBranch == 0))
		{
			auto branchLoOps = arch->GetAffectedOperands(&m_context, branchOpcodeAddr, branchOpcodeLo);
			if(
			    (branchLoOps.readI0 == priorLoOps.writeI) ||
			    (branchLoOps.readI1 == priorLoOps.writeI))
			{
				//Check if our block is a "special" loop. Disable delayed integer processing if it's the case
				//TODO: Handle that case better
				bool isSpecialLoop = CheckIsSpecialIntegerLoop(priorLoOps.writeI);
				if(!isSpecialLoop)
				{
					// we need to use the value of intReg 4 steps prior or use initial value.
					result.regIndex = priorLoOps.writeI;
					//Clamped to m_begin when the block is shorter than 5 pairs.
					result.saveRegAddress = std::max<int32>(adjustedEnd - 5 * 8, m_begin);
					result.useRegAddress = adjustedEnd - 8;
				}
			}
		}
	}

	return result;
}
|
|
|
|
|
2018-06-11 12:59:00 -04:00
|
|
|
//Returns true if this block is a tight self-loop where every instruction pair
//(except the branch and its delay slot) writes integer register regI, and the
//conditional branch jumps back to the block's start.
bool CVuBasicBlock::CheckIsSpecialIntegerLoop(unsigned int regI) const
{
	//This checks for a pattern where all instructions within a block
	//modifies an integer register except for one branch instruction that
	//tests that integer register
	//Required by BGDA that has that kind of loop inside its VU microcode

	auto arch = static_cast<CMA_VU*>(m_context.m_pArch);
	//Number of instruction pairs minus one (m_end is inclusive); the pattern
	//is only recognized for 5-pair blocks.
	uint32 length = (m_end - m_begin) / 8;
	if(length != 4) return false;
	//index <= length: iterates all 5 pairs (inclusive-end convention).
	for(uint32 index = 0; index <= length; index++)
	{
		uint32 address = m_begin + (index * 8);
		uint32 opcodeLo = m_context.m_pMemoryMap->GetInstruction(address);
		if(index == (length - 1))
		{
			//Second-to-last pair must be the conditional branch back to the top.
			assert(IsConditionalBranch(opcodeLo));
			uint32 branchTarget = arch->GetInstructionEffectiveAddress(&m_context, address, opcodeLo);
			if(branchTarget != m_begin) return false;
		}
		else
		{
			//Every other pair's lower instruction must write regI.
			auto loOps = arch->GetAffectedOperands(&m_context, address, opcodeLo);
			if(loOps.writeI != regI) return false;
		}
	}

	return true;
}
|
2017-07-06 23:35:22 -04:00
|
|
|
|
2017-08-06 13:34:05 -04:00
|
|
|
//Emits the jitted code that performs a deferred XGKICK: writes the saved
//kick address (m_State.xgkickAddress) to the VU_ADDR_XGKICK handler address
//through MemoryUtils_SetWordProxy. The three pushes below build the proxy
//call's arguments in order (context, value, address).
void CVuBasicBlock::EmitXgKick(CMipsJitter* jitter)
{
	//Push context
	jitter->PushCtx();

	//Push value
	jitter->PushRel(offsetof(CMIPS, m_State.xgkickAddress));

	//Compute Address
	jitter->PushCst(CVpu::VU_ADDR_XGKICK);

	jitter->Call(reinterpret_cast<void*>(&MemoryUtils_SetWordProxy), 3, false);
}
|
2020-02-05 21:53:02 -05:00
|
|
|
|
2020-02-27 13:13:57 -05:00
|
|
|
//Simulates MACflags production/consumption across the block and marks, in
//'hints', every instruction whose MACflags result is never read (inside the
//block or within LATENCY_MAC cycles after it) with COMPILEHINT_SKIPFMACUPDATE
//so the recompiler can skip the flag computation. 'hints' must already be
//sized to the block's instruction count.
void CVuBasicBlock::ComputeSkipFlagsHints(const std::vector<uint32>& fmacStallDelays, std::vector<uint32>& hints) const
{
	//Sentinel meaning "no instruction's flags are visible at this pipe time".
	static const uint32 g_undefinedMACflagsResult = -1;

	auto arch = static_cast<CMA_VU*>(m_context.m_pArch);

	uint32 maxInstructions = static_cast<uint32>(hints.size());

	//Total pipe time = one cycle per instruction plus all stall cycles.
	uint32 maxPipeTime = maxInstructions;
	for(const auto& fmacStallDelay : fmacStallDelays)
		maxPipeTime += fmacStallDelay;

	//Take into account the instructions that come after this block (up to 4 cycles later)
	uint32 extendedMaxPipeTime = maxPipeTime + VUShared::LATENCY_MAC;

	//flagsResults[t] = index of the instruction whose MACflags are visible at
	//pipe time t (or the sentinel).
	std::vector<uint32> flagsResults;
	flagsResults.resize(extendedMaxPipeTime);
	std::fill(flagsResults.begin(), flagsResults.end(), g_undefinedMACflagsResult);

	//resultUsed[i] = some later instruction actually reads instruction i's flags.
	std::vector<bool> resultUsed;
	resultUsed.resize(maxInstructions);

	uint32 relativePipeTime = 0;
	for(uint32 address = m_begin; address <= m_end; address += 8)
	{
		uint32 instructionIndex = (address - m_begin) / 8;
		assert(instructionIndex < maxInstructions);

		uint32 addressLo = address + 0;
		uint32 addressHi = address + 4;

		uint32 opcodeLo = m_context.m_pMemoryMap->GetInstruction(addressLo);
		uint32 opcodeHi = m_context.m_pMemoryMap->GetInstruction(addressHi);

		auto loOps = arch->GetAffectedOperands(&m_context, addressLo, opcodeLo);
		auto hiOps = arch->GetAffectedOperands(&m_context, addressHi, opcodeHi);

		//Stalls delay this instruction's slot in the pipeline.
		relativePipeTime += fmacStallDelays[instructionIndex];

		if(hiOps.writeMACflags)
		{
			//Make this result available
			std::fill(
			    flagsResults.begin() + relativePipeTime + VUShared::LATENCY_MAC,
			    flagsResults.end(), instructionIndex);
		}

		if(loOps.readMACflags)
		{
			uint32 pipeTimeForResult = flagsResults[relativePipeTime];
			if(pipeTimeForResult != g_undefinedMACflagsResult)
			{
				resultUsed[pipeTimeForResult] = true;
			}
		}

		relativePipeTime++;
	}

	//Simulate usage from outside our block
	for(uint32 relativePipeTime = maxPipeTime; relativePipeTime < extendedMaxPipeTime; relativePipeTime++)
	{
		uint32 pipeTimeForResult = flagsResults[relativePipeTime];
		if(pipeTimeForResult != g_undefinedMACflagsResult)
		{
			resultUsed[pipeTimeForResult] = true;
		}
	}

	//Flag unused results
	for(uint32 instructionIndex = 0; instructionIndex < maxInstructions; instructionIndex++)
	{
		bool used = resultUsed[instructionIndex];
		if(!used)
		{
			hints[instructionIndex] |= VUShared::COMPILEHINT_SKIPFMACUPDATE;
		}
	}
}
|
2020-02-24 19:20:56 -05:00
|
|
|
|
2021-02-05 17:03:10 -05:00
|
|
|
//Computes, for every instruction pair in [begin, end], the number of stall
//cycles caused by FMAC (and load/store unit) register write latencies.
//initWriteFTime optionally seeds per-register/per-element write times from a
//previous window (see GetPreviousBlockWindow/OffsetFmacWriteTimes).
//Returns the stall table, the final pipe time and the outgoing write times.
CVuBasicBlock::BlockFmacPipelineInfo CVuBasicBlock::ComputeFmacStallDelays(uint32 begin, uint32 end, FmacRegWriteTimes initWriteFTime) const
{
	auto arch = static_cast<CMA_VU*>(m_context.m_pArch);

	assert((begin & 0x07) == 0);
	assert(((end + 4) & 0x07) == 0);
	uint32 maxInstructions = ((end - begin) / 8) + 1;

	std::vector<uint32> fmacStallDelays;
	fmacStallDelays.resize(maxInstructions);

	uint32 relativePipeTime = 0;
	//writeFTime[reg][elem] = pipe time at which that VF element becomes readable.
	FmacRegWriteTimes writeFTime = {};
	//writeITime: same, for integer registers written by loads (LSU).
	FmacRegWriteTimes writeITime = {};
	if(initWriteFTime != nullptr)
	{
		memcpy(writeFTime, initWriteFTime, sizeof(writeFTime));
	}

	//Pushes pipeTime forward to the latest pending write among the register
	//elements selected by 'dest'; register 0 never stalls.
	auto adjustPipeTime =
	    [](uint32 pipeTime, const FmacRegWriteTimes& writeTime, uint32 dest, uint32 regIndex) {
		    if(regIndex == 0) return pipeTime;
		    for(unsigned int i = 0; i < 4; i++)
		    {
			    if(dest & (1 << i))
			    {
				    pipeTime = std::max<uint32>(pipeTime, writeTime[regIndex][i]);
			    }
		    }
		    return pipeTime;
	    };

	for(uint32 address = begin; address <= end; address += 8)
	{
		uint32 instructionIndex = (address - begin) / 8;
		assert(instructionIndex < maxInstructions);

		uint32 addressLo = address + 0;
		uint32 addressHi = address + 4;

		uint32 opcodeLo = m_context.m_pMemoryMap->GetInstruction(addressLo);
		uint32 opcodeHi = m_context.m_pMemoryMap->GetInstruction(addressHi);

		auto loOps = arch->GetAffectedOperands(&m_context, addressLo, opcodeLo);
		auto hiOps = arch->GetAffectedOperands(&m_context, addressHi, opcodeHi);

		//Destination element masks (xyzw) encoded in bits 21-24 of each opcode.
		uint32 loDest = (opcodeLo >> 21) & 0xF;
		uint32 hiDest = (opcodeHi >> 21) & 0xF;

		//Instruction executes...

		relativePipeTime++;
		uint32 prevRelativePipeTime = relativePipeTime;

		//Stall until every VF element read by either instruction is ready.
		relativePipeTime = adjustPipeTime(relativePipeTime, writeFTime, loOps.readElemF0, loOps.readF0);
		relativePipeTime = adjustPipeTime(relativePipeTime, writeFTime, loOps.readElemF1, loOps.readF1);
		relativePipeTime = adjustPipeTime(relativePipeTime, writeFTime, hiOps.readElemF0, hiOps.readF0);
		relativePipeTime = adjustPipeTime(relativePipeTime, writeFTime, hiOps.readElemF1, hiOps.readF1);

		//Integer register reads also stall on pending LSU writes (all elements checked).
		relativePipeTime = adjustPipeTime(relativePipeTime, writeITime, 0xF, loOps.readI0);
		relativePipeTime = adjustPipeTime(relativePipeTime, writeITime, 0xF, loOps.readI1);

		if(prevRelativePipeTime != relativePipeTime)
		{
			//We got a stall, sync
			assert(relativePipeTime >= prevRelativePipeTime);
			uint32 diff = relativePipeTime - prevRelativePipeTime;
			fmacStallDelays[instructionIndex] = diff;
		}

		if(loOps.writeF != 0)
		{
			assert(loOps.writeF < 32);
			for(uint32 i = 0; i < 4; i++)
			{
				if(loDest & (1 << i))
				{
					writeFTime[loOps.writeF][i] = relativePipeTime + VUShared::LATENCY_MAC;
				}
			}
		}

		//Not FMAC, but we consider LSU (load/store unit) stalls here too
		if(loOps.writeILsu != 0)
		{
			assert(loOps.writeILsu < 32);
			for(uint32 i = 0; i < 4; i++)
			{
				writeITime[loOps.writeILsu][i] = relativePipeTime + VUShared::LATENCY_MAC;
			}
		}

		if(hiOps.writeF != 0)
		{
			assert(hiOps.writeF < 32);
			for(uint32 i = 0; i < 4; i++)
			{
				if(hiDest & (1 << i))
				{
					writeFTime[hiOps.writeF][i] = relativePipeTime + VUShared::LATENCY_MAC;
				}
			}
		}
	}

	//TODO: Check that we don't have unconditional branches?

	BlockFmacPipelineInfo result;
	result.pipeTime = relativePipeTime;
	result.stallDelays = fmacStallDelays;
	memcpy(result.regWriteTimes, writeFTime, sizeof(writeFTime));
	return result;
}
|
|
|
|
|
|
|
|
std::pair<uint32, uint32> CVuBasicBlock::GetPreviousBlockWindow() const
|
|
|
|
{
|
|
|
|
//3 previous instructions
|
|
|
|
static const uint32 windowSize = (3 * 8);
|
|
|
|
auto result = std::make_pair(0U, 0U);
|
|
|
|
if(m_begin >= windowSize)
|
|
|
|
{
|
|
|
|
//TODO: Check for unconditional jumps
|
|
|
|
result = std::make_pair(m_begin - windowSize, m_begin - 4);
|
|
|
|
assert((result.second + 4) == m_begin);
|
|
|
|
}
|
|
|
|
assert(result.second >= result.first);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
//Rebases every register-element write time in the pipeline info so that it
//becomes relative to the end of the analyzed window: times at or past
//pipeTime are reduced by pipeTime, earlier times are left as-is.
void CVuBasicBlock::OffsetFmacWriteTimes(BlockFmacPipelineInfo& pipelineInfo)
{
	const uint32 basePipeTime = pipelineInfo.pipeTime;

	//Resync all write times
	for(auto& regTimes : pipelineInfo.regWriteTimes)
	{
		for(auto& elemTime : regTimes)
		{
			if(elemTime >= basePipeTime)
			{
				elemTime -= basePipeTime;
			}
		}
	}
}
|