Use templates to allow better code generation for UNPACK.

This commit is contained in:
Jean-Philip Desjardins 2021-05-27 14:31:07 -04:00
parent bd20f4484b
commit 36f1564913
4 changed files with 369 additions and 316 deletions

View file

@ -437,6 +437,7 @@ set(COMMON_SRC_FILES
states/StructFile.h states/StructFile.h
states/XmlStateFile.cpp states/XmlStateFile.cpp
states/XmlStateFile.h states/XmlStateFile.h
static_loop.h
uint128.h uint128.h
VirtualPad.cpp VirtualPad.cpp
VirtualPad.h VirtualPad.h

View file

@ -1,7 +1,7 @@
#include <cassert> #include <cassert>
#include <cstring> #include <cstring>
#include <climits>
#include <stdexcept> #include <stdexcept>
#include "static_loop.h"
#include "string_format.h" #include "string_format.h"
#include "../Log.h" #include "../Log.h"
#include "../Ps2Const.h" #include "../Ps2Const.h"
@ -48,6 +48,15 @@ CVif::CVif(unsigned int number, CVpu& vpu, CINTC& intc, uint8* ram, uint8* spr)
, m_vpu(vpu) , m_vpu(vpu)
, m_vifProfilerZone(CProfiler::GetInstance().RegisterZone(string_format("VIF%d", number).c_str())) , m_vifProfilerZone(CProfiler::GetInstance().RegisterZone(string_format("VIF%d", number).c_str()))
{ {
//Fill the UNPACK dispatch table with one Unpack<> instantiation per index.
//Index layout (Cmd_UNPACK builds the same index at run time):
//  bits 0-3 : data type
//  bit  4   : CL >= WL
//  bit  5   : masking enabled
//  bits 6-7 : mode
//  bit  8   : usn
static_loop<int, MAX_UNPACKERS>(
	[this](auto i) {
		//'i' is a std::integral_constant, so its value is usable in constant expressions
		constexpr uint8 dataType = (i & 0x0F);
		constexpr bool clGreaterEqualWl = (i & 0x10) != 0;
		constexpr bool useMask = (i & 0x20) != 0;
		constexpr uint8 mode = (i & 0xC0) >> 6;
		//Unpack<> declares 'usn' as a bool template parameter, so decode it as a bool
		constexpr bool usn = (i & 0x100) != 0;
		m_unpacker[i] = &CVif::Unpack<dataType, clGreaterEqualWl, useMask, mode, usn>;
	});
} }
void CVif::Reset() void CVif::Reset()
@ -655,215 +664,19 @@ void CVif::Cmd_STMASK(StreamType& stream, CODE command)
void CVif::Cmd_UNPACK(StreamType& stream, CODE nCommand, uint32 nDstAddr) void CVif::Cmd_UNPACK(StreamType& stream, CODE nCommand, uint32 nDstAddr)
{ {
assert((nCommand.nCMD & 0x60) == 0x60);
const auto vuMem = m_vpu.GetVuMemory();
const auto vuMemSize = m_vpu.GetVuMemorySize();
bool usn = (m_CODE.nIMM & 0x4000) != 0;
bool useMask = (nCommand.nCMD & 0x10) != 0;
uint32 cl = m_CYCLE.nCL; uint32 cl = m_CYCLE.nCL;
uint32 wl = m_CYCLE.nWL; uint32 wl = m_CYCLE.nWL;
if(wl == 0) if(wl == 0)
{ {
wl = UINT_MAX; wl = UINT_MAX;
cl = 0; cl = UINT_MAX;
} }
bool clGreaterEqualWl = (cl >= wl);
if(m_NUM == nCommand.nNUM) bool useMask = (nCommand.nCMD & 0x10) != 0;
{ bool usn = (m_CODE.nIMM & 0x4000) != 0;
m_readTick = 0; uint8 mode = m_MODE & 0x3;
m_writeTick = 0; auto unpackFct = m_unpacker[(nCommand.nCMD & 0x0F) | ((clGreaterEqualWl ? 1 : 0) << 4) | ((useMask ? 1 : 0) << 5) | (mode << 6) | (usn << 8)];
} ((*this).*(unpackFct))(stream, nCommand, nDstAddr);
uint32 currentNum = (m_NUM == 0) ? 256 : m_NUM;
uint32 codeNum = (m_CODE.nNUM == 0) ? 256 : m_CODE.nNUM;
uint32 transfered = codeNum - currentNum;
if(cl > wl)
{
nDstAddr += cl * (transfered / wl) + (transfered % wl);
}
else
{
nDstAddr += transfered;
}
nDstAddr *= 0x10;
assert(nDstAddr < vuMemSize);
nDstAddr &= (vuMemSize - 1);
while(currentNum != 0)
{
bool mustWrite = false;
uint128 writeValue;
memset(&writeValue, 0, sizeof(writeValue));
if(cl >= wl)
{
if(m_readTick < wl)
{
bool success = Unpack_ReadValue(nCommand, stream, writeValue, usn);
if(!success) break;
mustWrite = true;
}
}
else
{
if(m_writeTick < cl)
{
bool success = Unpack_ReadValue(nCommand, stream, writeValue, usn);
if(!success) break;
}
mustWrite = true;
}
if(mustWrite)
{
auto dst = reinterpret_cast<uint128*>(vuMem + nDstAddr);
for(unsigned int i = 0; i < 4; i++)
{
uint32 maskOp = useMask ? GetMaskOp(i, m_writeTick) : MASK_DATA;
if(maskOp == MASK_DATA)
{
if(m_MODE == MODE_OFFSET)
{
writeValue.nV[i] += m_R[i];
}
else if(m_MODE == MODE_DIFFERENCE)
{
writeValue.nV[i] += m_R[i];
m_R[i] = writeValue.nV[i];
}
dst->nV[i] = writeValue.nV[i];
}
else if(maskOp == MASK_ROW)
{
dst->nV[i] = m_R[i];
}
else if(maskOp == MASK_COL)
{
int index = (m_writeTick > 3) ? 3 : m_writeTick;
dst->nV[i] = m_C[index];
}
else if(maskOp == MASK_MASK)
{
//Don't write anything
}
else
{
assert(0);
}
}
currentNum--;
}
if(cl >= wl)
{
m_writeTick = std::min<uint32>(m_writeTick + 1, wl);
m_readTick = std::min<uint32>(m_readTick + 1, cl);
if(m_readTick == cl)
{
m_writeTick = 0;
m_readTick = 0;
}
}
else
{
m_writeTick = std::min<uint32>(m_writeTick + 1, wl);
m_readTick = std::min<uint32>(m_readTick + 1, cl);
if(m_writeTick == wl)
{
m_writeTick = 0;
m_readTick = 0;
}
}
nDstAddr += 0x10;
nDstAddr &= (vuMemSize - 1);
}
if(currentNum != 0)
{
m_STAT.nVPS = 1;
}
else
{
stream.Align32();
m_STAT.nVPS = 0;
}
m_NUM = static_cast<uint8>(currentNum);
}
bool CVif::Unpack_ReadValue(const CODE& nCommand, StreamType& stream, uint128& writeValue, bool usn)
{
bool success = false;
switch(nCommand.nCMD & 0x0F)
{
case 0x00:
//S-32
success = Unpack_S32(stream, writeValue);
break;
case 0x01:
//S-16
success = Unpack_S16(stream, writeValue, usn);
break;
case 0x02:
//S-8
success = Unpack_S8(stream, writeValue, usn);
break;
case 0x04:
//V2-32
success = Unpack_V32(stream, writeValue, 2);
break;
case 0x05:
//V2-16
success = Unpack_V16(stream, writeValue, 2, usn);
break;
case 0x06:
//V2-8
success = Unpack_V8(stream, writeValue, 2, usn);
break;
case 0x08:
//V3-32
success = Unpack_V32(stream, writeValue, 3);
break;
case 0x09:
//V3-16
success = Unpack_V16(stream, writeValue, 3, usn);
break;
case 0x0A:
//V3-8
success = Unpack_V8(stream, writeValue, 3, usn);
break;
case 0x0C:
//V4-32
success = Unpack_V32(stream, writeValue, 4);
break;
case 0x0D:
//V4-16
success = Unpack_V16(stream, writeValue, 4, usn);
break;
case 0x0E:
//V4-8
success = Unpack_V8(stream, writeValue, 4, usn);
break;
case 0x0F:
//V4-5
success = Unpack_V45(stream, writeValue);
break;
default:
assert(0);
break;
}
return success;
} }
bool CVif::Unpack_S32(StreamType& stream, uint128& result) bool CVif::Unpack_S32(StreamType& stream, uint128& result)
@ -919,53 +732,6 @@ bool CVif::Unpack_S8(StreamType& stream, uint128& result, bool zeroExtend)
return true; return true;
} }
//Reads 'fields' 8-bit elements from 'stream' into result.nV[0..fields-1].
//Each element is sign-extended to 32 bits unless 'zeroExtend' is set.
//Returns false, before reading anything, when fewer than 'fields' bytes are available.
bool CVif::Unpack_V8(StreamType& stream, uint128& result, unsigned int fields, bool zeroExtend)
{
	if(stream.GetAvailableReadBytes() < (fields))
	{
		return false;
	}

	unsigned int lane = 0;
	while(lane < fields)
	{
		//NOTE(review): reading one byte into a zeroed uint32 presumably relies on a little-endian host - confirm
		uint32 element = 0;
		stream.Read(&element, 1);
		result.nV[lane] = zeroExtend ? element : static_cast<uint32>(static_cast<int8>(element));
		lane++;
	}
	return true;
}
//Reads 'fields' 16-bit elements from 'stream' into result.nV[0..fields-1].
//Each element is sign-extended to 32 bits unless 'zeroExtend' is set.
//Returns false, before reading anything, when fewer than fields * 2 bytes are available.
bool CVif::Unpack_V16(StreamType& stream, uint128& result, unsigned int fields, bool zeroExtend)
{
	if(stream.GetAvailableReadBytes() < (fields * 2))
	{
		return false;
	}

	unsigned int lane = 0;
	while(lane < fields)
	{
		//NOTE(review): reading two bytes into a zeroed uint32 presumably relies on a little-endian host - confirm
		uint32 element = 0;
		stream.Read(&element, 2);
		result.nV[lane] = zeroExtend ? element : static_cast<uint32>(static_cast<int16>(element));
		lane++;
	}
	return true;
}
//Copies 'fields' 32-bit elements (fields * 4 bytes) from 'stream' straight into 'result'.
//Returns false, leaving 'result' and the stream untouched, when not enough bytes are available.
bool CVif::Unpack_V32(StreamType& stream, uint128& result, unsigned int fields)
{
	const unsigned int byteCount = fields * 4;
	const bool enoughData = !(stream.GetAvailableReadBytes() < byteCount);
	if(enoughData)
	{
		stream.Read(&result, byteCount);
	}
	return enoughData;
}
bool CVif::Unpack_V45(StreamType& stream, uint128& result) bool CVif::Unpack_V45(StreamType& stream, uint128& result)
{ {
if(stream.GetAvailableReadBytes() < 2) return false; if(stream.GetAvailableReadBytes() < 2) return false;
@ -1242,24 +1008,6 @@ void CVif::CFifoStream::Reset()
m_source = nullptr; m_source = nullptr;
} }
void CVif::CFifoStream::Read(void* buffer, uint32 size)
{
assert(m_source != NULL);
uint8* readBuffer = reinterpret_cast<uint8*>(buffer);
while(size != 0)
{
SyncBuffer();
uint32 read = std::min<uint32>(size, BUFFERSIZE - m_bufferPosition);
if(readBuffer != NULL)
{
memcpy(readBuffer, reinterpret_cast<uint8*>(&m_buffer) + m_bufferPosition, read);
readBuffer += read;
}
m_bufferPosition += read;
size -= read;
}
}
void CVif::CFifoStream::Flush() void CVif::CFifoStream::Flush()
{ {
m_bufferPosition = BUFFERSIZE; m_bufferPosition = BUFFERSIZE;
@ -1296,21 +1044,12 @@ void CVif::CFifoStream::SetFifoParams(uint8* source, uint32 size)
SyncBuffer(); SyncBuffer();
} }
//Returns the number of bytes that can still be read: what is left of the DMA
//transfer plus the unread part of the staged buffer.
uint32 CVif::CFifoStream::GetAvailableReadBytes() const
{
	const uint32 bufferedBytes = BUFFERSIZE - m_bufferPosition;
	const uint32 pendingDmaBytes = GetRemainingDmaTransferSize();
	return pendingDmaBytes + bufferedBytes;
}
//Returns the distance between the next address to fetch and the end address.
uint32 CVif::CFifoStream::GetRemainingDmaTransferSize() const
{
	const uint32 remaining = m_endAddress - m_nextAddress;
	return remaining;
}
void CVif::CFifoStream::Align32() void CVif::CFifoStream::Align32()
{ {
unsigned int remainBytes = m_bufferPosition & 0x03; unsigned int remainBytes = m_bufferPosition & 0x03;
if(remainBytes == 0) return; if(remainBytes == 0) return;
Read(NULL, 4 - remainBytes); uint32 dummy = 0;
Read(&dummy, 4 - remainBytes);
assert((m_bufferPosition & 0x03) == 0); assert((m_bufferPosition & 0x03) == 0);
} }
@ -1348,24 +1087,3 @@ void CVif::CFifoStream::Advance(uint32 size)
m_buffer = *reinterpret_cast<uint128*>(&m_source[m_nextAddress - 0x10]); m_buffer = *reinterpret_cast<uint128*>(&m_source[m_nextAddress - 0x10]);
} }
} }
//Refills m_buffer once it has been fully consumed.
//Loads the next 0x10 bytes from m_source at m_nextAddress and advances that address.
//When m_tagIncluded is set, the first 8 bytes of the new buffer are skipped and the
//flag is cleared.
//Throws std::exception when the buffer is exhausted and m_nextAddress has reached m_endAddress.
void CVif::CFifoStream::SyncBuffer()
{
	assert(m_bufferPosition <= BUFFERSIZE);
	if(m_bufferPosition < BUFFERSIZE)
	{
		//Staged buffer still has unread bytes
		return;
	}

	if(m_nextAddress >= m_endAddress)
	{
		throw std::exception();
	}

	m_buffer = *reinterpret_cast<uint128*>(&m_source[m_nextAddress]);
	m_nextAddress += 0x10;
	if(m_tagIncluded)
	{
		//Skip next 8 bytes
		m_tagIncluded = false;
		m_bufferPosition = 8;
	}
	else
	{
		m_bufferPosition = 0;
	}
}

View file

@ -1,13 +1,15 @@
#pragma once #pragma once
#include <climits>
#include <cstring>
#include "Types.h" #include "Types.h"
#include "Convertible.h" #include "Convertible.h"
#include "Vpu.h"
#include "../uint128.h" #include "../uint128.h"
#include "../Profiler.h" #include "../Profiler.h"
#include "zip/ZipArchiveWriter.h" #include "zip/ZipArchiveWriter.h"
#include "zip/ZipArchiveReader.h" #include "zip/ZipArchiveReader.h"
class CVpu;
class CINTC; class CINTC;
class CVif class CVif
@ -101,11 +103,34 @@ protected:
void Reset(); void Reset();
uint32 GetAvailableReadBytes() const; inline uint32 GetAvailableReadBytes() const
uint32 GetRemainingDmaTransferSize() const; {
void Read(void*, uint32); return GetRemainingDmaTransferSize() + (BUFFERSIZE - m_bufferPosition);
}
//Returns the distance between the next address to fetch and the end address.
inline uint32 GetRemainingDmaTransferSize() const
{
	const uint32 remaining = m_endAddress - m_nextAddress;
	return remaining;
}
inline void Read(void* buffer, uint32 size)
{
assert(m_source != nullptr);
assert(buffer != nullptr);
uint8* readBuffer = reinterpret_cast<uint8*>(buffer);
while(size != 0)
{
SyncBuffer();
uint32 read = std::min<uint32>(size, BUFFERSIZE - m_bufferPosition);
memcpy(readBuffer, reinterpret_cast<uint8*>(&m_buffer) + m_bufferPosition, read);
readBuffer += read;
m_bufferPosition += read;
size -= read;
}
}
void Flush(); void Flush();
void Align32(); inline void Align32();
void SetDmaParams(uint32, uint32, bool); void SetDmaParams(uint32, uint32, bool);
void SetFifoParams(uint8*, uint32); void SetFifoParams(uint8*, uint32);
@ -113,7 +138,26 @@ protected:
void Advance(uint32); void Advance(uint32);
private: private:
void SyncBuffer(); inline void SyncBuffer()
{
assert(m_bufferPosition <= BUFFERSIZE);
if(m_bufferPosition >= BUFFERSIZE)
{
if(m_nextAddress >= m_endAddress)
{
throw std::exception();
}
m_buffer = *reinterpret_cast<uint128*>(&m_source[m_nextAddress]);
m_nextAddress += 0x10;
m_bufferPosition = 0;
if(m_tagIncluded)
{
//Skip next 8 bytes
m_tagIncluded = false;
m_bufferPosition += 8;
}
}
}
enum enum
{ {
@ -206,16 +250,284 @@ protected:
void Cmd_STCOL(StreamType&, CODE); void Cmd_STCOL(StreamType&, CODE);
void Cmd_STMASK(StreamType&, CODE); void Cmd_STMASK(StreamType&, CODE);
bool Unpack_ReadValue(const CODE&, StreamType&, uint128&, bool); inline uint32 GetMaskOp(unsigned int, unsigned int) const;
bool Unpack_S32(StreamType&, uint128&);
bool Unpack_S16(StreamType&, uint128&, bool);
bool Unpack_S8(StreamType&, uint128&, bool);
bool Unpack_V16(StreamType&, uint128&, unsigned int, bool);
bool Unpack_V8(StreamType&, uint128&, unsigned int, bool);
bool Unpack_V32(StreamType&, uint128&, unsigned int);
bool Unpack_V45(StreamType&, uint128&);
uint32 GetMaskOp(unsigned int, unsigned int) const; inline bool Unpack_S32(StreamType&, uint128&);
inline bool Unpack_S16(StreamType&, uint128&, bool);
inline bool Unpack_S8(StreamType&, uint128&, bool);
inline bool Unpack_V45(StreamType&, uint128&);
//Copies 'fields' 32-bit elements (fields * 4 bytes) from 'stream' straight into 'result'.
//Returns false, leaving 'result' and the stream untouched, when not enough bytes are available.
template <unsigned int fields>
inline bool Unpack_V32(StreamType& stream, uint128& result)
{
	constexpr unsigned int byteCount = fields * 4;
	const bool enoughData = !(stream.GetAvailableReadBytes() < byteCount);
	if(enoughData)
	{
		stream.Read(&result, byteCount);
	}
	return enoughData;
}
//Reads 'fields' 16-bit elements from 'stream' in a single Read call and stores them
//in result.nV[0..fields-1], sign-extended to 32 bits unless 'zeroExtend' is true.
//Returns false, before reading anything, when fewer than fields * 2 bytes are available.
template <unsigned int fields, bool zeroExtend>
inline bool Unpack_V16(StreamType& stream, uint128& result)
{
	constexpr unsigned int byteCount = fields * 2;
	if(stream.GetAvailableReadBytes() < byteCount)
	{
		return false;
	}

	uint16 raw[fields];
	stream.Read(raw, byteCount);

	unsigned int lane = 0;
	for(const uint16 element : raw)
	{
		result.nV[lane] = zeroExtend ? static_cast<uint32>(element) : static_cast<uint32>(static_cast<int16>(element));
		lane++;
	}
	return true;
}
//Reads 'fields' 8-bit elements from 'stream' in a single Read call and stores them
//in result.nV[0..fields-1], sign-extended to 32 bits unless 'zeroExtend' is true.
//Returns false, before reading anything, when fewer than 'fields' bytes are available.
template <unsigned int fields, bool zeroExtend>
inline bool Unpack_V8(StreamType& stream, uint128& result)
{
	if(stream.GetAvailableReadBytes() < (fields))
	{
		return false;
	}

	uint8 raw[fields];
	stream.Read(raw, fields);

	unsigned int lane = 0;
	for(const uint8 element : raw)
	{
		result.nV[lane] = zeroExtend ? static_cast<uint32>(element) : static_cast<uint32>(static_cast<int8>(element));
		lane++;
	}
	return true;
}
//Reads one element of format 'dataType' from 'stream' into 'writeValue' by
//forwarding to the matching Unpack_* reader. 'usn' is handed to the 8 and 16-bit
//readers as their zero-extension flag.
//Returns the reader's result. A 'dataType' without a reader asserts and yields false.
template <uint8 dataType, bool usn>
bool Unpack_ReadValue(StreamType& stream, uint128& writeValue)
{
	switch(dataType)
	{
	case 0x00: //S-32
		return Unpack_S32(stream, writeValue);
	case 0x01: //S-16
		return Unpack_S16(stream, writeValue, usn);
	case 0x02: //S-8
		return Unpack_S8(stream, writeValue, usn);
	case 0x04: //V2-32
		return Unpack_V32<2>(stream, writeValue);
	case 0x05: //V2-16
		return Unpack_V16<2, usn>(stream, writeValue);
	case 0x06: //V2-8
		return Unpack_V8<2, usn>(stream, writeValue);
	case 0x08: //V3-32
		return Unpack_V32<3>(stream, writeValue);
	case 0x09: //V3-16
		return Unpack_V16<3, usn>(stream, writeValue);
	case 0x0A: //V3-8
		return Unpack_V8<3, usn>(stream, writeValue);
	case 0x0C: //V4-32
		return Unpack_V32<4>(stream, writeValue);
	case 0x0D: //V4-16
		return Unpack_V16<4, usn>(stream, writeValue);
	case 0x0E: //V4-8
		return Unpack_V8<4, usn>(stream, writeValue);
	case 0x0F: //V4-5
		return Unpack_V45(stream, writeValue);
	default:
		break;
	}

	//No reader exists for this data type
	assert(0);
	return false;
}
//Executes (or resumes) an UNPACK transfer: reads packed elements from 'stream' and
//writes them as 128-bit values into VU memory, applying the per-field mask
//operation and the m_R / m_C registers on the way.
//
//Template parameters (fixed per instantiation, so the branches on them are constant):
//  dataType         - element format, forwarded to Unpack_ReadValue
//  clGreaterEqualWl - selects the CL >= WL tick handling; the caller is expected to
//                     pick the instantiation matching the current m_CYCLE
//  useMask          - when false every field is treated as MASK_DATA
//  mode             - MODE_OFFSET adds m_R to the data, MODE_DIFFERENCE adds m_R and
//                     stores the sum back in m_R; any other value writes the data as is
//  usn              - forwarded to Unpack_ReadValue
//
//Parameters:
//  stream   - source of the packed data
//  nCommand - the UNPACK command being executed
//  nDstAddr - destination address in 16-byte units (scaled by 0x10 below)
//
//When the stream runs out before all elements are written, the loop stops,
//m_STAT.nVPS is set to 1 and the remaining count is kept in m_NUM. Otherwise the
//stream is aligned to 32 bits and m_STAT.nVPS is cleared.
template <uint8 dataType, bool clGreaterEqualWl, bool useMask, uint8 mode, bool usn>
void Unpack(StreamType& stream, CODE nCommand, uint32 nDstAddr)
{
	assert((nCommand.nCMD & 0x60) == 0x60);

	const auto vuMem = m_vpu.GetVuMemory();
	const auto vuMemSize = m_vpu.GetVuMemorySize();

	uint32 cl = m_CYCLE.nCL;
	uint32 wl = m_CYCLE.nWL;
	if(wl == 0)
	{
		//From here on 'wl' is never 0
		wl = UINT_MAX;
		cl = UINT_MAX;
	}

	//The counter still equals the command's NUM: nothing written yet, restart the cycle
	if(m_NUM == nCommand.nNUM)
	{
		m_readTick = 0;
		m_writeTick = 0;
	}

	//A NUM of 0 stands for 256 elements
	uint32 currentNum = (m_NUM == 0) ? 256 : m_NUM;
	uint32 codeNum = (m_CODE.nNUM == 0) ? 256 : m_CODE.nNUM;
	uint32 transfered = codeNum - currentNum;

	//Skip past what a previous, interrupted call already wrote
	if(cl > wl)
	{
		nDstAddr += cl * (transfered / wl) + (transfered % wl);
	}
	else
	{
		nDstAddr += transfered;
	}

	nDstAddr *= 0x10;
	//Wrap the starting address into VU memory before the first write. The loop below
	//only wraps after each increment, so without this an out-of-range start address
	//would make the first store land outside of vuMem.
	nDstAddr &= (vuMemSize - 1);

	while(currentNum != 0)
	{
		bool mustWrite = false;
		uint128 writeValue;
		memset(&writeValue, 0, sizeof(writeValue));

		if(clGreaterEqualWl)
		{
			//Only the first 'wl' ticks of a cycle read and write, the rest only advance
			if(m_readTick < wl)
			{
				bool success = Unpack_ReadValue<dataType, usn>(stream, writeValue);
				if(!success) break;
				mustWrite = true;
			}
		}
		else
		{
			//Every tick writes, but only the first 'cl' ticks of a cycle read new data
			if(m_writeTick < cl)
			{
				bool success = Unpack_ReadValue<dataType, usn>(stream, writeValue);
				if(!success) break;
			}
			mustWrite = true;
		}

		if(mustWrite)
		{
			auto dst = reinterpret_cast<uint128*>(vuMem + nDstAddr);

			for(unsigned int i = 0; i < 4; i++)
			{
				uint32 maskOp = useMask ? GetMaskOp(i, m_writeTick) : MASK_DATA;

				if(maskOp == MASK_DATA)
				{
					if(mode == MODE_OFFSET)
					{
						writeValue.nV[i] += m_R[i];
					}
					else if(mode == MODE_DIFFERENCE)
					{
						writeValue.nV[i] += m_R[i];
						m_R[i] = writeValue.nV[i];
					}

					dst->nV[i] = writeValue.nV[i];
				}
				else if(maskOp == MASK_ROW)
				{
					dst->nV[i] = m_R[i];
				}
				else if(maskOp == MASK_COL)
				{
					//Write ticks past the fourth reuse the last column register
					int index = (m_writeTick > 3) ? 3 : m_writeTick;
					dst->nV[i] = m_C[index];
				}
				else if(maskOp == MASK_MASK)
				{
					//Don't write anything
				}
				else
				{
					assert(0);
				}
			}

			currentNum--;
		}

		if(clGreaterEqualWl)
		{
			m_writeTick = std::min<uint32>(m_writeTick + 1, wl);
			m_readTick = std::min<uint32>(m_readTick + 1, cl);

			if(m_readTick == cl)
			{
				m_writeTick = 0;
				m_readTick = 0;
			}
		}
		else
		{
			m_writeTick = std::min<uint32>(m_writeTick + 1, wl);
			m_readTick = std::min<uint32>(m_readTick + 1, cl);

			if(m_writeTick == wl)
			{
				m_writeTick = 0;
				m_readTick = 0;
			}
		}

		nDstAddr += 0x10;
		nDstAddr &= (vuMemSize - 1);
	}

	if(currentNum != 0)
	{
		//Ran out of data: keep the transfer pending
		m_STAT.nVPS = 1;
	}
	else
	{
		stream.Align32();
		m_STAT.nVPS = 0;
	}

	//256 truncates to 0 here, matching the "0 stands for 256" decoding above
	m_NUM = static_cast<uint8>(currentNum);
}
//Pointer to a CVif member function taking (stream, command, destination address);
//this is the signature of the Unpack<> member template and the element type of m_unpacker.
typedef void (CVif::*Unpacker)(StreamType&, CODE, uint32);
enum
{
//Number of entries in the m_unpacker table (0x200 = 512)
MAX_UNPACKERS = 0x200
};
virtual void PrepareMicroProgram(); virtual void PrepareMicroProgram();
void StartMicroProgram(uint32); void StartMicroProgram(uint32);
@ -232,6 +544,7 @@ protected:
uint8* m_ram = nullptr; uint8* m_ram = nullptr;
uint8* m_spr = nullptr; uint8* m_spr = nullptr;
CFifoStream m_stream; CFifoStream m_stream;
Unpacker m_unpacker[MAX_UNPACKERS];
uint8 m_fifoBuffer[FIFO_SIZE]; uint8 m_fifoBuffer[FIFO_SIZE];
uint32 m_fifoIndex = 0; uint32 m_fifoIndex = 0;

21
Source/static_loop.h Normal file
View file

@ -0,0 +1,21 @@
#pragma once
//Source: https://stackoverflow.com/a/46873787
#include <type_traits>
#include <utility>
namespace detail
{
	//Calls 'callable' once for every value of the integer sequence, left to right.
	//Each value is passed as a std::integral_constant so the callee can keep using
	//it in constant expressions (template arguments, constexpr variables).
	template <class IndexType, IndexType... indices, class Callable>
	constexpr void static_loop(std::integer_sequence<IndexType, indices...>, Callable&& callable)
	{
		(callable(std::integral_constant<IndexType, indices>{}), ...); // C++17 fold expression
	}
} // detail

//Compile-time unrolled loop: invokes 'f' with std::integral_constant<T, 0> up to
//std::integral_constant<T, count - 1>, in increasing order.
template <class T, T count, class F>
constexpr void static_loop(F&& f)
{
	using Indices = std::make_integer_sequence<T, count>;
	detail::static_loop(Indices{}, std::forward<F>(f));
}