Acceleration working in miami.

This commit is contained in:
Falco Girgis 2025-03-28 00:17:25 -05:00
parent 02799207ed
commit 0ba1c595cd
10 changed files with 302 additions and 144 deletions

View file

@ -118,6 +118,7 @@ OBJS_NO_FAST_MATH = \
../src/miami/core/Cam.o \
../src/miami/core/Camera.o \
../src/miami/vehicles/Bike.o \
../src/miami/vehicles/Boat.o \
../src/miami/renderer/Particle.o
KOS_CPPFLAGS += -fbuiltin -ffast-math -ffp-contract=fast \

View file

@ -1,6 +1,6 @@
#pragma once
struct CColLine
struct alignas(8) CColLine
{
// NB: this has to be compatible with two CVuVectors
CVector p0;

View file

@ -1,6 +1,6 @@
#pragma once
struct CColPoint
struct alignas(8) CColPoint
{
CVector point;
int pad1;

View file

@ -2,7 +2,7 @@
#include "SurfaceTable.h"
struct CSphere
struct alignas(8) CSphere
{
// NB: this has to be compatible with a CVuVector
CVector center;
@ -15,6 +15,11 @@ struct CColSphere : public CSphere
uint8 surface;
uint8 piece;
void Set(float radius, uint8 surf = SURFACE_DEFAULT, uint8 piece = 0) {
this->radius = radius;
this->surface = surf;
this->piece = piece;
}
void Set(float radius, const CVector &center, uint8 surf, uint8 piece);
bool IntersectRay(CVector const &from, CVector const &dir, CVector &entry, CVector &exit);
using CSphere::Set;

View file

@ -24,6 +24,10 @@
#include "Camera.h"
#include "ColStore.h"
#ifdef DC_SH4
#include "VuCollision.h"
#endif
#ifdef VU_COLLISION
#include "VuCollision.h"
@ -572,7 +576,12 @@ CCollision::TestLineOfSight(const CColLine &line, const CMatrix &matrix, CColMod
// transform line to model space
Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1);
CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
// If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox))
@ -1428,7 +1437,12 @@ CCollision::ProcessLineOfSight(const CColLine &line,
// transform line to model space
Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1);
CColLine newline;
#ifdef DC_SH4
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#else
newline.Set(matTransform * line.p0, matTransform * line.p1);
#endif
// If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox))
@ -1455,9 +1469,18 @@ CCollision::ProcessLineOfSight(const CColLine &line,
}
if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal);
mindist = coldist;
#else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
mat_trans_normal3(point.normal.x,
point.normal.y,
point.normal.z);
#endif
return true;
}
return false;
@ -1593,7 +1616,14 @@ CCollision::ProcessVerticalLine(const CColLine &line,
// transform line to model space
// Why does the game seem to do this differently than above?
CColLine newline(MultiplyInverse(matrix, line.p0), MultiplyInverse(matrix, line.p1));
CMatrix matTransform;
Invert(matrix, matTransform);
CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
if(!TestLineBox(newline, model.boundingBox))
return false;
@ -1618,13 +1648,29 @@ CCollision::ProcessVerticalLine(const CColLine &line,
}
if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal);
#else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
mat_trans_normal3(point.normal.x,
point.normal.y,
point.normal.z);
#endif
if(TempStoredPoly.valid && poly){
*poly = TempStoredPoly;
#ifndef DC_SH4
poly->verts[0] = matrix * poly->verts[0];
poly->verts[1] = matrix * poly->verts[1];
poly->verts[2] = matrix * poly->verts[2];
#else
mat_trans_single3_nodiv(poly->verts[0].x, poly->verts[0].y, poly->verts[0].z);
mat_trans_single3_nodiv(poly->verts[1].x, poly->verts[1].y, poly->verts[1].z);
mat_trans_single3_nodiv(poly->verts[2].x, poly->verts[2].y, poly->verts[2].z);
#endif
}
mindist = coldist;
return true;
@ -1981,21 +2027,52 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
CColSphere bsphereAB; // bounding sphere of A in B space
bsphereAB.radius = modelA.boundingSphere.radius;
#ifndef DC_SH4
bsphereAB.center = matAB * modelA.boundingSphere.center;
#else
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matAB)); */
mat_trans_single3_nodiv_nomod(modelA.boundingSphere.center.x,
modelA.boundingSphere.center.y,
modelA.boundingSphere.center.z,
bsphereAB.center.x,
bsphereAB.center.y,
bsphereAB.center.z);
#endif
if(!TestSphereBox(bsphereAB, modelB.boundingBox))
return 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
// transform modelA's spheres and lines to B space
for(i = 0; i < modelA.numSpheres; i++){
CColSphere &s = modelA.spheres[i];
#ifndef DC_SH4
aSpheresA[i].Set(s.radius, matAB * s.center, s.surface, s.piece);
#else
auto &d = aSpheresA[i];
mat_trans_single3_nodiv_nomod(s.center.x, s.center.y, s.center.z,
d.center.x, d.center.y, d.center.z);
d.Set(s.radius, s.surface, s.piece);
#endif
}
for(i = 0; i < modelA.numLines; i++)
aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1);
for(i = 0; i < modelA.numLines; i++) {
#ifndef DC_SH4
aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1);
#else
mat_trans_single3_nodiv_nomod(modelA.lines[i].p0.x,
modelA.lines[i].p0.y,
modelA.lines[i].p0.z,
aLinesA[i].p0.x,
aLinesA[i].p0.y,
aLinesA[i].p0.z);
mat_trans_single3_nodiv_nomod(modelA.lines[i].p1.x,
modelA.lines[i].p1.y,
modelA.lines[i].p1.z,
aLinesA[i].p1.x,
aLinesA[i].p1.y,
aLinesA[i].p1.z);
#endif
}
// Test them against model B's bounding volumes
int numSpheresA = 0;
int numLinesA = 0;
@ -2013,9 +2090,25 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
int numSpheresB = 0;
int numBoxesB = 0;
int numTrianglesB = 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
#ifdef DC_SH4
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matBA)); */
#endif
for(i = 0; i < modelB.numSpheres; i++){
s.radius = modelB.spheres[i].radius;
#ifndef DC_SH4
s.center = matBA * modelB.spheres[i].center;
#else
mat_trans_single3_nodiv_nomod(modelB.spheres[i].center.x,
modelB.spheres[i].center.y,
modelB.spheres[i].center.z,
s.center.x,
s.center.y,
s.center.z);
#endif
if(TestSphereBox(s, modelA.boundingBox))
aSphereIndicesB[numSpheresB++] = i;
}
@ -2062,9 +2155,22 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
if(hasCollided)
numCollisions++;
}
#ifdef DC_SH4
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrixB)));
#endif
for(i = 0; i < numCollisions; i++){
#ifndef DC_SH4
spherepoints[i].point = matrixB * spherepoints[i].point;
spherepoints[i].normal = Multiply3x3(matrixB, spherepoints[i].normal);
#else
mat_trans_single3_nodiv(spherepoints[i].point.x,
spherepoints[i].point.y,
spherepoints[i].point.z);
mat_trans_normal3(spherepoints[i].normal.x,
spherepoints[i].normal.y,
spherepoints[i].normal.z);
#endif
}
// And the same thing for the lines in A
@ -2095,8 +2201,17 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
for(i = 0; i < numLinesA; i++)
if(aCollided[i]){
j = aLineIndicesA[i];
#ifndef DC_SH4
linepoints[j].point = matrixB * linepoints[j].point;
linepoints[j].normal = Multiply3x3(matrixB, linepoints[j].normal);
#else
mat_trans_single3_nodiv(linepoints[j].point.x,
linepoints[j].point.y,
linepoints[j].point.z);
mat_trans_normal3(linepoints[j].normal.x,
linepoints[j].normal.y,
linepoints[j].normal.z);
#endif
}
return numCollisions; // sphere collisions

View file

@ -1,11 +1,5 @@
#include "common.h"
CMatrix::CMatrix(void)
{
m_attachment = nil;
m_hasRwMatrix = false;
}
CMatrix::CMatrix(CMatrix const &m)
{
m_attachment = nil;
@ -434,6 +428,11 @@ operator*(const CMatrix &m1, const CMatrix &m2)
{
// TODO: VU0 code
CMatrix out;
#if defined(RW_DC)
mat_load(reinterpret_cast<const matrix_t *>(&m1));
mat_apply(reinterpret_cast<const matrix_t *>(&m2));
mat_store(reinterpret_cast<matrix_t *>(&out));
#else
out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz;
out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz;
out.rz = m1.rz * m2.rx + m1.fz * m2.ry + m1.uz * m2.rz;
@ -446,6 +445,7 @@ operator*(const CMatrix &m1, const CMatrix &m2)
out.px = m1.rx * m2.px + m1.fx * m2.py + m1.ux * m2.pz + m1.px;
out.py = m1.ry * m2.px + m1.fy * m2.py + m1.uy * m2.pz + m1.py;
out.pz = m1.rz * m2.px + m1.fz * m2.py + m1.uz * m2.pz + m1.pz;
#endif
return out;
}

View file

@ -1,6 +1,6 @@
#pragma once
class CMatrix
class alignas(8) CMatrix
{
public:
#ifdef GTA_PS2
@ -23,18 +23,18 @@ public:
float f[4][4];
struct
{
float rx, ry, rz, rw;
float fx, fy, fz, fw;
float ux, uy, uz, uw;
float px, py, pz, pw;
float rx, ry, rz, rw=0.0f;
float fx, fy, fz, fw=0.0f;
float ux, uy, uz, uw=0.0f;
float px, py, pz, pw=1.0f;
};
};
RwMatrix *m_attachment;
bool m_hasRwMatrix; // are we the owner?
RwMatrix *m_attachment=nil;
bool m_hasRwMatrix=false; // are we the owner?
#endif
CMatrix(void);
CMatrix(void)=default;
CMatrix(CMatrix const &m);
CMatrix(RwMatrix *matrix, bool owner = false);
CMatrix(float scale){

View file

@ -1,6 +1,14 @@
#pragma once
class TYPEALIGN(16) CVuVector : public CVector
#include "maths.h"
#ifdef RW_DC
#define VECTOR_ALIGN 8
#else
#define VECTOR_ALIGN 16
#endif
class TYPEALIGN(VECTOR_ALIGN) CVuVector : public CVector
{
public:
float w;
@ -26,7 +34,123 @@ public:
// TODO: operator-
};
void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in);
void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in);
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride);
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in);
__always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf01,0x0(%2)\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.y);
#else
out = mat * in;
#endif
}
__always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
ldr $8,0x0(%2)\n\
ldl $8,0x7(%2)\n\
lw $9,0x8(%2)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.y);
#else
out = mat * in;
#endif
}
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
{
#ifdef GTA_PS3
__asm__ __volatile__("\n\
paddub $3,%4,$0\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
add %3,%3,$3\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
while(n--) {
mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z);
in = reinterpret_cast<const RwV3d *>(reinterpret_cast<const uint8_t *>(in) + stride);
++out;
}
#else
while(n--){
*out = mat * *in;
in = (RwV3d*)((uint8*)in + stride);
out++;
}
#endif
}
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
lqc2 vf01,0x0(%3)\n\
nop\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
lqc2 vf01,0x10(%3)\n\
addiu %3,%3,0x10\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat) ,"r" (in): "memory");
#else
TransformPoints(out, n, mat, in, sizeof(CVuVector));
#endif
}

View file

@ -3,116 +3,3 @@
#include "VuVector.h"
// TODO: move more stuff into here
void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf01,0x0(%2)\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#else
out = mat * in;
#endif
}
void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
ldr $8,0x0(%2)\n\
ldl $8,0x7(%2)\n\
lw $9,0x8(%2)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#else
out = mat * in;
#endif
}
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
{
#ifdef GTA_PS3
__asm__ __volatile__("\n\
paddub $3,%4,$0\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
add %3,%3,$3\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
#else
while(n--){
*out = mat * *in;
in = (RwV3d*)((uint8*)in + stride);
out++;
}
#endif
}
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
lqc2 vf01,0x0(%3)\n\
nop\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
lqc2 vf01,0x10(%3)\n\
addiu %3,%3,0x10\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat) ,"r" (in): "memory");
#else
while(n--){
*out = mat * *in;
in++;
out++;
}
#endif
}

View file

@ -1,5 +1,31 @@
#pragma once
#include "src/common_defines.h"
#ifdef DC_SH4
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
#else
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \
} while(false)
#endif
// wrapper around float versions of functions
// in gta they are in CMaths but that makes the code rather noisy