Saving state.
Some checks failed
re3 cmake devkitA64 (Nintendo Switch) / build-nintendo-switch (push) Has been cancelled
re3 premake amd64 / build (Debug, win-amd64-librw_d3d9-oal) (push) Has been cancelled
re3 premake amd64 / build (Debug, win-amd64-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake amd64 / build (Release, win-amd64-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, glfw, macos-latest, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, glfw, ubuntu-18.04, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, glfw, windows-latest, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, windows-latest, d3d9) (push) Has been cancelled
re3 premake amd64 / build (Release, win-amd64-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled

This commit is contained in:
Falco Girgis 2025-03-27 15:17:11 -05:00
parent c02781fe81
commit 2ade727c46
9 changed files with 43 additions and 131 deletions

View file

@ -1,14 +1,14 @@
#pragma once #pragma once
struct CColLine struct alignas(8) CColLine
{ {
// NB: this has to be compatible with two CVuVectors // NB: this has to be compatible with two CVuVectors
CVector p0; alignas(8) CVector p0;
int pad0; float pad0 = 0.0f;
CVector p1; alignas(8) CVector p1;
int pad1; float pad1 = 0.0f;
CColLine(void) { }; CColLine(void) = default;
CColLine(const CVector &p0, const CVector &p1) { this->p0 = p0; this->p1 = p1; }; CColLine(const CVector &p0, const CVector &p1) { this->p0 = p0; this->p1 = p1; };
void Set(const CVector &p0, const CVector &p1); void Set(const CVector &p0, const CVector &p1);
}; };

View file

@ -1,12 +1,12 @@
#pragma once #pragma once
struct CColPoint struct alignas(8) CColPoint
{ {
CVector point; alignas(8) CVector point;
int pad1; float pad1 = 0.0f;
// the surface normal on the surface of point // the surface normal on the surface of point
CVector normal; alignas(8) CVector normal;
int pad2; float pad2 = 0.0f;
uint8 surfaceA; uint8 surfaceA;
uint8 pieceA; uint8 pieceA;
uint8 surfaceB; uint8 surfaceB;

View file

@ -787,12 +787,13 @@ bool
CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq) CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq)
{ {
CVector dist = s1.center - s2.center; CVector dist = s1.center - s2.center;
float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2 float mag = dist.Magnitude();
float d = mag - s2.radius; // distance from s1's center to s2
float depth = s1.radius - d; // sphere overlap float depth = s1.radius - d; // sphere overlap
if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2 if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2
// no collision if sphere is not close enough // no collision if sphere is not close enough
if(d*d < mindistsq && d < s1.radius){ if(d*d < mindistsq && d < s1.radius){
dist.Normalise(); dist *= Invert<true, false>(mag);
point.point = s1.center - dist*d; point.point = s1.center - dist*d;
point.normal = dist; point.normal = dist;
#ifndef VU_COLLISION #ifndef VU_COLLISION

View file

@ -39,10 +39,9 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl
void void
CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2) CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2)
{ {
x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w); quat_mult(reinterpret_cast<quaternion_t *>(this),
y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w); reinterpret_cast<const quaternion_t &>(q1),
z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w); reinterpret_cast<const quaternion_t &>(q2));
w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
} }
void void
@ -50,10 +49,11 @@ CQuaternion::Get(RwV3d *axis, float *angle)
{ {
*angle = Acos(w); *angle = Acos(w);
float s = Sin(*angle); float s = Sin(*angle);
float invS = dc::Invert<true, false>(s);
axis->x = x * (1.0f / s); axis->x = x * invS;
axis->y = y * (1.0f / s); axis->y = y * invS;
axis->z = z * (1.0f / s); axis->z = z * invS;
} }
void void
@ -104,7 +104,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) { if (f >= 0.0f) {
s = Sqrt(f + 1.0f); s = Sqrt(f + 1.0f);
w = 0.5f * s; w = 0.5f * s;
m = 0.5f / s; m = Div<true, false>(0.5f, s);
x = (matrix.up.z - matrix.at.y) * m; x = (matrix.up.z - matrix.at.y) * m;
y = (matrix.at.x - matrix.right.z) * m; y = (matrix.at.x - matrix.right.z) * m;
z = (matrix.right.y - matrix.up.x) * m; z = (matrix.right.y - matrix.up.x) * m;
@ -115,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) { if (f >= 0.0f) {
s = Sqrt(f + 1.0f); s = Sqrt(f + 1.0f);
x = 0.5f * s; x = 0.5f * s;
m = 0.5f / s; m = Div<true, false>(0.5f, s);
y = (matrix.up.x + matrix.right.y) * m; y = (matrix.up.x + matrix.right.y) * m;
z = (matrix.at.x + matrix.right.z) * m; z = (matrix.at.x + matrix.right.z) * m;
w = (matrix.up.z - matrix.at.y) * m; w = (matrix.up.z - matrix.at.y) * m;
@ -126,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix)
if (f >= 0.0f) { if (f >= 0.0f) {
s = Sqrt(f + 1.0f); s = Sqrt(f + 1.0f);
y = 0.5f * s; y = 0.5f * s;
m = 0.5f / s; m = Div<true, false>(0.5f, s);
w = (matrix.at.x - matrix.right.z) * m; w = (matrix.at.x - matrix.right.z) * m;
x = (matrix.up.x - matrix.right.y) * m; x = (matrix.up.x - matrix.right.y) * m;
z = (matrix.at.y + matrix.up.z) * m; z = (matrix.at.y + matrix.up.z) * m;
@ -136,7 +136,7 @@ CQuaternion::Set(const RwMatrix &matrix)
f = matrix.at.z - (matrix.up.y + matrix.right.x); f = matrix.at.z - (matrix.up.y + matrix.right.x);
s = Sqrt(f + 1.0f); s = Sqrt(f + 1.0f);
z = 0.5f * s; z = 0.5f * s;
m = 0.5f / s; m = Div<true, false>(0.5f, s);
w = (matrix.right.y - matrix.up.x) * m; w = (matrix.right.y - matrix.up.x) * m;
x = (matrix.at.x + matrix.right.z) * m; x = (matrix.at.x + matrix.right.z) * m;
y = (matrix.at.y + matrix.up.z) * m; y = (matrix.at.y + matrix.up.z) * m;
@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
*f3 = Atan2(matrix.right.y, matrix.up.y); *f3 = Atan2(matrix.right.y, matrix.up.y);
if (*f3 < 0.0f) if (*f3 < 0.0f)
*f3 += TWOPI; *f3 += TWOPI;
float s = Sin(*f3); auto [s, c] = SinCos(*f3);
float c = Cos(*f3);
*f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y); *f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y);
if (*f1 < 0.0f) if (*f1 < 0.0f)
*f1 += TWOPI; *f1 += TWOPI;
@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
void void
CQuaternion::Set(float f1, float f2, float f3) CQuaternion::Set(float f1, float f2, float f3)
{ {
float c1 = Cos(f1 * 0.5f); auto [s1, c1] = SinCos(f1 * 0.5f);
float c2 = Cos(f2 * 0.5f); auto [s2, c2] = SinCos(f2 * 0.5f);
float c3 = Cos(f3 * 0.5f); auto [s3, c3] = SinCos(f3 * 0.5f);
float s1 = Sin(f1 * 0.5f);
float s2 = Sin(f2 * 0.5f);
float s3 = Sin(f3 * 0.5f);
x = ((c2 * c1) * s3) - ((s2 * s1) * c3); x = ((c2 * c1) * s3) - ((s2 * s1) * c3);
y = ((s1 * c2) * c3) + ((s2 * c1) * s3); y = ((s1 * c2) * c3) + ((s2 * c1) * s3);
z = ((s2 * c1) * c3) - ((s1 * c2) * s3); z = ((s2 * c1) * c3) - ((s1 * c2) * s3);

View file

@ -617,12 +617,13 @@ bool
CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq) CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq)
{ {
CVector dist = s1.center - s2.center; CVector dist = s1.center - s2.center;
float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2 float mag = dist.Magnitude();
float d = mag - s2.radius; // distance from s1's center to s2
float depth = s1.radius - d; // sphere overlap float depth = s1.radius - d; // sphere overlap
if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2 if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2
// no collision if sphere is not close enough // no collision if sphere is not close enough
if(d*d < mindistsq && d < s1.radius){ if(d*d < mindistsq && d < s1.radius){
dist.Normalise(); dist *= Invert<true, false>(mag);
point.point = s1.center - dist*d; point.point = s1.center - dist*d;
point.normal = dist; point.normal = dist;
#ifndef VU_COLLISION #ifndef VU_COLLISION

View file

@ -49,7 +49,7 @@ CQuaternion::Get(RwV3d *axis, float *angle)
{ {
*angle = Acos(w); *angle = Acos(w);
float s = Sin(*angle); float s = Sin(*angle);
float invS = dc::Invert(s); float invS = dc::Invert<true, false>(s);
axis->x = x * invS; axis->x = x * invS;
axis->y = y * invS; axis->y = y * invS;
@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
*f3 = Atan2(matrix.right.y, matrix.up.y); *f3 = Atan2(matrix.right.y, matrix.up.y);
if (*f3 < 0.0f) if (*f3 < 0.0f)
*f3 += TWOPI; *f3 += TWOPI;
float s = Sin(*f3); auto [s, c] = SinCos(*f3);
float c = Cos(*f3);
*f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y); *f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y);
if (*f1 < 0.0f) if (*f1 < 0.0f)
*f1 += TWOPI; *f1 += TWOPI;
@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3)
void void
CQuaternion::Set(float f1, float f2, float f3) CQuaternion::Set(float f1, float f2, float f3)
{ {
float c1 = Cos(f1 * 0.5f); auto [s1, c1] = SinCos(f1 * 0.5f);
float c2 = Cos(f2 * 0.5f); auto [s2, c2] = SinCos(f2 * 0.5f);
float c3 = Cos(f3 * 0.5f); auto [s3, c3] = SinCos(f3 * 0.5f);
float s1 = Sin(f1 * 0.5f);
float s2 = Sin(f2 * 0.5f);
float s3 = Sin(f3 * 0.5f);
x = ((c2 * c1) * s3) - ((s2 * s1) * c3); x = ((c2 * c1) * s3) - ((s2 * s1) * c3);
y = ((s1 * c2) * c3) + ((s2 * c1) * s3); y = ((s1 * c2) * c3) + ((s2 * c1) * s3);
z = ((s2 * c1) * c3) - ((s1 * c2) * s3); z = ((s2 * c1) * c3) - ((s1 * c2) * s3);

2
vendor/dca3-kos vendored

@ -1 +1 @@
Subproject commit 5d475f6ecffa0bc7aaaccf9bd8b64cda09aec89d Subproject commit 5c0e0936ad15a249858b01f29210a2ae7beec603

View file

@ -2856,7 +2856,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
__builtin_prefetch(srcVtxBytes); __builtin_prefetch(srcVtxBytes);
for(int c = 0; c < count - 1; ++c) for(int c = 0; c < count - 1; ++c)
innerLoop.template operator()<false>(); innerLoop.template operator()<true>();
innerLoop.template operator()<false>(); innerLoop.template operator()<false>();
} else if (!(flags & 0x80)) { } else if (!(flags & 0x80)) {
@ -3464,9 +3464,6 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
assert(atomicContexts.size() <= 32767); assert(atomicContexts.size() <= 32767);
auto meshes = geo->meshHeader->getMeshes(); auto meshes = geo->meshHeader->getMeshes();
RawMatrix worldOrient;
bool worldOrientValid = false;
for (int16_t n = 0; n < numMeshes; n++) { for (int16_t n = 0; n < numMeshes; n++) {
bool doBlend = meshes[n].material->color.alpha != 255; // TODO: check all vertexes for alpha? bool doBlend = meshes[n].material->color.alpha != 255; // TODO: check all vertexes for alpha?
bool doBlendMaterial = doBlend; bool doBlendMaterial = doBlend;
@ -3485,16 +3482,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
isMatFX = true; isMatFX = true;
matfxCoefficient = matfx->fx[0].env.coefficient; matfxCoefficient = matfx->fx[0].env.coefficient;
matfxContexts.resize(matfxContexts.size() + 1); matfxContexts.resize(matfxContexts.size() + 1);
#warning "Get rid of me by creating mat_apply_3x3()!"
if(!worldOrientValid) { uploadEnvMatrix(matfx->fx[0].env.frame, reinterpret_cast<rw::RawMatrix*>(atomic->getFrame()->getLTM()), &matfxContexts.back().mtx);
rw::convMatrix(&worldOrient, atomic->getFrame()->getLTM());
worldOrient.pos = { 0, 0, 0 };
worldOrient.rightw = 0;
worldOrient.upw = 0;
worldOrient.atw = 0;
worldOrientValid = true;
}
uploadEnvMatrix(matfx->fx[0].env.frame, &worldOrient, &matfxContexts.back().mtx);
matfxContexts.back().coefficient = matfxCoefficient; matfxContexts.back().coefficient = matfxCoefficient;
pvr_poly_cxt_t cxt; pvr_poly_cxt_t cxt;

View file

@ -267,62 +267,6 @@ __always_inline __hot constexpr float Acos(float x) {
x2 = __x; y2 = __y; z2 = __z; \ x2 = __x; y2 = __y; z2 = __z; \
} }
// no declspec naked, so can't do rts / fschg. instead compiler pads with nop?
__always_inline __hot void mat_load_3x3(const matrix_t* mtx) {
__asm__ __volatile__ (
R"(
fschg
frchg
fmov @%[mtx]+, dr0
fldi0 fr12
fmov @%[mtx]+, dr2
fldi0 fr13
fmov @%[mtx]+, dr4
fldi0 fr3
fmov @%[mtx]+, dr6
fmov dr12, dr14
fmov @%[mtx]+, dr8
fldi0 fr7
fmov @%[mtx]+, dr10
fldi0 fr11
fschg
frchg
)"
: [mtx] "+r" (mtx)
);
}
// sets pos.w to 1
__always_inline __hot void rw_mat_load_4x4(const rw::Matrix* mtx) {
__asm__ __volatile__ (
R"(
fschg
frchg
fmov @%[mtx]+, dr0
fmov @%[mtx]+, dr2
fmov @%[mtx]+, dr4
fmov @%[mtx]+, dr6
fmov @%[mtx]+, dr8
fmov @%[mtx]+, dr10
fmov @%[mtx]+, dr12
fmov @%[mtx]+, dr14
fldi1 fr15
fschg
frchg
)"
: [mtx] "+r" (mtx)
);
}
__always_inline __hot void mat_transpose(void) { __always_inline __hot void mat_transpose(void) {
asm volatile ( asm volatile (
"frchg\n\t" // fmov for singles only works on front bank "frchg\n\t" // fmov for singles only works on front bank
@ -560,23 +504,6 @@ __hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix
w_ = tmp1233123.w; \ w_ = tmp1233123.w; \
} while(false) } while(false)
inline void mat_load_3x3(const matrix_t* mtx) {
memcpy(XMTRX, mtx, sizeof(matrix_t));
XMTRX[0][3] = 0.0f;
XMTRX[1][3] = 0.0f;
XMTRX[2][3] = 0.0f;
XMTRX[3][0] = 0.0f;
XMTRX[3][1] = 0.0f;
XMTRX[3][2] = 0.0f;
XMTRX[3][3] = 0.0f;
}
inline void rw_mat_load_4x4(const rw::Matrix* mtx) {
memcpy(XMTRX, mtx, sizeof(matrix_t));
XMTRX[3][3] = 1.0f;
}
inline void mat_transpose(void) { inline void mat_transpose(void) {
matrix_t tmp; matrix_t tmp;