From 2ade727c46b9bb4c4525c3028617c9f45c03fa6a Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Thu, 27 Mar 2025 15:17:11 -0500 Subject: [PATCH] Saving state. --- src/liberty/collision/ColLine.h | 12 ++--- src/liberty/collision/ColPoint.h | 10 ++-- src/liberty/collision/Collision.cpp | 5 +- src/liberty/math/Quaternion.cpp | 35 +++++++------- src/miami/collision/Collision.cpp | 5 +- src/miami/math/Quaternion.cpp | 15 +++--- vendor/dca3-kos | 2 +- vendor/librw/src/dc/rwdc.cpp | 17 ++----- vendor/librw/src/dc/rwdc_common.h | 73 ----------------------------- 9 files changed, 43 insertions(+), 131 deletions(-) diff --git a/src/liberty/collision/ColLine.h b/src/liberty/collision/ColLine.h index 21587a06..e2400a28 100644 --- a/src/liberty/collision/ColLine.h +++ b/src/liberty/collision/ColLine.h @@ -1,14 +1,14 @@ #pragma once -struct CColLine +struct alignas(8) CColLine { // NB: this has to be compatible with two CVuVectors - CVector p0; - int pad0; - CVector p1; - int pad1; + alignas(8) CVector p0; + float pad0 = 0.0f; + alignas(8) CVector p1; + float pad1 = 0.0f; - CColLine(void) { }; + CColLine(void) = default; CColLine(const CVector &p0, const CVector &p1) { this->p0 = p0; this->p1 = p1; }; void Set(const CVector &p0, const CVector &p1); }; \ No newline at end of file diff --git a/src/liberty/collision/ColPoint.h b/src/liberty/collision/ColPoint.h index a15b2345..35e5fd13 100644 --- a/src/liberty/collision/ColPoint.h +++ b/src/liberty/collision/ColPoint.h @@ -1,12 +1,12 @@ #pragma once -struct CColPoint +struct alignas(8) CColPoint { - CVector point; - int pad1; + alignas(8) CVector point; + float pad1 = 0.0f; // the surface normal on the surface of point - CVector normal; - int pad2; + alignas(8) CVector normal; + float pad2 = 0.0f; uint8 surfaceA; uint8 pieceA; uint8 surfaceB; diff --git a/src/liberty/collision/Collision.cpp b/src/liberty/collision/Collision.cpp index e20586c3..3bf023f8 100644 --- a/src/liberty/collision/Collision.cpp +++ b/src/liberty/collision/Collision.cpp @@ -787,12 +787,13 @@ bool CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq) { CVector dist = s1.center - s2.center; - float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2 + float mag = dist.Magnitude(); + float d = mag - s2.radius; // distance from s1's center to s2 float depth = s1.radius - d; // sphere overlap if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2 // no collision if sphere is not close enough if(d*d < mindistsq && d < s1.radius){ - dist.Normalise(); + dist *= Invert(mag); point.point = s1.center - dist*d; point.normal = dist; #ifndef VU_COLLISION diff --git a/src/liberty/math/Quaternion.cpp b/src/liberty/math/Quaternion.cpp index b0e782e2..aeefcd59 100644 --- a/src/liberty/math/Quaternion.cpp +++ b/src/liberty/math/Quaternion.cpp @@ -39,10 +39,9 @@ CQuaternion::Slerp(const CQuaternion &q1, const CQuaternion &q2, float theta, fl void CQuaternion::Multiply(const CQuaternion &q1, const CQuaternion &q2) { - x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w); - y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w); - z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w); - w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z); + quat_mult(reinterpret_cast(this), + reinterpret_cast(q1), + reinterpret_cast(q2)); } void @@ -50,10 +49,11 @@ CQuaternion::Get(RwV3d *axis, float *angle) { *angle = Acos(w); float s = Sin(*angle); + float invS = dc::Invert(s); - axis->x = x * (1.0f / s); - axis->y = y * (1.0f / s); - axis->z = z * (1.0f / s); + axis->x = x * invS; + axis->y = y * invS; + axis->z = z * invS; } void @@ -104,7 +104,7 @@ CQuaternion::Set(const RwMatrix &matrix) if (f >= 0.0f) { s = Sqrt(f + 1.0f); w = 0.5f * s; - m = 0.5f / s; + m = Div(0.5f, s); x = (matrix.up.z - matrix.at.y) * m; y = (matrix.at.x - matrix.right.z) * m; z = (matrix.right.y - matrix.up.x) * m; @@ -115,7 +115,7 @@ CQuaternion::Set(const RwMatrix &matrix) if (f >= 0.0f) { s = Sqrt(f + 1.0f); x = 0.5f * s; - m = 0.5f / s; + m = Div(0.5f, s); y = (matrix.up.x + matrix.right.y) * m; z = (matrix.at.x + matrix.right.z) * m; w = (matrix.up.z - matrix.at.y) * m; @@ -126,7 +126,7 @@ CQuaternion::Set(const RwMatrix &matrix) if (f >= 0.0f) { s = Sqrt(f + 1.0f); y = 0.5f * s; - m = 0.5f / s; + m = Div(0.5f, s); w = (matrix.at.x - matrix.right.z) * m; x = (matrix.up.x - matrix.right.y) * m; z = (matrix.at.y + matrix.up.z) * m; @@ -136,7 +136,7 @@ CQuaternion::Set(const RwMatrix &matrix) f = matrix.at.z - (matrix.up.y + matrix.right.x); s = Sqrt(f + 1.0f); z = 0.5f * s; - m = 0.5f / s; + m = Div(0.5f, s); w = (matrix.right.y - matrix.up.x) * m; x = (matrix.at.x + matrix.right.z) * m; y = (matrix.at.y + matrix.up.z) * m; @@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3) *f3 = Atan2(matrix.right.y, matrix.up.y); if (*f3 < 0.0f) *f3 += TWOPI; - float s = Sin(*f3); - float c = Cos(*f3); + auto [s, c] = SinCos(*f3); *f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y); if (*f1 < 0.0f) *f1 += TWOPI; @@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3) void CQuaternion::Set(float f1, float f2, float f3) { - float c1 = Cos(f1 * 0.5f); - float c2 = Cos(f2 * 0.5f); - float c3 = Cos(f3 * 0.5f); - float s1 = Sin(f1 * 0.5f); - float s2 = Sin(f2 * 0.5f); - float s3 = Sin(f3 * 0.5f); + auto [s1, c1] = SinCos(f1 * 0.5f); + auto [s2, c2] = SinCos(f2 * 0.5f); + auto [s3, c3] = SinCos(f3 * 0.5f); + x = ((c2 * c1) * s3) - ((s2 * s1) * c3); y = ((s1 * c2) * c3) + ((s2 * c1) * s3); z = ((s2 * c1) * c3) - ((s1 * c2) * s3); diff --git a/src/miami/collision/Collision.cpp b/src/miami/collision/Collision.cpp index f39f3f35..587e1e26 100644 --- a/src/miami/collision/Collision.cpp +++ b/src/miami/collision/Collision.cpp @@ -617,12 +617,13 @@ bool CCollision::ProcessSphereSphere(const CColSphere &s1, const CColSphere &s2, CColPoint &point, float &mindistsq) { CVector dist = s1.center - s2.center; - float d = dist.Magnitude() - s2.radius; // distance from s1's center to s2 + float mag = dist.Magnitude(); + float d = mag - s2.radius; // distance from s1's center to s2 float depth = s1.radius - d; // sphere overlap if(d < 0.0f) d = 0.0f; // clamp to zero, i.e. if s1's center is inside s2 // no collision if sphere is not close enough if(d*d < mindistsq && d < s1.radius){ - dist.Normalise(); + dist *= Invert(mag); point.point = s1.center - dist*d; point.normal = dist; #ifndef VU_COLLISION diff --git a/src/miami/math/Quaternion.cpp b/src/miami/math/Quaternion.cpp index 6eab890b..aeefcd59 100644 --- a/src/miami/math/Quaternion.cpp +++ b/src/miami/math/Quaternion.cpp @@ -49,7 +49,7 @@ CQuaternion::Get(RwV3d *axis, float *angle) { *angle = Acos(w); float s = Sin(*angle); - float invS = dc::Invert(s); + float invS = dc::Invert(s); axis->x = x * invS; axis->y = y * invS; @@ -151,8 +151,7 @@ CQuaternion::Get(float *f1, float *f2, float *f3) *f3 = Atan2(matrix.right.y, matrix.up.y); if (*f3 < 0.0f) *f3 += TWOPI; - float s = Sin(*f3); - float c = Cos(*f3); + auto [s, c] = SinCos(*f3); *f1 = Atan2(-matrix.at.y, s * matrix.right.y + c * matrix.up.y); if (*f1 < 0.0f) *f1 += TWOPI; @@ -164,12 +163,10 @@ CQuaternion::Get(float *f1, float *f2, float *f3) void CQuaternion::Set(float f1, float f2, float f3) { - float c1 = Cos(f1 * 0.5f); - float c2 = Cos(f2 * 0.5f); - float c3 = Cos(f3 * 0.5f); - float s1 = Sin(f1 * 0.5f); - float s2 = Sin(f2 * 0.5f); - float s3 = Sin(f3 * 0.5f); + auto [s1, c1] = SinCos(f1 * 0.5f); + auto [s2, c2] = SinCos(f2 * 0.5f); + auto [s3, c3] = SinCos(f3 * 0.5f); + x = ((c2 * c1) * s3) - ((s2 * s1) * c3); y = ((s1 * c2) * c3) + ((s2 * c1) * s3); z = ((s2 * c1) * c3) - ((s1 * c2) * s3); diff --git a/vendor/dca3-kos b/vendor/dca3-kos index 5d475f6e..5c0e0936 160000 --- a/vendor/dca3-kos +++ b/vendor/dca3-kos @@ -1 +1 @@ -Subproject commit 5d475f6ecffa0bc7aaaccf9bd8b64cda09aec89d +Subproject commit 5c0e0936ad15a249858b01f29210a2ae7beec603 diff --git a/vendor/librw/src/dc/rwdc.cpp b/vendor/librw/src/dc/rwdc.cpp index 29f7ec23..c4d106ed 100644 --- a/vendor/librw/src/dc/rwdc.cpp +++ b/vendor/librw/src/dc/rwdc.cpp @@ -2856,7 +2856,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve __builtin_prefetch(srcVtxBytes); for(int c = 0; c < count - 1; ++c) - innerLoop.template operator()(); + innerLoop.template operator()(); innerLoop.template operator()(); } else if (!(flags & 0x80)) { @@ -3464,9 +3464,6 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { assert(atomicContexts.size() <= 32767); auto meshes = geo->meshHeader->getMeshes(); - RawMatrix worldOrient; - bool worldOrientValid = false; - for (int16_t n = 0; n < numMeshes; n++) { bool doBlend = meshes[n].material->color.alpha != 255; // TODO: check all vertexes for alpha? bool doBlendMaterial = doBlend; @@ -3485,16 +3482,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { isMatFX = true; matfxCoefficient = matfx->fx[0].env.coefficient; matfxContexts.resize(matfxContexts.size() + 1); -#warning "Get rid of me by creating mat_apply_3x3()!" - if(!worldOrientValid) { - rw::convMatrix(&worldOrient, atomic->getFrame()->getLTM()); - worldOrient.pos = { 0, 0, 0 }; - worldOrient.rightw = 0; - worldOrient.upw = 0; - worldOrient.atw = 0; - worldOrientValid = true; - } - uploadEnvMatrix(matfx->fx[0].env.frame, &worldOrient, &matfxContexts.back().mtx); + + uploadEnvMatrix(matfx->fx[0].env.frame, reinterpret_cast(atomic->getFrame()->getLTM()), &matfxContexts.back().mtx); matfxContexts.back().coefficient = matfxCoefficient; pvr_poly_cxt_t cxt; diff --git a/vendor/librw/src/dc/rwdc_common.h b/vendor/librw/src/dc/rwdc_common.h index a054ed2b..c081be21 100644 --- a/vendor/librw/src/dc/rwdc_common.h +++ b/vendor/librw/src/dc/rwdc_common.h @@ -267,62 +267,6 @@ __always_inline __hot constexpr float Acos(float x) { x2 = __x; y2 = __y; z2 = __z; \ } -// no declspec naked, so can't do rts / fschg. instead compiler pads with nop? -__always_inline __hot void mat_load_3x3(const matrix_t* mtx) { - __asm__ __volatile__ ( - R"( - fschg - frchg - - fmov @%[mtx]+, dr0 - fldi0 fr12 - - fmov @%[mtx]+, dr2 - fldi0 fr13 - - fmov @%[mtx]+, dr4 - fldi0 fr3 - - fmov @%[mtx]+, dr6 - fmov dr12, dr14 - - fmov @%[mtx]+, dr8 - fldi0 fr7 - - fmov @%[mtx]+, dr10 - fldi0 fr11 - - fschg - frchg - )" - : [mtx] "+r" (mtx) - ); -} - -// sets pos.w to 1 -__always_inline __hot void rw_mat_load_4x4(const rw::Matrix* mtx) { - __asm__ __volatile__ ( - R"( - fschg - frchg - fmov @%[mtx]+, dr0 - - fmov @%[mtx]+, dr2 - fmov @%[mtx]+, dr4 - fmov @%[mtx]+, dr6 - fmov @%[mtx]+, dr8 - fmov @%[mtx]+, dr10 - fmov @%[mtx]+, dr12 - fmov @%[mtx]+, dr14 - fldi1 fr15 - - fschg - frchg - )" - : [mtx] "+r" (mtx) - ); -} - __always_inline __hot void mat_transpose(void) { asm volatile ( "frchg\n\t" // fmov for singles only works on front bank @@ -560,23 +504,6 @@ __hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix w_ = tmp1233123.w; \ } while(false) -inline void mat_load_3x3(const matrix_t* mtx) { - memcpy(XMTRX, mtx, sizeof(matrix_t)); - XMTRX[0][3] = 0.0f; - XMTRX[1][3] = 0.0f; - XMTRX[2][3] = 0.0f; - - XMTRX[3][0] = 0.0f; - XMTRX[3][1] = 0.0f; - XMTRX[3][2] = 0.0f; - XMTRX[3][3] = 0.0f; -} - -inline void rw_mat_load_4x4(const rw::Matrix* mtx) { - memcpy(XMTRX, mtx, sizeof(matrix_t)); - XMTRX[3][3] = 1.0f; -} - inline void mat_transpose(void) { matrix_t tmp;