diff --git a/src/liberty/math/Quaternion.h b/src/liberty/math/Quaternion.h
index 365e988e..47581368 100644
--- a/src/liberty/math/Quaternion.h
+++ b/src/liberty/math/Quaternion.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "src/common_defines.h"
+
 // TODO: actually implement this
 class CQuaternion
 {
@@ -8,13 +10,19 @@ public:
 	CQuaternion(void) {}
 	CQuaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
 
-	float Magnitude(void) const { return Sqrt(MagnitudeSqr()); }
+	float Magnitude(void) const {	// same value as Sqrt(MagnitudeSqr()), spelled out per build
+#ifndef DC_SH4
+		return Sqrt(x*x + y*y + z*z + w*w);
+#else	// DC_SH4
+		return Sqrt(fipr_magnitude_sqr(x, y, z, w));
+#endif	// DC_SH4
+	}
 	float MagnitudeSqr(void) const {
-	#ifdef DC_SH4
-		return fipr_magnitude_sqr(x, y, z, w);
-	#else
+#ifndef DC_SH4	// portable build: plain sum of the four squares
 		return x*x + y*y + z*z + w*w;
-	#endif
+#else	// DC_SH4
+		return fipr_magnitude_sqr(x, y, z, w);	// NOTE(review): expected to equal the sum above - confirm
+#endif	// DC_SH4
 	}
 	void Normalise(void);
 	void Multiply(const CQuaternion &q1, const CQuaternion &q2);
@@ -72,7 +80,11 @@ public:
 inline float
 DotProduct(const CQuaternion &q1, const CQuaternion &q2)
 {
+#ifndef DC_SH4	// portable build: four-term dot product
 	return q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w;
+#else	// DC_SH4: all of q1 is passed first, then all of q2, each in x, y, z, w order
+	return fipr(q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w);
+#endif	// DC_SH4
 }
 
 inline CQuaternion operator+(const CQuaternion &left, const CQuaternion &right)
diff --git a/src/liberty/math/Vector.cpp b/src/liberty/math/Vector.cpp
index 309da539..9ac3c2d7 100644
--- a/src/liberty/math/Vector.cpp
+++ b/src/liberty/math/Vector.cpp
@@ -32,7 +32,7 @@ Multiply3x3(const CMatrix &mat, const CVector &vec)
     register float __y __asm__("fr13") = vec.y;
     register float __z __asm__("fr14") = vec.z;
     register float __w __asm__("fr15") = 0.0f;
-	
+
 	mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
 
 	asm volatile( "ftrv  xmtrx, fv12\n"
diff --git a/src/liberty/math/Vector.h b/src/liberty/math/Vector.h
index 724c4571..c0996510 100644
--- a/src/liberty/math/Vector.h
+++ b/src/liberty/math/Vector.h
@@ -19,22 +19,22 @@ public:
   }
   // (0,1,0) means no rotation. So get right vector and its atan
   __always_inline float Heading(void) const { return Atan2(-x, y); }
-  __always_inline float Magnitude(void) const { 
+  __always_inline float Magnitude(void) const {
 #ifdef DC_SH4
     float w;
     vec3f_length(x, y, z, w);
     return w;
 #else
-    return Sqrt(x*x + y*y + z*z); 
+    return Sqrt(x*x + y*y + z*z);
 #endif
 }
-  __always_inline float MagnitudeSqr(void) const { 
+  __always_inline float MagnitudeSqr(void) const {
 #ifdef DC_SH4
-    return fipr_magnitude_sqr(x, y,z, 0.0f); 
+    return fipr_magnitude_sqr(x, y,z, 0.0f);
 #else
     return x*x + y*y + z*z;
 #endif
-} 
+}
   __always_inline float Magnitude2D(void) const { return Sqrt(x*x + y*y); }
   float MagnitudeSqr2D(void) const { return x*x + y*y; }
   void Normalise(void);
diff --git a/src/miami/math/Quaternion.h b/src/miami/math/Quaternion.h
index 47c94f7c..47581368 100644
--- a/src/miami/math/Quaternion.h
+++ b/src/miami/math/Quaternion.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "src/common_defines.h"
+
 // TODO: actually implement this
 class CQuaternion
 {
@@ -8,8 +10,20 @@ public:
 	CQuaternion(void) {}
 	CQuaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
 
-	float Magnitude(void) const { return Sqrt(x*x + y*y + z*z + w*w); }
-	float MagnitudeSqr(void) const { return x*x + y*y + z*z + w*w; }
+	float Magnitude(void) const {	// same value as Sqrt(MagnitudeSqr()), spelled out per build
+#ifndef DC_SH4
+		return Sqrt(x*x + y*y + z*z + w*w);
+#else	// DC_SH4
+		return Sqrt(fipr_magnitude_sqr(x, y, z, w));
+#endif	// DC_SH4
+	}
+	float MagnitudeSqr(void) const {
+#ifndef DC_SH4	// portable build: plain sum of the four squares
+		return x*x + y*y + z*z + w*w;
+#else	// DC_SH4
+		return fipr_magnitude_sqr(x, y, z, w);	// NOTE(review): expected to equal the sum above - confirm
+#endif	// DC_SH4
+	}
 	void Normalise(void);
 	void Multiply(const CQuaternion &q1, const CQuaternion &q2);
 	void Invert(void){	// Conjugate would have been a better name
@@ -66,7 +80,11 @@ public:
 inline float
 DotProduct(const CQuaternion &q1, const CQuaternion &q2)
 {
+#ifndef DC_SH4	// portable build: four-term dot product
 	return q1.x*q2.x + q1.y*q2.y + q1.z*q2.z + q1.w*q2.w;
+#else	// DC_SH4: all of q1 is passed first, then all of q2, each in x, y, z, w order
+	return fipr(q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w);
+#endif	// DC_SH4
 }
 
 inline CQuaternion operator+(const CQuaternion &left, const CQuaternion &right)
diff --git a/src/miami/math/Vector.cpp b/src/miami/math/Vector.cpp
index ee76e555..a3510a6c 100644
--- a/src/miami/math/Vector.cpp
+++ b/src/miami/math/Vector.cpp
@@ -3,6 +3,10 @@
 void
 CVector::Normalise(void)
 {
+#ifdef DC_SH4_BROKEN	// NOTE(review): not the DC_SH4 guard used further down this file - presumably keeps this path disabled; confirm
+	// TODO: This needs to handle zero vectors here (the scalar path below sets x = 1.0f in that case)
+	vec3f_normalize(x, y, z);
+#else	// scalar path
 	float sq = MagnitudeSqr();
 	if (sq > 0.0f) {
 		float invsqrt = RecipSqrt(sq);
@@ -11,6 +15,7 @@ CVector::Normalise(void)
 		z *= invsqrt;
 	} else
 		x = 1.0f;
+#endif	// DC_SH4_BROKEN
 }
 
 CVector
@@ -22,10 +27,24 @@ CrossProduct(const CVector &v1, const CVector &v2)
 CVector
 Multiply3x3(const CMatrix &mat, const CVector &vec)
 {
+#ifdef DC_SH4
+    register float __x __asm__("fr12") = vec.x;	// the four locals are pinned to fr12..fr15 for the asm below
+    register float __y __asm__("fr13") = vec.y;
+    register float __z __asm__("fr14") = vec.z;
+    register float __w __asm__("fr15") = 0.0f;	// w input is zero; the scalar path below has no translation term either
+	// NOTE(review): the casts assume CMatrix can be read as a matrix_t and that mat_load does not write to it - confirm.
+	mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
+	// NOTE(review): presumably mat_load fills XMTRX, which ftrv then applies to fv12 (fr12..fr15) - confirm.
+	asm volatile( "ftrv  xmtrx, fv12\n"
+                : "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
+                : "0" (__x), "1" (__y), "2" (__z), "3" (__w) );
+    return { __x, __y, __z };	// __w is not part of the result
+#else	// portable build
 	// TODO: VU0 code
 	return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z,
 	               mat.ry * vec.x + mat.fy * vec.y + mat.uy * vec.z,
 	               mat.rz * vec.x + mat.fz * vec.y + mat.uz * vec.z);
+#endif	// DC_SH4
 }
 
 CVector
@@ -39,8 +58,15 @@ Multiply3x3(const CVector &vec, const CMatrix &mat)
 CVector
 operator*(const CMatrix &mat, const CVector &vec)
 {
+#ifdef DC_SH4
+	CVector out;	// expected to be filled in through the last three arguments of the transform call below
+	mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));	// NOTE(review): assumes mat_load only reads mat - confirm
+	mat_trans_single3_nodiv_nomod(vec.x, vec.y, vec.z, out.x, out.y, out.z);
+	return out;
+#else	// portable build: 3x3 part plus the px/py/pz translation
 	// TODO: VU0 code
 	return CVector(mat.rx * vec.x + mat.fx * vec.y + mat.ux * vec.z + mat.px,
 	               mat.ry * vec.x + mat.fy * vec.y + mat.uy * vec.z + mat.py,
 	               mat.rz * vec.x + mat.fz * vec.y + mat.uz * vec.z + mat.pz);
+#endif	// DC_SH4
 }
diff --git a/src/miami/math/Vector.h b/src/miami/math/Vector.h
index 02128454..04287ece 100644
--- a/src/miami/math/Vector.h
+++ b/src/miami/math/Vector.h
@@ -18,11 +18,25 @@ public:
 		z = v.z;
 	}
 	// (0,1,0) means no rotation. So get right vector and its atan
-	float Heading(void) const { return Atan2(-x, y); }
-	float Magnitude(void) const { return Sqrt(x*x + y*y + z*z); }
-	float MagnitudeSqr(void) const { return x*x + y*y + z*z; }
-	float Magnitude2D(void) const { return Sqrt(x*x + y*y); }
-	float MagnitudeSqr2D(void) const { return x*x + y*y; }
+  __always_inline float Heading(void) const { return Atan2(-x, y); }
+  __always_inline float Magnitude(void) const {
+#ifdef DC_SH4
+    float w;	// expected to be filled in by vec3f_length below
+    vec3f_length(x, y, z, w);
+    return w;
+#else	// portable build
+    return Sqrt(x*x + y*y + z*z);
+#endif	// DC_SH4
+}
+  __always_inline float MagnitudeSqr(void) const {
+#ifdef DC_SH4
+    return fipr_magnitude_sqr(x, y,z, 0.0f);	// fourth argument is a literal 0.0f since CVector has only x, y, z here
+#else	// portable build
+    return x*x + y*y + z*z;
+#endif	// DC_SH4
+}
+  __always_inline float Magnitude2D(void) const { return Sqrt(x*x + y*y); }	// length of the (x, y) part; z is not used
+  float MagnitudeSqr2D(void) const { return x*x + y*y; }	// squared length of the (x, y) part; z is not used
 	void Normalise(void);
 	
 	void Normalise2D(void) {
@@ -103,7 +117,11 @@ inline CVector operator/(const CVector &left, float right)
 inline float
 DotProduct(const CVector &v1, const CVector &v2)
 {
-	return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
+#ifdef DC_SH4
+  return fipr(v1.x, v1.y, v1.z, 0.0f, v2.x, v2.y, v2.z, 0.0f);	// each 3-vector is padded with 0.0f as a fourth argument
+#else	// portable build
+  return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
+#endif	// DC_SH4
 }
 
 CVector CrossProduct(const CVector &v1, const CVector &v2);
@@ -111,7 +129,15 @@ CVector CrossProduct(const CVector &v1, const CVector &v2);
 inline float
 Distance(const CVector &v1, const CVector &v2)
 {
-	return (v2 - v1).Magnitude();
+#ifdef DC_SH4
+  // w is expected to be filled in by vec3f_distance. It is declared inside
+  // this branch so that other builds do not carry an unused local.
+  float w;
+  vec3f_distance(v1.x, v1.y, v1.z, v2.x, v2.y, v2.z, w);
+  return w;
+#else	// portable build: length of the difference vector
+  return (v2 - v1).Magnitude();
+#endif	// DC_SH4
 }
 
 inline float
diff --git a/vendor/librw/src/rwbase.h b/vendor/librw/src/rwbase.h
index a10df427..4274be0e 100644
--- a/vendor/librw/src/rwbase.h
+++ b/vendor/librw/src/rwbase.h
@@ -253,11 +253,25 @@ inline V3d neg(const V3d &a) { return makeV3d(-a.x, -a.y, -a.z); }
 inline V3d add(const V3d &a, const V3d &b) { return makeV3d(a.x+b.x, a.y+b.y, a.z+b.z); }
 inline V3d sub(const V3d &a, const V3d &b) { return makeV3d(a.x-b.x, a.y-b.y, a.z-b.z); }
 inline V3d scale(const V3d &a, float32 r) { return makeV3d(a.x*r, a.y*r, a.z*r); }
-inline float32 length(const V3d &v) { return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z); }
+inline float32 length(const V3d &v) {
+#ifdef DC_SH4	// SH4 branch first, the same guard order dot() uses below
+	float len;	// expected to be filled in by vec3f_length
+	vec3f_length(v.x, v.y, v.z, len);
+	return len;
+#else	// portable build
+	return sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);
+#endif	// DC_SH4
+}
 inline V3d normalize(const V3d &v) { return scale(v, 1.0f/length(v)); }
 inline V3d setlength(const V3d &v, float32 l) { return scale(v, l/length(v)); }
 V3d cross(const V3d &a, const V3d &b);
-inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
+inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) {
+#ifdef DC_SH4
+	return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);	// each V3d is padded with 0.0f as a fourth argument
+#else	// portable build
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+#endif	// DC_SH4
+}
 inline V3d lerp(const V3d &a, const V3d &b, float32 r){
 	return makeV3d(a.x + r*(b.x - a.x),
 	               a.y + r*(b.y - a.y),
@@ -300,7 +314,13 @@ inline Quat makeQuat(float32 w, const V3d &vec) { Quat q = { vec.x, vec.y, vec.z
 inline Quat add(const Quat &q, const Quat &p) { return makeQuat(q.w+p.w, q.x+p.x, q.y+p.y, q.z+p.z); }
 inline Quat sub(const Quat &q, const Quat &p) { return makeQuat(q.w-p.w, q.x-p.x, q.y-p.y, q.z-p.z); }
 inline Quat negate(const Quat &q) { return makeQuat(-q.w, -q.x, -q.y, -q.z); }
-inline float32 dot(const Quat &q, const Quat &p) { return q.w*p.w + q.x*p.x + q.y*p.y + q.z*p.z; }
+inline float32 dot(const Quat &q, const Quat &p) {
+#ifdef DC_SH4
+	return fipr(q.x, q.y, q.z, q.w, p.x, p.y, p.z, p.w);	// arguments go in x, y, z, w order; the scalar path below sums the w term first
+#else	// portable build
+	return q.w*p.w + q.x*p.x + q.y*p.y + q.z*p.z;
+#endif	// DC_SH4
+}
 inline Quat scale(const Quat &q, float32 r) { return makeQuat(q.w*r, q.x*r, q.y*r, q.z*r); }
 inline float32 length(const Quat &q) { return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z); }
 inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }