mirror of
https://github.com/jpd002/Play-.git
synced 2025-04-28 21:57:57 +03:00
176 lines
4.7 KiB
C++
176 lines
4.7 KiB
C++
//Source: The LLVM Compiler Infrastructure - lib/addsf3.c
|
|
//Modified to truncate result of addition
|
|
|
|
#include <limits.h>
|
|
#include <cstdint>
|
|
#include "FpAddTruncate.h"
|
|
#include "BitManip.h"
|
|
|
|
typedef uint32 rep_t;
|
|
typedef int32 srep_t;
|
|
typedef float fp_t;
|
|
#define REP_C UINT32_C
|
|
#define significandBits 23
|
|
|
|
#define typeWidth (sizeof(rep_t) * CHAR_BIT)
|
|
#define exponentBits (typeWidth - significandBits - 1)
|
|
#define maxExponent ((1 << exponentBits) - 1)
|
|
#define exponentBias (maxExponent >> 1)
|
|
|
|
#define implicitBit (REP_C(1) << significandBits)
|
|
#define significandMask (implicitBit - 1U)
|
|
#define signBit (REP_C(1) << (significandBits + exponentBits))
|
|
#define absMask (signBit - 1U)
|
|
#define exponentMask (absMask ^ significandMask)
|
|
#define oneRep ((rep_t)exponentBias << significandBits)
|
|
#define infRep exponentMask
|
|
#define quietBit (implicitBit >> 1)
|
|
#define qnanRep (exponentMask | quietBit)
|
|
|
|
// Counts the leading zero bits of 'a' by delegating to __builtin_clz.
// NOTE: the GCC/Clang builtin leaves the result undefined when 'a' is 0, so
// callers are expected to pass a non-zero value.
static inline int rep_clz(rep_t a)
{
	const int leadingZeros = __builtin_clz(a);
	return leadingZeros;
}
|
|
|
|
// Adds two single-precision values supplied as raw bit patterns and returns
// the raw bit pattern of the sum.
//
// This is a variant of the LLVM soft-float addition routine in which the
// result is truncated: the extra low-order bits carried through the
// computation are dropped instead of being used for rounding.
//
// Parameters:
//   a, b - bit patterns of the two operands.
// Returns:
//   Bit pattern of a + b. NaN operands yield the operand with its quiet bit
//   set, infinity + opposite infinity yields qnanRep, and an exponent
//   overflow yields an infinity carrying the sign of the larger operand.
//
// Operands whose exponent field is zero but whose significand is not are not
// normalized; they get the implicit bit like any other operand.
uint32 FpAddTruncate(uint32 a, uint32 b)
{
	const rep_t aAbs = a & absMask;
	const rep_t bAbs = b & absMask;

	// Detect if a or b is zero, infinity, or NaN. Subtracting 1 makes zero
	// wrap around to the largest value so a single compare covers all three.
	if(aAbs - 1U >= infRep - 1U || bAbs - 1U >= infRep - 1U)
	{
		// NaN + anything = qNaN
		if(aAbs > infRep) return (a | quietBit);
		// anything + NaN = qNaN
		if(bAbs > infRep) return (b | quietBit);

		if(aAbs == infRep)
		{
			// +/-infinity + -/+infinity = qNaN
			if((a ^ b) == signBit) return qnanRep;
			// +/-infinity + anything remaining = +/-infinity
			return a;
		}

		// anything remaining + +/-infinity = +/-infinity
		if(bAbs == infRep) return b;

		// zero + anything = anything
		if(!aAbs)
		{
			// For zero + zero the result is negative only when both are.
			if(!bAbs) return (a & b);
			return b;
		}

		// anything + zero = anything
		if(!bAbs) return a;
	}

	// Swap a and b if necessary so that a has the larger absolute value.
	if(bAbs > aAbs)
	{
		const uint32 temp = a;
		a = b;
		b = temp;
	}

	// Extract the exponent and significand from the (possibly swapped) a and b.
	int aExponent = (a >> significandBits) & maxExponent;
	int bExponent = (b >> significandBits) & maxExponent;
	rep_t aSignificand = a & significandMask;
	rep_t bSignificand = b & significandMask;

	// The denormal normalization step of the original routine is disabled
	// here: operands with a zero exponent field are used as they are.

	// The sign of the result is the sign of the larger operand, a. If they
	// have opposite signs, we are performing a subtraction; otherwise addition.
	const rep_t resultSign = a & signBit;
	const bool subtraction = ((a ^ b) & signBit) != 0;

	// Make room for three extra low-order bits and or in the implicit
	// significand bit.
	aSignificand = (aSignificand | implicitBit) << 3;
	bSignificand = (bSignificand | implicitBit) << 3;

	// Shift the significand of b by the difference in exponents. Bits shifted
	// out are simply lost; no sticky bit is kept.
	const unsigned int align = aExponent - bExponent;
	if(align)
	{
		if(align < typeWidth)
		{
			bSignificand = bSignificand >> align;
		}
		else
		{
			// b is shifted out entirely and contributes nothing.
			bSignificand = 0;
		}
	}

	if(subtraction)
	{
		aSignificand -= bSignificand;

		// If a == -b, return +zero.
		if(aSignificand == 0) return 0;

		// If partial cancellation occurred, we need to left-shift the result
		// and adjust the exponent:
		if(aSignificand < implicitBit << 3)
		{
			const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
			aSignificand <<= shift;
			aExponent -= shift;
		}
	}
	else /* addition */
	{
		aSignificand += bSignificand;

		// If the addition carried up, we need to right-shift the result and
		// adjust the exponent. The bit shifted out is not kept: it could only
		// land in the low bits that are discarded below.
		if(aSignificand & implicitBit << 4)
		{
			aSignificand >>= 1;
			aExponent += 1;
		}
	}

	// If we have overflowed the type, return +/- infinity:
	if(aExponent >= maxExponent) return infRep | resultSign;

	if(aExponent <= 0)
	{
		// The exponent fell to zero or below; store a zero exponent and
		// shift the significand right to compensate.
		const int shift = 1 - aExponent;
		aSignificand >>= shift;
		aExponent = 0;
	}

	// Shift the significand into place, dropping the three extra low-order
	// bits (this is the truncation), and mask off the implicit bit.
	rep_t result = (aSignificand >> 3) & significandMask;

	// Insert the exponent and sign.
	result |= (rep_t)aExponent << significandBits;
	result |= resultSign;

	return result;
}
|