diff options
Diffstat (limited to 'lib/fp_lib.h')
-rw-r--r-- | lib/fp_lib.h | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/lib/fp_lib.h b/lib/fp_lib.h index c8c72342..ab425afa 100644 --- a/lib/fp_lib.h +++ b/lib/fp_lib.h @@ -37,6 +37,13 @@ static inline int rep_clz(rep_t a) { return __builtin_clz(a); } +// 32x32 --> 64 bit multiply +static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a*b; + *hi = product >> 32; + *lo = product; +} + #elif defined DOUBLE_PRECISION typedef uint64_t rep_t; @@ -56,6 +63,26 @@ static inline int rep_clz(rep_t a) { #endif } +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. +static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} + #else #error Either SINGLE_PRECISION or DOUBLE_PRECISION must be defined. #endif |