diff options
author | Andres Noetzli <andres.noetzli@gmail.com> | 2018-07-23 17:54:01 -0700 |
---|---|---|
committer | Andres Noetzli <andres.noetzli@gmail.com> | 2018-07-23 17:54:01 -0700 |
commit | 413301997439f407d66b74dc91a3d1a822a0ac5e (patch) | |
tree | adaacfeec27f0600e1c992068c9bd5e79bec5275 /src/util | |
parent | d4c49e755a53e7333c7638a5aeafe8baa2ea56d3 (diff) |
MPFR supportmpfr
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/Makefile.am | 4 | ||||
-rw-r--r-- | src/util/floatingpoint.h.in | 94 | ||||
-rw-r--r-- | src/util/floatingpoint_mpfr_imp.cpp | 384 | ||||
-rw-r--r-- | src/util/floatingpoint_mpfr_imp.h | 178 | ||||
-rw-r--r-- | src/util/floatingpoint_size.h | 101 | ||||
-rw-r--r-- | src/util/rounding_mode.h | 49 |
6 files changed, 718 insertions, 92 deletions
diff --git a/src/util/Makefile.am b/src/util/Makefile.am index 297d96208..2e9d761f8 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -31,6 +31,9 @@ libutil_la_SOURCES = \ divisible.h \ dynamic_array.h \ floatingpoint.cpp \ + floatingpoint_mpfr_imp.cpp \ + floatingpoint_mpfr_imp.h \ + floatingpoint_size.h \ gmp_util.h \ hash.h \ index.cpp \ @@ -48,6 +51,7 @@ libutil_la_SOURCES = \ resource_manager.h \ result.cpp \ result.h \ + rounding_mode.h \ safe_print.cpp \ safe_print.h \ sampler.cpp \ diff --git a/src/util/floatingpoint.h.in b/src/util/floatingpoint.h.in index d283d985f..50734d39f 100644 --- a/src/util/floatingpoint.h.in +++ b/src/util/floatingpoint.h.in @@ -21,99 +21,15 @@ #define __CVC4__FLOATINGPOINT_H #include "util/bitvector.h" +#include "util/floatingpoint_size.h" +#include "util/rounding_mode.h" #include "util/rational.h" -#include <fenv.h> - #if @CVC4_USE_SYMFPU@ #include <symfpu/core/unpackedFloat.h> #endif /* @CVC4_USE_SYMFPU@ */ namespace CVC4 { - // Inline these! - inline bool CVC4_PUBLIC validExponentSize (unsigned e) { return e >= 2; } - inline bool CVC4_PUBLIC validSignificandSize (unsigned s) { return s >= 2; } - - /** - * Floating point sorts are parameterised by two non-zero constants - * giving the width (in bits) of the exponent and significand - * (including the hidden bit). - */ - class CVC4_PUBLIC FloatingPointSize { - /* - Class invariants: - * VALIDEXPONENTSIZE(e) - * VALIDSIGNIFCANDSIZE(s) - */ - - private : - unsigned e; - unsigned s; - - public : - FloatingPointSize (unsigned _e, unsigned _s); - FloatingPointSize (const FloatingPointSize &old); - - inline unsigned exponent (void) const { - return this->e; - } - - inline unsigned significand (void) const { - return this->s; - } - - bool operator ==(const FloatingPointSize& fps) const { - return (e == fps.e) && (s == fps.s); - } - - // Implement the interface that symfpu uses for floating-point formats. - inline unsigned exponentWidth(void) const { return this->exponent(); } - inline unsigned significandWidth(void) const { return this->significand(); } - inline unsigned packedWidth(void) const - { - return this->exponentWidth() + this->significandWidth(); - } - inline unsigned packedExponentWidth(void) const - { - return this->exponentWidth(); - } - inline unsigned packedSignificandWidth(void) const - { - return this->significandWidth() - 1; - } - - }; /* class FloatingPointSize */ - - struct CVC4_PUBLIC FloatingPointSizeHashFunction { - static inline size_t ROLL(size_t X, size_t N) { - return (((X) << (N)) | ((X) >> (8*sizeof((X)) - (N)) )); - } - - inline size_t operator() (const FloatingPointSize& fpt) const { - return size_t(ROLL(fpt.exponent(), 4*sizeof(unsigned)) | - fpt.significand()); - } - }; /* struct FloatingPointSizeHashFunction */ - - - /** - * A concrete instance of the rounding mode sort - */ - enum CVC4_PUBLIC RoundingMode { - roundNearestTiesToEven = FE_TONEAREST, - roundTowardPositive = FE_UPWARD, - roundTowardNegative = FE_DOWNWARD, - roundTowardZero = FE_TOWARDZERO, - // Initializes this to the diagonalization of the 4 other values. - roundNearestTiesToAway = (((~FE_TONEAREST) & 0x1) | ((~FE_UPWARD) & 0x2) | - ((~FE_DOWNWARD) & 0x4) | ((~FE_TOWARDZERO) & 0x8)) - }; /* enum RoundingMode */ - - struct CVC4_PUBLIC RoundingModeHashFunction { - inline size_t operator() (const RoundingMode& rm) const { - return size_t(rm); - } - }; /* struct RoundingModeHashFunction */ /** * This is a symfpu literal "back-end". It allows the library to be used as @@ -324,7 +240,6 @@ namespace CVC4 { FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV); FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &r); - static FloatingPoint makeNaN (const FloatingPointSize &t); static FloatingPoint makeInf (const FloatingPointSize &t, bool sign); static FloatingPoint makeZero (const FloatingPointSize &t, bool sign); @@ -539,11 +454,6 @@ namespace CVC4 { << ")"; } - inline std::ostream& operator <<(std::ostream& os, const FloatingPointSize& fps) CVC4_PUBLIC; - inline std::ostream& operator <<(std::ostream& os, const FloatingPointSize& fps) { - return os << "(_ FloatingPoint " << fps.exponent() << " " << fps.significand() << ")"; - } - inline std::ostream& operator <<(std::ostream& os, const FloatingPointConvertSort& fpcs) CVC4_PUBLIC; inline std::ostream& operator <<(std::ostream& os, const FloatingPointConvertSort& fpcs) { return os << "(_ to_fp " << fpcs.t.exponent() << " " << fpcs.t.significand() << ")"; diff --git a/src/util/floatingpoint_mpfr_imp.cpp b/src/util/floatingpoint_mpfr_imp.cpp new file mode 100644 index 000000000..534bfbc72 --- /dev/null +++ b/src/util/floatingpoint_mpfr_imp.cpp @@ -0,0 +1,384 @@ +#include "util/floatingpoint_mpfr_imp.h" + +#include <stdio.h> +#include <iostream> + +#include "base/cvc4_assert.h" + +namespace CVC4 { + +FloatingPoint2::FloatingPoint2(unsigned e, unsigned s, const BitVector &bv) + : d_size(e, s) +{ + Assert(e + s == bv.getSize()); + + // Reset exponent ranges (in case we just did some arithmetic with narrower + // bit-widths. + mpfr_set_emax(mpfr_get_emax_max()); + mpfr_set_emin(mpfr_get_emin_min()); + + // Set precision to `s`. `s` includes the sign bit, which the MPFR's + // precision does not count but MPFR's representation does not have a hidden + // bit, so we end up with `s` bits of precision. + mpfr_init2(d_val, s); + + int sign = bv.isBitSet(bv.getSize() - 1) ? -1 : 1; + uint64_t exp = + bv.extract(bv.getSize() - 2, s - 1).getValue().getUnsignedLong(); + BitVector sig = bv.extract(s - 2, 0); + + uint64_t maxExp = (1 << e) - 1; + if (exp == maxExp) + { + if (sig == BitVector(s - 1)) + { + // Infinity + mpfr_set_inf(d_val, sign); + } + else + { + // NaN + mpfr_set_nan(d_val); + } + } + else if (exp == 0) + { + if (sig == BitVector(s - 1)) + { + // +/- 0.0 + mpfr_set_zero(d_val, sign); + } + else + { + // Subnormal values + std::stringstream ss; + ss << (sign > 0 ? "" : "-"); + ss << "0."; + ss << sig.toString(2); + std::string sigStr = ss.str(); + mpfr_set_str(d_val, sigStr.c_str(), 2, MPFR_RNDD); + + int64_t sigExp = mpfr_get_exp(d_val); + + // Remove the bias from the exponent and add one to adjust for the hidden + // bit. + int64_t ubExp = static_cast<int64_t>(exp) - ((1 << (e - 1)) - 1) + 1; + mpfr_set_exp(d_val, sigExp + ubExp); + } + } + else + { + // Normal values + std::stringstream ss; + ss << (sign > 0 ? "" : "-"); + ss << "1."; + ss << sig.toString(2); + std::string sigStr = ss.str(); + mpfr_set_str(d_val, sigStr.c_str(), 2, MPFR_RNDD); + + // Remove the bias from the exponent and add one to adjust for the hidden + // bit. + int64_t ubExp = static_cast<int64_t>(exp) - ((1 << (e - 1)) - 1) + 1; + mpfr_set_exp(d_val, ubExp); + } +} + +FloatingPoint2::FloatingPoint2(const FloatingPoint2 &fp) : d_size(fp.d_size) +{ + mpfr_init2(d_val, d_size.significand()); + // Rounding mode does not matter since the precision of `this` and `fp` are + // the same. + mpfr_set(d_val, fp.d_val, MPFR_RNDN); +} + +/* +FloatingPoint::FloatingPoint (const FloatingPointSize &oldt, const +FloatingPointLiteral &oldfpl) : fpl(oldfpl), t(oldt) {} +FloatingPoint::FloatingPoint (const FloatingPoint &fp) : fpl(fp.fpl), t(fp.t) {} +FloatingPoint::FloatingPoint (const FloatingPointSize &ct, const RoundingMode +&rm, const BitVector &bv, bool signedBV); FloatingPoint (const FloatingPointSize +&ct, const RoundingMode &rm, const Rational &r); +*/ + +FloatingPoint2::~FloatingPoint2() { mpfr_clear(d_val); } + +void FloatingPoint2::print() const +{ + // mpfr_out_str(stdout, 10, 0, d_val, MPFR_RNDD); + mpfr_dump(d_val); +} + +FloatingPoint2 FloatingPoint2::plus(const RoundingMode &rm, + const FloatingPoint2 &arg) const +{ + Assert(d_size == arg.d_size); + + mpfr_rnd_t mpfrRm = getMpfrRndMode(rm); + + setExpRange(); + + FloatingPoint2 res(d_size.exponent(), d_size.significand()); + int i = mpfr_add(res.d_val, d_val, arg.d_val, mpfrRm); + mpfr_subnormalize(res.d_val, i, mpfrRm); + return res; +} + +FloatingPoint2 FloatingPoint2::mult(const RoundingMode &rm, + const FloatingPoint2 &arg) const +{ + Assert(d_size == arg.d_size); + + setExpRange(); + + FloatingPoint2 res(d_size.exponent(), d_size.significand()); + if (rm == roundNearestTiesToAway) + { + int i = mpfr_round_nearest_away(mpfr_mul, res.d_val, d_val, arg.d_val); + i = mpfr_subnormalize(res.d_val, i, MPFR_RNDNA); + } + else + { + mpfr_rnd_t mpfrRm = getMpfrRndMode(rm); + int i = mpfr_mul(res.d_val, d_val, arg.d_val, mpfrRm); + mpfr_subnormalize(res.d_val, i, mpfrRm); + } + return res; +} + +FloatingPoint2 FloatingPoint2::div(const RoundingMode &rm, + const FloatingPoint2 &arg) const +{ + Assert(d_size == arg.d_size); + + setExpRange(); + + FloatingPoint2 res(d_size.exponent(), d_size.significand()); + if (rm == roundNearestTiesToAway) + { + int i = mpfr_round_nearest_away(mpfr_div, res.d_val, d_val, arg.d_val); + i = mpfr_subnormalize(res.d_val, i, MPFR_RNDNA); + } + else + { + mpfr_rnd_t mpfrRm = getMpfrRndMode(rm); + int i = mpfr_div(res.d_val, d_val, arg.d_val, mpfrRm); + mpfr_subnormalize(res.d_val, i, mpfrRm); + } + return res; +} + +FloatingPoint2 FloatingPoint2::sqrt(const RoundingMode &rm) const +{ + setExpRange(); + + FloatingPoint2 res(d_size.exponent(), d_size.significand()); + if (rm == roundNearestTiesToAway) + { + int i = mpfr_round_nearest_away(mpfr_sqrt, res.d_val, d_val); + i = mpfr_subnormalize(res.d_val, i, MPFR_RNDNA); + } + else + { + mpfr_rnd_t mpfrRm = getMpfrRndMode(rm); + int i = mpfr_sqrt(res.d_val, d_val, mpfrRm); + mpfr_subnormalize(res.d_val, i, mpfrRm); + } + return res; +} + +bool FloatingPoint2::operator==(const FloatingPoint2 &fp) const +{ + // mpfr_equal_p() considers +0.0 to be equal to -0.0, which is why we also + // have to make sure that the signs of the floating-point values match. + return (mpfr_equal_p(d_val, fp.d_val) && (isNegative() == fp.isNegative())) + || mpfr_unordered_p(d_val, fp.d_val); +} + +bool FloatingPoint2::operator!=(const FloatingPoint2 &fp) const +{ + return !(*this == fp); +} + +BitVector FloatingPoint2::pack(void) const +{ + // Reset exponent ranges (in case we just did some arithmetic with narrower + // bit-widths. + mpfr_set_emax(mpfr_get_emax_max()); + mpfr_set_emin(mpfr_get_emin_min()); + + unsigned e = d_size.exponent(); + unsigned s = d_size.significand() - 1; + unsigned size = e + s; + + if (isNaN()) + { + return BitVector::mkOnes(size); + } + + BitVector sign(1, isPositive() ? 0u : 1u); + BitVector exp; + BitVector sig; + + if (isZero()) + { + exp = BitVector(e); + sig = BitVector(s); + } + else if (isInfinite()) + { + exp = BitVector::mkOnes(e); + sig = BitVector(s); + } + else if (isSubnormal()) + { + mpfr_exp_t mpfrExp; + char *str = mpfr_get_str(nullptr, &mpfrExp, 2, s + 1, d_val, MPFR_RNDN); + exp = BitVector(e); + std::string sigStr(str + (isNegative() ? 1 : 0)); + sigStr = std::string(getSubnormalThreshold() - mpfrExp, '0') + sigStr; + sigStr.resize(s, '0'); + sig = BitVector(sigStr, 2); + mpfr_free_str(str); + } + else + { + mpfr_exp_t mpfrExp; + char *str = mpfr_get_str(nullptr, &mpfrExp, 2, s + 1, d_val, MPFR_RNDN); + exp = + BitVector(e, static_cast<uint64_t>(mpfrExp + ((1 << (e - 1)) - 1) - 1)); + std::string sigStr(str + 1 + (isNegative() ? 1 : 0)); + sigStr.resize(s, '0'); + sig = BitVector(sigStr, 2); + mpfr_free_str(str); + } + + return sign.concat(exp.concat(sig)); +} + +FloatingPoint2 FloatingPoint2::convert(const FloatingPointSize &target, + const RoundingMode &rm) const +{ + // Reset exponent ranges (in case we just did some arithmetic with narrower + // bit-widths. + mpfr_set_emax(mpfr_get_emax_max()); + mpfr_set_emin(mpfr_get_emin_min() + 1); + + FloatingPoint2 res(target.exponent(), target.significand()); + + if (rm == roundNearestTiesToAway) + { + // mpfr_t tmp; + // mpfr_init2(tmp, target.significand()); + int i = mpfr_round_nearest_away(mpfr_set, res.d_val, d_val); + res.setExpRange(); + i = mpfr_round_nearest_away(mpfr_check_range, res.d_val, i); + // mpfr_round_nearest_away_begin(res.d_val); + // mpfr_set(res.d_val, tmp, MPFR_RNDN); + i = mpfr_subnormalize(res.d_val, i, MPFR_RNDNA); + // mpfr_round_nearest_away_end(res.d_val, i); + } + else + { + int i = mpfr_set(res.d_val, d_val, getMpfrRndMode(rm)); + res.setExpRange(); + i = mpfr_check_range(res.d_val, i, getMpfrRndMode(rm)); + mpfr_subnormalize(res.d_val, i, getMpfrRndMode(rm)); + } + return res; +} + +/* +static FloatingPoint makeNaN (const FloatingPointSize &t); +static FloatingPoint makeInf (const FloatingPointSize &t, bool sign); +static FloatingPoint makeZero (const FloatingPointSize &t, bool sign); + +const FloatingPointLiteral & getLiteral (void) const { + return this->fpl; +} + +// Gives the corresponding IEEE-754 transfer format + + +FloatingPoint absolute (void) const; +FloatingPoint negate (void) const; +FloatingPoint plus (const RoundingMode &rm, const FloatingPoint &arg) const; +FloatingPoint sub (const RoundingMode &rm, const FloatingPoint &arg) const; +FloatingPoint mult (const RoundingMode &rm, const FloatingPoint &arg) const; +FloatingPoint div (const RoundingMode &rm, const FloatingPoint &arg) const; +FloatingPoint fma (const RoundingMode &rm, const FloatingPoint &arg1, const +FloatingPoint &arg2) const; FloatingPoint sqrt (const RoundingMode &rm) const; +FloatingPoint rti (const RoundingMode &rm) const; +FloatingPoint rem (const FloatingPoint &arg) const; + +// zeroCase is whether the left or right is returned in the case of +min/max(-0,+0) or (+0,-0) FloatingPoint maxTotal (const FloatingPoint &arg, bool +zeroCaseLeft) const; FloatingPoint minTotal (const FloatingPoint &arg, bool +zeroCaseLeft) const; + +// These detect when the answer is defined +typedef std::pair<FloatingPoint, bool> PartialFloatingPoint; + +PartialFloatingPoint max (const FloatingPoint &arg) const; +PartialFloatingPoint min (const FloatingPoint &arg) const; + + +bool operator ==(const FloatingPoint& fp) const; +bool operator <= (const FloatingPoint &arg) const; +bool operator < (const FloatingPoint &arg) const; + +bool isNormal (void) const; +bool isSubnormal (void) const; +bool isZero (void) const; +bool isInfinite (void) const; +bool isNaN (void) const; +bool isNegative (void) const; +bool isPositive (void) const; + +FloatingPoint convert (const FloatingPointSize &target, const RoundingMode &rm) +const; + +// These require a value to return in the undefined case +BitVector convertToBVTotal (BitVectorSize width, const RoundingMode &rm, + bool signedBV, BitVector undefinedCase) const; +Rational convertToRationalTotal (Rational undefinedCase) const; + +// These detect when the answer is defined +typedef std::pair<BitVector, bool> PartialBitVector; +typedef std::pair<Rational, bool> PartialRational; + +PartialBitVector convertToBV (BitVectorSize width, const RoundingMode &rm, + bool signedBV) const; +PartialRational convertToRational (void) const; +*/ + +FloatingPoint2::FloatingPoint2(unsigned e, unsigned s) : d_size(e, s) +{ + // Set precision to `s`. `s` includes the sign bit, which the MPFR's + // precision does not count but MPFR's representation does not have a hidden + // bit, so we end up with `s` bits of precision. + mpfr_init2(d_val, s); +} + +mpfr_rnd_t FloatingPoint2::getMpfrRndMode(const RoundingMode &rm) const +{ + switch (rm) + { + case roundNearestTiesToEven: return MPFR_RNDN; + case roundTowardPositive: return MPFR_RNDU; + case roundTowardNegative: return MPFR_RNDD; + case roundTowardZero: return MPFR_RNDZ; + default: + IllegalArgument( + "Rounding mode not supported by MPFR. Note: if you are trying to " + "round-to-nearest-away, consider using mpfr_round_nearest_away."); + } +} + +void FloatingPoint2::setExpRange() const +{ + mpfr_set_emax(1 << (d_size.exponent() - 1)); + mpfr_set_emin(-static_cast<int64_t>((1 << (d_size.exponent() - 1)) + + d_size.significand() - 4)); +} + +} // namespace CVC4 diff --git a/src/util/floatingpoint_mpfr_imp.h b/src/util/floatingpoint_mpfr_imp.h new file mode 100644 index 000000000..f7637ff96 --- /dev/null +++ b/src/util/floatingpoint_mpfr_imp.h @@ -0,0 +1,178 @@ +#include "cvc4_public.h" + +#ifndef __CVC4__FLOATINGPOINT_MPFR_IMP_H +#define __CVC4__FLOATINGPOINT_MPFR_IMP_H + +#define MPFR_USE_C99_FEATURE 1 +#include <mpfr.h> +#include <string> + +#include "util/bitvector.h" +#include "util/floatingpoint_size.h" +#include "util/rounding_mode.h" + +namespace CVC4 { + +class CVC4_PUBLIC FloatingPoint2 +{ + public: + FloatingPoint2(unsigned e, unsigned s, const BitVector &bv); + FloatingPoint2(const FloatingPoint2 &fp); + ~FloatingPoint2(); + + FloatingPoint2 plus(const RoundingMode &rm, const FloatingPoint2 &arg) const; + FloatingPoint2 mult(const RoundingMode &rm, const FloatingPoint2 &arg) const; + FloatingPoint2 div(const RoundingMode &rm, const FloatingPoint2 &arg) const; + FloatingPoint2 sqrt(const RoundingMode &rm) const; + + bool operator==(const FloatingPoint2 &fp) const; + bool operator!=(const FloatingPoint2 &fp) const; + + // Gives the corresponding IEEE-754 transfer format + BitVector pack(void) const; + + /** + * Returns true if the floating-point value is normal (i.e. not subnormal and + * not any special value) and false otherwise. + */ + bool isNormal(void) const + { + return !isZero() && mpfr_number_p(d_val) != 0 + && mpfr_get_exp(d_val) + > -static_cast<int64_t>((1 << (d_size.exponent() - 1)) - 2); + } + + /** + * Returns true if the floating-point value is subnormal and false otherwise. + */ + bool isSubnormal(void) const + { + return !isZero() && mpfr_number_p(d_val) != 0 + && mpfr_get_exp(d_val) <= getSubnormalThreshold(); + } + + /** + * Returns true if the floating-point value is positive or negative zero and + * false otherwise. + */ + bool isZero(void) const { return mpfr_zero_p(d_val) != 0; } + + /** + * Returns true if the floating-point value is positive or negative infinity + * and false otherwise. + */ + bool isInfinite(void) const { return mpfr_inf_p(d_val) != 0; } + + /** + * Returns true if the floating-point value is NaN and false otherwise. + */ + bool isNaN(void) const { return mpfr_nan_p(d_val) != 0; } + + /** + * Returns true if the floating-point value is negative and false otherwise. + * NaN is never negative. + */ + bool isNegative(void) const { return !isNaN() && mpfr_signbit(d_val) != 0; } + + /** + * Returns true if the floating-point value is positive and false otherwise. + * NaN is never positive. + */ + bool isPositive(void) const { return !isNaN() && mpfr_signbit(d_val) == 0; } + + /** + * Converts a floating-point value to a value with a different exponent- and + * signficand size. + */ + FloatingPoint2 convert(const FloatingPointSize &target, + const RoundingMode &rm) const; + + /* + FloatingPoint (const FloatingPointSize &oldt, const FloatingPointLiteral + &oldfpl) : fpl(oldfpl), t(oldt) {} FloatingPoint (const FloatingPoint &fp) : + fpl(fp.fpl), t(fp.t) {} FloatingPoint (const FloatingPointSize &ct, const + RoundingMode &rm, const BitVector &bv, bool signedBV); FloatingPoint (const + FloatingPointSize &ct, const RoundingMode &rm, const Rational &r); + + static FloatingPoint makeNaN (const FloatingPointSize &t); + static FloatingPoint makeInf (const FloatingPointSize &t, bool sign); + static FloatingPoint makeZero (const FloatingPointSize &t, bool sign); + + const FloatingPointLiteral & getLiteral (void) const { + return this->fpl; + } + + + + FloatingPoint absolute (void) const; + FloatingPoint negate (void) const; + FloatingPoint plus (const RoundingMode &rm, const FloatingPoint &arg) const; + FloatingPoint sub (const RoundingMode &rm, const FloatingPoint &arg) const; + FloatingPoint mult (const RoundingMode &rm, const FloatingPoint &arg) const; + FloatingPoint div (const RoundingMode &rm, const FloatingPoint &arg) const; + FloatingPoint fma (const RoundingMode &rm, const FloatingPoint &arg1, const + FloatingPoint &arg2) const; FloatingPoint sqrt (const RoundingMode &rm) const; + FloatingPoint rti (const RoundingMode &rm) const; + FloatingPoint rem (const FloatingPoint &arg) const; + + // zeroCase is whether the left or right is returned in the case of + min/max(-0,+0) or (+0,-0) FloatingPoint maxTotal (const FloatingPoint &arg, + bool zeroCaseLeft) const; FloatingPoint minTotal (const FloatingPoint &arg, + bool zeroCaseLeft) const; + + // These detect when the answer is defined + typedef std::pair<FloatingPoint, bool> PartialFloatingPoint; + + PartialFloatingPoint max (const FloatingPoint &arg) const; + PartialFloatingPoint min (const FloatingPoint &arg) const; + + + bool operator ==(const FloatingPoint& fp) const; + bool operator <= (const FloatingPoint &arg) const; + bool operator < (const FloatingPoint &arg) const; + + bool isNormal (void) const; + bool isSubnormal (void) const; + bool isZero (void) const; + bool isInfinite (void) const; + bool isNaN (void) const; + bool isNegative (void) const; + bool isPositive (void) const; + + FloatingPoint convert (const FloatingPointSize &target, const RoundingMode + &rm) const; + + // These require a value to return in the undefined case + BitVector convertToBVTotal (BitVectorSize width, const RoundingMode &rm, + bool signedBV, BitVector undefinedCase) const; + Rational convertToRationalTotal (Rational undefinedCase) const; + + // These detect when the answer is defined + typedef std::pair<BitVector, bool> PartialBitVector; + typedef std::pair<Rational, bool> PartialRational; + + PartialBitVector convertToBV (BitVectorSize width, const RoundingMode &rm, + bool signedBV) const; + PartialRational convertToRational (void) const; + */ + + void print() const; + + private: + FloatingPoint2(unsigned e, unsigned s); + + mpfr_rnd_t getMpfrRndMode(const RoundingMode &rm) const; + int64_t getSubnormalThreshold() const + { + return -static_cast<int64_t>((1 << (d_size.exponent() - 1)) - 2); + } + + void setExpRange() const; + + FloatingPointSize d_size; + mpfr_t d_val; +}; /* class FloatingPoint */ + +} // namespace CVC4 + +#endif diff --git a/src/util/floatingpoint_size.h b/src/util/floatingpoint_size.h new file mode 100644 index 000000000..bea708b5f --- /dev/null +++ b/src/util/floatingpoint_size.h @@ -0,0 +1,101 @@ +/********************* */ +/*! \file floatingpoint.h.in + ** \verbatim + ** Top contributors (to current version): + ** Martin Brain, Tim King, Andres Noetzli + ** Copyright (c) 2013 University of Oxford + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief [[ Utility functions for working with floating point theories. ]] + ** + ** [[ This file contains the data structures used by the constant and + ** parametric types of the floating point theory. ]] + **/ + +#include "cvc4_public.h" + +#ifndef __CVC4__UTIL__FLOATINGPOINT_SIZE_H +#define __CVC4__UTIL__FLOATINGPOINT_SIZE_H + +namespace CVC4 { + +// Inline these! +inline bool CVC4_PUBLIC validExponentSize(unsigned e) { return e >= 2; } +inline bool CVC4_PUBLIC validSignificandSize(unsigned s) { return s >= 2; } + +/** + * Floating point sorts are parameterised by two non-zero constants + * giving the width (in bits) of the exponent and significand + * (including the hidden bit). + */ +class CVC4_PUBLIC FloatingPointSize +{ + /* + Class invariants: + * VALIDEXPONENTSIZE(e) + * VALIDSIGNIFCANDSIZE(s) + */ + + public: + FloatingPointSize(unsigned _e, unsigned _s); + FloatingPointSize(const FloatingPointSize& old); + + inline unsigned exponent(void) const { return this->e; } + + inline unsigned significand(void) const { return this->s; } + + bool operator==(const FloatingPointSize& fps) const + { + return (e == fps.e) && (s == fps.s); + } + + // Implement the interface that symfpu uses for floating-point formats. + inline unsigned exponentWidth(void) const { return this->exponent(); } + inline unsigned significandWidth(void) const { return this->significand(); } + inline unsigned packedWidth(void) const + { + return this->exponentWidth() + this->significandWidth(); + } + inline unsigned packedExponentWidth(void) const + { + return this->exponentWidth(); + } + inline unsigned packedSignificandWidth(void) const + { + return this->significandWidth() - 1; + } + + private: + unsigned e; + unsigned s; +}; /* class FloatingPointSize */ + +struct CVC4_PUBLIC FloatingPointSizeHashFunction +{ + static inline size_t ROLL(size_t X, size_t N) + { + return (((X) << (N)) | ((X) >> (8 * sizeof((X)) - (N)))); + } + + inline size_t operator()(const FloatingPointSize& fpt) const + { + return size_t(ROLL(fpt.exponent(), 4 * sizeof(unsigned)) + | fpt.significand()); + } +}; /* struct FloatingPointSizeHashFunction */ + +inline std::ostream& operator<<(std::ostream& os, + const FloatingPointSize& fps) CVC4_PUBLIC; +inline std::ostream& operator<<(std::ostream& os, const FloatingPointSize& fps) +{ + return os << "(_ FloatingPoint " << fps.exponent() << " " << fps.significand() + << ")"; +} + +} // namespace CVC4 + +#endif /* __CVC4__UTIL__FLOATINGPOINT_SIZE_H */ diff --git a/src/util/rounding_mode.h b/src/util/rounding_mode.h new file mode 100644 index 000000000..2ab05d39a --- /dev/null +++ b/src/util/rounding_mode.h @@ -0,0 +1,49 @@ +/********************* */ +/*! \file rounding_mode.h + ** \verbatim + ** Top contributors (to current version): + ** Martin Brain, Tim King, Andres Noetzli + ** Copyright (c) 2013 University of Oxford + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief [[ Utility functions for working with floating point theories. ]] + ** + ** [[ This file contains the data structures used by the constant and + ** parametric types of the floating point theory. ]] + **/ + +#include "cvc4_public.h" + +#ifndef __CVC4__UTIL__ROUNDING_MODE_H +#define __CVC4__UTIL__ROUNDING_MODE_H + +#include <fenv.h> + +namespace CVC4 { + +/** + * A concrete instance of the rounding mode sort + */ +enum CVC4_PUBLIC RoundingMode +{ + roundNearestTiesToEven = FE_TONEAREST, + roundTowardPositive = FE_UPWARD, + roundTowardNegative = FE_DOWNWARD, + roundTowardZero = FE_TOWARDZERO, + // Initializes this to the diagonalization of the 4 other values. + roundNearestTiesToAway = (((~FE_TONEAREST) & 0x1) | ((~FE_UPWARD) & 0x2) + | ((~FE_DOWNWARD) & 0x4) | ((~FE_TOWARDZERO) & 0x8)) +}; /* enum RoundingMode */ + +struct CVC4_PUBLIC RoundingModeHashFunction +{ + inline size_t operator()(const RoundingMode& rm) const { return size_t(rm); } +}; /* struct RoundingModeHashFunction */ + +} // namespace CVC4 + +#endif /* __CVC4__UTIL__ROUNDING_MODE_H */ |