Floating point symfpu support (#1103)

- Update the parser to the new constant construction - Fix the problem with parsing +/-zero and remove some dead code - Extend the interface for literal floating-point values. - Add a constructor so that a parameteric operator structure can be created from a type - Add constructors so parametric operator constants can be easily converted - Update SMT2 printing so that it uses the informative output
author: Martin <martin.brain@diffblue.com> 2017-09-19 01:14:05 +0100
committer: Andres Noetzli <andres.noetzli@gmail.com> 2017-09-18 17:14:05 -0700
commit: 61a846a4998be697867292924454893271eb6496 (patch)
tree: 7d1c5a5cadb8a5c538efea00abdd82b5681b809a /src/util
parent: 053003a64bde91aa32f688d248d83c3d4f271250 (diff)
2 files changed, 423 insertions, 18 deletions
diff --git a/src/util/floatingpoint.cpp b/src/util/floatingpoint.cpp
index 8c446d9d2..fe90279ec 100644
--- a/src/util/floatingpoint.cpp
+++ b/src/util/floatingpoint.cpp
@@ -36,4 +36,307 @@ void FloatingPointLiteral::unfinished (void) const {
   Unimplemented("Floating-point literals not yet implemented.");
 }
 
+  FloatingPoint::FloatingPoint (unsigned e, unsigned s, const BitVector &bv) :
+    fpl(e,s,0.0),
+    t(e,s) {}
+
+
+  static FloatingPointLiteral constructorHelperBitVector (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV) {
+    if (signedBV) {
+      return FloatingPointLiteral(2,2,0.0);
+    } else {
+      return FloatingPointLiteral(2,2,0.0);
+    }
+    Unreachable("Constructor helper broken");
+  }
+  
+  FloatingPoint::FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV) :
+    fpl(constructorHelperBitVector(ct, rm, bv, signedBV)),
+    t(ct) {}
+
+  
+  static FloatingPointLiteral constructorHelperRational (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &ri) {
+    Rational r(ri);
+    Rational two(2,1);
+    
+    if (r.isZero()) {
+      return FloatingPointLiteral(2,2,0.0);
+    } else {
+      //int negative = (r.sgn() < 0) ? 1 : 0;
+      r = r.abs();
+
+      // Compute the exponent
+      Integer exp(0U);
+      Integer inc(1U);
+      Rational working(1,1);
+
+      if (r == working) {
+
+      } else if ( r < working ) {
+	while (r < working) {
+	  exp -= inc;
+	  working /= two;
+	}
+      } else {
+	while (r >= working) {
+	  exp += inc;
+	  working *= two;
+	}
+	exp -= inc;
+	working /= two;
+      }
+
+      Assert(working <= r);
+      Assert(r < working * two);
+
+      // Work out the number of bits required to represent the exponent for a normal number
+      unsigned expBits = 2; // No point starting with an invalid amount
+
+      Integer doubleInt(2);
+      if (exp.strictlyPositive()) {
+	Integer representable(4);     // 1 more than exactly representable with expBits
+	while (representable <= exp) {// hence <=
+	  representable *= doubleInt;
+	  ++expBits;
+	}
+      } else if (exp.strictlyNegative()) {
+	Integer representable(-4);    // Exactly representable with expBits + sign
+	                              // but -2^n and -(2^n - 1) are both subnormal
+	while ((representable + doubleInt) > exp) {
+	  representable *= doubleInt;
+	  ++expBits;
+	}
+      }
+      ++expBits; // To allow for sign
+
+      BitVector exactExp(expBits, exp);
+
+      
+      
+      // Compute the significand.
+      unsigned sigBits = ct.significand() + 2; // guard and sticky bits
+      BitVector sig(sigBits, 0U);
+      BitVector one(sigBits, 1U);
+      Rational workingSig(0,1);
+      for (unsigned i = 0; i < sigBits - 1; ++i) {
+	Rational mid(workingSig + working);
+
+	if (mid <= r) {
+	  sig = sig | one;
+	  workingSig = mid;
+	}
+
+	sig = sig.leftShift(one);
+	working /= two;
+      }
+
+      // Compute the sticky bit
+      Rational remainder(r - workingSig);
+      Assert(Rational(0,1) <= remainder);
+
+      if (!remainder.isZero()) {
+	sig = sig | one;
+      }
+
+      // Build an exact float
+      FloatingPointSize exactFormat(expBits, sigBits);
+
+      // A small subtlety... if the format has expBits the unpacked format
+      // may have more to allow subnormals to be normalised.
+      // Thus...
+      Unreachable("no concrete implementation of FloatingPointLiteral");
+    }
+    Unreachable("Constructor helper broken");
+  }
+  
+  FloatingPoint::FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &r) :
+    fpl(constructorHelperRational(ct, rm, r)),
+    t(ct) {}
+
+  
+  FloatingPoint FloatingPoint::makeNaN (const FloatingPointSize &t) {
+    return FloatingPoint(2, 2, BitVector(4U,0U));
+  }
+
+  FloatingPoint FloatingPoint::makeInf (const FloatingPointSize &t, bool sign) {
+    return FloatingPoint(2, 2, BitVector(4U,0U));
+  }
+
+  FloatingPoint FloatingPoint::makeZero (const FloatingPointSize &t, bool sign) {
+    return FloatingPoint(2, 2, BitVector(4U,0U));
+  }
+
+
+  /* Operations implemented using symfpu */
+  FloatingPoint FloatingPoint::absolute (void) const {
+    return *this;
+  }
+  
+  FloatingPoint FloatingPoint::negate (void) const {
+    return *this;
+  }
+  
+  FloatingPoint FloatingPoint::plus (const RoundingMode &rm, const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::sub (const RoundingMode &rm, const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::mult (const RoundingMode &rm, const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::fma (const RoundingMode &rm, const FloatingPoint &arg1, const FloatingPoint &arg2) const {
+    Assert(this->t == arg1.t);
+    Assert(this->t == arg2.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::div (const RoundingMode &rm, const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::sqrt (const RoundingMode &rm) const {
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::rti (const RoundingMode &rm) const {
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::rem (const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint FloatingPoint::maxTotal (const FloatingPoint &arg, bool zeroCaseLeft) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+  
+  FloatingPoint FloatingPoint::minTotal (const FloatingPoint &arg, bool zeroCaseLeft) const {
+    Assert(this->t == arg.t);
+    return *this;
+  }
+
+  FloatingPoint::PartialFloatingPoint FloatingPoint::max (const FloatingPoint &arg) const {
+    FloatingPoint tmp(maxTotal(arg, true));
+    return PartialFloatingPoint(tmp, tmp == maxTotal(arg, false));
+  }
+
+  FloatingPoint::PartialFloatingPoint FloatingPoint::min (const FloatingPoint &arg) const {
+    FloatingPoint tmp(minTotal(arg, true));
+    return PartialFloatingPoint(tmp, tmp == minTotal(arg, false));
+  }
+
+  bool FloatingPoint::operator ==(const FloatingPoint& fp) const {
+    return ( (t == fp.t) );
+  }
+
+  bool FloatingPoint::operator <= (const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return false;
+  }
+
+  bool FloatingPoint::operator < (const FloatingPoint &arg) const {
+    Assert(this->t == arg.t);
+    return false;
+  }
+
+  bool FloatingPoint::isNormal (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isSubnormal (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isZero (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isInfinite (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isNaN (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isNegative (void) const {
+    return false;
+  }
+
+  bool FloatingPoint::isPositive (void) const {
+    return false;
+  }
+
+  FloatingPoint FloatingPoint::convert (const FloatingPointSize &target, const RoundingMode &rm) const {
+    return *this;
+  }
+  
+  BitVector FloatingPoint::convertToBVTotal (BitVectorSize width, const RoundingMode &rm, bool signedBV, BitVector undefinedCase) const {
+    if (signedBV)
+      return undefinedCase;
+    else
+      return undefinedCase;
+  }
+
+  Rational FloatingPoint::convertToRationalTotal (Rational undefinedCase) const {
+    PartialRational p(convertToRational());
+
+    return p.second ? p.first : undefinedCase;
+  }
+
+  FloatingPoint::PartialBitVector FloatingPoint::convertToBV (BitVectorSize width, const RoundingMode &rm, bool signedBV) const {
+    BitVector tmp(convertToBVTotal (width, rm, signedBV, BitVector(width, 0U)));
+    BitVector confirm(convertToBVTotal (width, rm, signedBV, BitVector(width, 1U)));
+
+    return PartialBitVector(tmp, tmp == confirm);
+  }
+
+  FloatingPoint::PartialRational FloatingPoint::convertToRational (void) const {
+    if (this->isNaN() || this->isInfinite()) {
+      return PartialRational(Rational(0U, 1U), false);
+    }
+    if (this->isZero()) {
+      return PartialRational(Rational(0U, 1U), true);
+      
+    } else {
+
+      Integer sign(0);
+      Integer exp(0);
+      Integer significand(0);
+      Integer signedSignificand(sign * significand);
+      
+      // Only have pow(uint32_t) so we should check this.
+      Assert(this->t.significand() <= 32);
+      
+      if (!(exp.strictlyNegative())) {
+	Integer r(signedSignificand.multiplyByPow2(exp.toUnsignedInt()));
+	return PartialRational(Rational(r), true);
+      } else {
+	Integer one(1U);
+	Integer q(one.multiplyByPow2((-exp).toUnsignedInt()));
+	Rational r(signedSignificand, q);
+	return PartialRational(r, true);
+      }
+    }
+
+    Unreachable("Convert float literal to real broken.");
+  }
+
+  BitVector FloatingPoint::pack (void) const {
+    BitVector bv(4u,0u);
+    return bv;
+  }
+
+
+
 }/* CVC4 namespace */
diff --git a/src/util/floatingpoint.h b/src/util/floatingpoint.h
index b3be84f13..fa4573123 100644
--- a/src/util/floatingpoint.h
+++ b/src/util/floatingpoint.h
@@ -23,6 +23,7 @@
 #include <fenv.h>
 
 #include "util/bitvector.h"
+#include "util/rational.h"
 
 namespace CVC4 {
   // Inline these!
@@ -127,25 +128,83 @@ namespace CVC4 {
   public :
     FloatingPointSize t;
 
-    FloatingPoint (unsigned e, unsigned s, double d) : fpl(e,s,d), t(e,s) {}
-    FloatingPoint (unsigned e, unsigned s, const std::string &bitString) : fpl(e,s,bitString), t(e,s) {}
+    FloatingPoint (unsigned e, unsigned s, const BitVector &bv);
+    FloatingPoint (const FloatingPointSize &oldt, const FloatingPointLiteral &oldfpl) : fpl(oldfpl), t(oldt) {}
     FloatingPoint (const FloatingPoint &fp) : fpl(fp.fpl), t(fp.t) {}
+    FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV);
+    FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &r);
 
-    bool operator ==(const FloatingPoint& fp) const {
-      return ( (t == fp.t) && fpl == fp.fpl );
-    }
+
+    static FloatingPoint makeNaN (const FloatingPointSize &t);
+    static FloatingPoint makeInf (const FloatingPointSize &t, bool sign);
+    static FloatingPoint makeZero (const FloatingPointSize &t, bool sign);
 
     const FloatingPointLiteral & getLiteral (void) const {
       return this->fpl;
     }
 
+    // Gives the corresponding IEEE-754 transfer format
+    BitVector pack (void) const;
+
+
+    FloatingPoint absolute (void) const;
+    FloatingPoint negate (void) const;
+    FloatingPoint plus (const RoundingMode &rm, const FloatingPoint &arg) const;
+    FloatingPoint sub (const RoundingMode &rm, const FloatingPoint &arg) const;
+    FloatingPoint mult (const RoundingMode &rm, const FloatingPoint &arg) const;
+    FloatingPoint div (const RoundingMode &rm, const FloatingPoint &arg) const;
+    FloatingPoint fma (const RoundingMode &rm, const FloatingPoint &arg1, const FloatingPoint &arg2) const;
+    FloatingPoint sqrt (const RoundingMode &rm) const;
+    FloatingPoint rti (const RoundingMode &rm) const;
+    FloatingPoint rem (const FloatingPoint &arg) const;
+
+    // zeroCase is whether the left or right is returned in the case of min/max(-0,+0) or (+0,-0)
+    FloatingPoint maxTotal (const FloatingPoint &arg, bool zeroCaseLeft) const;
+    FloatingPoint minTotal (const FloatingPoint &arg, bool zeroCaseLeft) const;
+
+    // These detect when the answer is defined
+    typedef std::pair<FloatingPoint, bool> PartialFloatingPoint;
+
+    PartialFloatingPoint max (const FloatingPoint &arg) const;
+    PartialFloatingPoint min (const FloatingPoint &arg) const;
+
+
+    bool operator ==(const FloatingPoint& fp) const;
+    bool operator <= (const FloatingPoint &arg) const;
+    bool operator < (const FloatingPoint &arg) const;
+
+    bool isNormal (void) const;
+    bool isSubnormal (void) const;
+    bool isZero (void) const;
+    bool isInfinite (void) const;
+    bool isNaN (void) const;
+    bool isNegative (void) const;
+    bool isPositive (void) const;
+
+    FloatingPoint convert (const FloatingPointSize &target, const RoundingMode &rm) const;
+
+    // These require a value to return in the undefined case
+    BitVector convertToBVTotal (BitVectorSize width, const RoundingMode &rm,
+				bool signedBV, BitVector undefinedCase) const;
+    Rational convertToRationalTotal (Rational undefinedCase) const;
+
+    // These detect when the answer is defined
+    typedef std::pair<BitVector, bool> PartialBitVector;
+    typedef std::pair<Rational, bool> PartialRational;
+
+    PartialBitVector convertToBV (BitVectorSize width, const RoundingMode &rm,
+				bool signedBV) const;
+    PartialRational convertToRational (void) const;
+
   }; /* class FloatingPoint */
 
 
   struct CVC4_PUBLIC FloatingPointHashFunction {
-    inline size_t operator() (const FloatingPoint& fp) const {
-      FloatingPointSizeHashFunction h;
-      return h(fp.t) ^ fp.getLiteral().hash();
+    size_t operator() (const FloatingPoint& fp) const {
+      FloatingPointSizeHashFunction fpshf;
+      BitVectorHashFunction bvhf;
+
+      return fpshf(fp.t) ^ bvhf(fp.pack());
     }
   }; /* struct FloatingPointHashFunction */
 
@@ -158,6 +217,8 @@ namespace CVC4 {
 
     FloatingPointConvertSort (unsigned _e, unsigned _s)
       : t(_e,_s) {}
+    FloatingPointConvertSort (const FloatingPointSize &fps)
+      : t(fps) {}
 
     bool operator ==(const FloatingPointConvertSort& fpcs) const {
       return t == fpcs.t;
@@ -171,22 +232,39 @@ namespace CVC4 {
    */
 
   class CVC4_PUBLIC FloatingPointToFPIEEEBitVector : public FloatingPointConvertSort {
-  public : FloatingPointToFPIEEEBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPIEEEBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPIEEEBitVector (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
+
   class CVC4_PUBLIC FloatingPointToFPFloatingPoint : public FloatingPointConvertSort {
-  public : FloatingPointToFPFloatingPoint (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPFloatingPoint (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPFloatingPoint (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
+
   class CVC4_PUBLIC FloatingPointToFPReal : public FloatingPointConvertSort {
-  public : FloatingPointToFPReal (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPReal (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPReal (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
+
   class CVC4_PUBLIC FloatingPointToFPSignedBitVector : public FloatingPointConvertSort {
-  public : FloatingPointToFPSignedBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPSignedBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPSignedBitVector (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
+
   class CVC4_PUBLIC FloatingPointToFPUnsignedBitVector : public FloatingPointConvertSort {
-  public : FloatingPointToFPUnsignedBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPUnsignedBitVector (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPUnsignedBitVector (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
+
   class CVC4_PUBLIC FloatingPointToFPGeneric : public FloatingPointConvertSort {
-  public : FloatingPointToFPGeneric (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+  public :
+    FloatingPointToFPGeneric (unsigned _e, unsigned _s) : FloatingPointConvertSort(_e,_s) {}
+    FloatingPointToFPGeneric (const FloatingPointConvertSort &old) : FloatingPointConvertSort(old) {}
   };
 
 
@@ -215,14 +293,26 @@ namespace CVC4 {
 
     FloatingPointToBV (unsigned s)
       : bvs(s) {}
+    FloatingPointToBV (const FloatingPointToBV &old) : bvs(old.bvs) {}
+    FloatingPointToBV (const BitVectorSize &old) : bvs(old) {}
     operator unsigned () const { return bvs; }
   };
 
   class CVC4_PUBLIC FloatingPointToUBV : public FloatingPointToBV {
   public : FloatingPointToUBV (unsigned _s) : FloatingPointToBV(_s) {}
+    FloatingPointToUBV (const FloatingPointToBV &old) : FloatingPointToBV(old) {}
   };
   class CVC4_PUBLIC FloatingPointToSBV : public FloatingPointToBV {
   public : FloatingPointToSBV (unsigned _s) : FloatingPointToBV(_s) {}
+    FloatingPointToSBV (const FloatingPointToBV &old) : FloatingPointToBV(old) {}
+  };
+  class CVC4_PUBLIC FloatingPointToUBVTotal : public FloatingPointToBV {
+  public : FloatingPointToUBVTotal (unsigned _s) : FloatingPointToBV(_s) {}
+    FloatingPointToUBVTotal (const FloatingPointToBV &old) : FloatingPointToBV(old) {}
+  };
+  class CVC4_PUBLIC FloatingPointToSBVTotal : public FloatingPointToBV {
+  public : FloatingPointToSBVTotal (unsigned _s) : FloatingPointToBV(_s) {}
+    FloatingPointToSBVTotal (const FloatingPointToBV &old) : FloatingPointToBV(old) {}
   };
 
 
@@ -235,16 +325,28 @@ namespace CVC4 {
   }; /* struct FloatingPointToBVHashFunction */
 
 
-
+  // It is not possible to pack a number without a size to pack to,
+  // thus it is difficult to implement this in a sensible way.  Use
+  // FloatingPoint instead...
+  /*
   inline std::ostream& operator <<(std::ostream& os, const FloatingPointLiteral& fp) CVC4_PUBLIC;
   inline std::ostream& operator <<(std::ostream& os, const FloatingPointLiteral& fp) {
-    fp.unfinished();
-    return os;
+    return os << "FloatingPointLiteral";
   }
+  */
 
   inline std::ostream& operator <<(std::ostream& os, const FloatingPoint& fp) CVC4_PUBLIC;
   inline std::ostream& operator <<(std::ostream& os, const FloatingPoint& fp) {
-    return os << fp.getLiteral();
+    BitVector bv(fp.pack());
+
+    unsigned largestSignificandBit = fp.t.significand() - 2; // -1 for -inclusive, -1 for hidden
+    unsigned largestExponentBit = (fp.t.exponent() - 1) + (largestSignificandBit + 1);
+
+    return os
+      << "(fp #b" << bv.extract(largestExponentBit + 1, largestExponentBit + 1)
+      << " #b" << bv.extract(largestExponentBit, largestSignificandBit + 1)
+      << " #b" << bv.extract(largestSignificandBit, 0)
+      << ")";
   }
 
   inline std::ostream& operator <<(std::ostream& os, const FloatingPointSize& fps) CVC4_PUBLIC;
author	Martin <martin.brain@diffblue.com>	2017-09-19 01:14:05 +0100
committer	Andres Noetzli <andres.noetzli@gmail.com>	2017-09-18 17:14:05 -0700
commit	61a846a4998be697867292924454893271eb6496 (patch)
tree	7d1c5a5cadb8a5c538efea00abdd82b5681b809a /src/util
parent	053003a64bde91aa32f688d248d83c3d4f271250 (diff)