diff options
author | Andres Noetzli <andres.noetzli@gmail.com> | 2021-12-03 12:29:00 -0800 |
---|---|---|
committer | Andres Noetzli <andres.noetzli@gmail.com> | 2021-12-03 12:29:00 -0800 |
commit | ed739ecb71a71fe23aff4a877cbf39542281ed37 (patch) | |
tree | aafc34ccecb2cad8154d0a9e65086b04f63db6a0 | |
parent | 774770af22c882ade8f44aedbeed027cdf3d9496 (diff) |
Faster hasing for `cvc5::String`optStringHash
Previously, our hashing algorithm for `cvc5::String` would create an
`std::string` representation of the string and then call
`std::hash<std::string>` on the result. Creating the temporary
`std::string` is quite expensive and can be avoided by computing the
hash directly on the elements of the vector of characters in
`cvc5::String`.
This change improves our solving time on an industrial benchmark from
about 59s to 55s.
-rw-r--r-- | src/expr/sequence.cpp | 4 | ||||
-rw-r--r-- | src/util/hash.h | 5 | ||||
-rw-r--r-- | src/util/string.cpp | 15 | ||||
-rw-r--r-- | src/util/string.h | 11 |
4 files changed, 28 insertions, 7 deletions
diff --git a/src/expr/sequence.cpp b/src/expr/sequence.cpp index 6b15f33e0..82c83e0ea 100644 --- a/src/expr/sequence.cpp +++ b/src/expr/sequence.cpp @@ -371,13 +371,13 @@ std::ostream& operator<<(std::ostream& os, const Sequence& s) size_t SequenceHashFunction::operator()(const Sequence& s) const { - size_t ret = 0; + uint64_t ret = fnv1a::offsetBasis; const std::vector<Node>& vec = s.getVec(); for (const Node& n : vec) { ret = fnv1a::fnv1a_64(ret, std::hash<Node>()(n)); } - return ret; + return static_cast<size_t>(ret); } } // namespace cvc5 diff --git a/src/util/hash.h b/src/util/hash.h index db280984b..ef8bf4a2a 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -43,12 +43,15 @@ namespace cvc5 { namespace fnv1a { +constexpr uint64_t offsetBasis = 14695981039346656037U; + /** * FNV-1a hash algorithm for 64-bit numbers. * * More details here: http://www.isthe.com/chongo/tech/comp/fnv/index.html */ -inline uint64_t fnv1a_64(uint64_t v, uint64_t hash = 14695981039346656037U) { +inline uint64_t fnv1a_64(uint64_t v, uint64_t hash = offsetBasis) +{ hash ^= v; // Compute (hash * 1099511628211) return hash + (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + diff --git a/src/util/string.cpp b/src/util/string.cpp index 50b0a7199..2235ac5ee 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -23,6 +23,7 @@ #include "base/check.h" #include "base/exception.h" +#include "util/hash.h" using namespace std; @@ -519,6 +520,20 @@ Rational String::toNumber() const return Rational(toString()); } +namespace strings { + +size_t StringHashFunction::operator()(const ::cvc5::String& s) const +{ + uint64_t ret = fnv1a::offsetBasis; + for (unsigned c : s.d_str) + { + ret = fnv1a::fnv1a_64(c, ret); + } + return static_cast<size_t>(ret); +} + +} // namespace strings + std::ostream &operator<<(std::ostream &os, const String &s) { return os << "\"" << s.toString() << "\""; } diff --git a/src/util/string.h b/src/util/string.h index 3f6384082..017c60fb8 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -26,6 +26,10 @@ namespace cvc5 { +namespace strings { +struct StringHashFunction; +} + /** The cvc5 string class * * This data structure is the domain of values for the string type. It can also @@ -33,6 +37,8 @@ namespace cvc5 { */ class String { + friend strings::StringHashFunction; + public: /** * This is the cardinality of the alphabet that is representable by this @@ -267,10 +273,7 @@ namespace strings { struct StringHashFunction { - size_t operator()(const ::cvc5::String& s) const - { - return std::hash<std::string>()(s.toString()); - } + size_t operator()(const ::cvc5::String& s) const; }; /* struct StringHashFunction */ } // namespace strings |