summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2020-02-24 16:21:45 -0600
committerGitHub <noreply@github.com>2020-02-24 16:21:45 -0600
commitb0fa6b29a1e15b231547eab147b49f2883a139de (patch)
tree6aa77261ea9d53d945d93b8847acade934a1a914 /src
parent9fbe415992986d33d09b3b9e5049ebc22d20790a (diff)
Utilities for words (#3797)
This adds a utility file for handling operations over constant "words" (this will eventually generalize string or sequence constants). This is work towards CVC4/cvc4-projects#23. Also improves documentation in regexp.h.
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt2
-rw-r--r--src/theory/strings/word.cpp227
-rw-r--r--src/theory/strings/word.h121
-rw-r--r--src/util/regexp.cpp7
-rw-r--r--src/util/regexp.h25
5 files changed, 373 insertions, 9 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 45912a30d..9a81ccc63 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -702,6 +702,8 @@ libcvc4_add_sources(
theory/strings/theory_strings_utils.cpp
theory/strings/theory_strings_utils.h
theory/strings/type_enumerator.h
+ theory/strings/word.cpp
+ theory/strings/word.h
theory/subs_minimize.cpp
theory/subs_minimize.h
theory/substitutions.cpp
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
new file mode 100644
index 000000000..655df7da3
--- /dev/null
+++ b/src/theory/strings/word.cpp
@@ -0,0 +1,227 @@
+/********************* */
+/*! \file word.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of utility functions for words.
+ **/
+
+#include "theory/strings/word.h"
+
+#include "util/regexp.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+Node Word::mkEmptyWord(Kind k)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ if (k == CONST_STRING)
+ {
+ std::vector<unsigned> vec;
+ return nm->mkConst(String(vec));
+ }
+ Unimplemented();
+ return Node::null();
+}
+
+Node Word::mkWord(const std::vector<Node>& xs)
+{
+ Assert(!xs.empty());
+ NodeManager* nm = NodeManager::currentNM();
+ Kind k = xs[0].getKind();
+ if (k == CONST_STRING)
+ {
+ std::vector<unsigned> vec;
+ for (TNode x : xs)
+ {
+ Assert(x.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ const std::vector<unsigned>& vecc = sx.getVec();
+ vec.insert(vec.end(), vecc.begin(), vecc.end());
+ }
+ return nm->mkConst(String(vec));
+ }
+ Unimplemented();
+ return Node::null();
+}
+
+size_t Word::getLength(TNode x)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ return x.getConst<String>().size();
+ }
+ Unimplemented();
+ return 0;
+}
+
+bool Word::isEmpty(TNode x) { return getLength(x) == 0; }
+
+std::size_t Word::find(TNode x, TNode y, std::size_t start)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.find(sy, start);
+ }
+ Unimplemented();
+ return 0;
+}
+
+std::size_t Word::rfind(TNode x, TNode y, std::size_t start)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.rfind(sy, start);
+ }
+ Unimplemented();
+ return 0;
+}
+
+bool Word::hasPrefix(TNode x, TNode y)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.hasPrefix(sy);
+ }
+ Unimplemented();
+ return false;
+}
+
+bool Word::hasSuffix(TNode x, TNode y)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.hasSuffix(sy);
+ }
+ Unimplemented();
+ return false;
+}
+
+Node Word::replace(TNode x, TNode y, TNode t)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ Assert(t.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ String st = t.getConst<String>();
+ return nm->mkConst(String(sx.replace(sy, st)));
+ }
+ Unimplemented();
+ return Node::null();
+}
+Node Word::substr(TNode x, std::size_t i)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ String sx = x.getConst<String>();
+ return nm->mkConst(String(sx.substr(i)));
+ }
+ Unimplemented();
+ return Node::null();
+}
+Node Word::substr(TNode x, std::size_t i, std::size_t j)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ String sx = x.getConst<String>();
+ return nm->mkConst(String(sx.substr(i, j)));
+ }
+ Unimplemented();
+ return Node::null();
+}
+
+Node Word::prefix(TNode x, std::size_t i) { return substr(x, 0, i); }
+
+Node Word::suffix(TNode x, std::size_t i)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ String sx = x.getConst<String>();
+ return nm->mkConst(String(sx.suffix(i)));
+ }
+ Unimplemented();
+ return Node::null();
+}
+
+bool Word::noOverlapWith(TNode x, TNode y)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.noOverlapWith(sy);
+ }
+ Unimplemented();
+ return false;
+}
+
+std::size_t Word::overlap(TNode x, TNode y)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.overlap(sy);
+ }
+ Unimplemented();
+ return 0;
+}
+
+std::size_t Word::roverlap(TNode x, TNode y)
+{
+ Kind k = x.getKind();
+ if (k == CONST_STRING)
+ {
+ Assert(y.getKind() == CONST_STRING);
+ String sx = x.getConst<String>();
+ String sy = y.getConst<String>();
+ return sx.roverlap(sy);
+ }
+ Unimplemented();
+ return 0;
+}
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
new file mode 100644
index 000000000..e4d10247a
--- /dev/null
+++ b/src/theory/strings/word.h
@@ -0,0 +1,121 @@
+/********************* */
+/*! \file word.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ ** Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved. See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Utility functions for words.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__WORD_H
+#define CVC4__THEORY__STRINGS__WORD_H
+
+#include <vector>
+
+#include "expr/node.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+// ------------------------------ for words (string or sequence constants)
+class Word
+{
+public:
+ /** make empty constant of kind k */
+ static Node mkEmptyWord(Kind k);
+
+ /** make word from constants in (non-empty) vector vec */
+ static Node mkWord(const std::vector<Node>& xs);
+
+ /** Return the length of word x */
+ static size_t getLength(TNode x);
+
+ /** Return true if x is empty */
+ static bool isEmpty(TNode x);
+
+ /** Return the first position y occurs in x, or std::string::npos otherwise */
+ static std::size_t find(TNode x, TNode y, std::size_t start = 0);
+
+ /**
+ * Return the first position y occurs in x searching from the end of x, or
+ * std::string::npos otherwise
+ */
+ static std::size_t rfind(TNode x, TNode y, std::size_t start = 0);
+
+ /** Returns true if y is a prefix of x */
+ static bool hasPrefix(TNode x, TNode y);
+
+ /** Returns true if y is a suffix of x */
+ static bool hasSuffix(TNode x, TNode y);
+
+ /** Replace the first occurrence of y in x with t */
+ static Node replace(TNode x, TNode y, TNode t);
+
+ /** Return the substring/subsequence of x starting at index i */
+ static Node substr(TNode x, std::size_t i);
+
+ /** Return the substring/subsequence of x starting at index i with size at most
+ * j */
+ static Node substr(TNode x, std::size_t i, std::size_t j);
+
+ /** Return the prefix of x of size at most i */
+ static Node prefix(TNode x, std::size_t i);
+
+ /** Return the suffix of x of size at most i */
+ static Node suffix(TNode x, std::size_t i);
+
+ /**
+ * Checks if there is any overlap between word x and another word y. This
+ * corresponds to checking whether one string contains the other and whether a
+ * substring/subsequence of one is a prefix of the other and vice-versa.
+ *
+ * @param x The first string
+ * @param y The second string
+ * @return True if there is an overlap, false otherwise
+ */
+ static bool noOverlapWith(TNode x, TNode y);
+
+ /** overlap
+ *
+ * if overlap returns m>0,
+ * then the maximal suffix of this string that is a prefix of y is of length m.
+ *
+ * For example, if x is "abcdef", then:
+ * x.overlap("defg") = 3
+ * x.overlap("ab") = 0
+ * x.overlap("d") = 0
+ * x.overlap("bcdefdef") = 5
+ */
+ static std::size_t overlap(TNode x, TNode y);
+
+ /** reverse overlap
+ *
+ * if roverlap returns m>0,
+ * then the maximal prefix of this word that is a suffix of y is of length m.
+ *
+ * For example, if x is "abcdef", then:
+ * x.roverlap("aaabc") = 3
+ * x.roverlap("def") = 0
+ * x.roverlap("d") = 0
+ * x.roverlap("defabcde") = 5
+ *
+ * Notice that x.overlap(y) = y.roverlap(x)
+ */
+ static std::size_t roverlap(TNode x, TNode y);
+};
+
+// ------------------------------ end for words (string or sequence constants)
+
+} // namespace strings
+} // namespace theory
+} // namespace CVC4
+
+#endif
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index ee63308ab..4dfe68b51 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -462,6 +462,13 @@ String String::substr(std::size_t i, std::size_t j) const {
return String(ret_vec);
}
+bool String::noOverlapWith(const String& y) const
+{
+ return y.find(*this) == std::string::npos
+ && this->find(y) == std::string::npos && this->overlap(y) == 0
+ && y.overlap(*this) == 0;
+}
+
bool String::isNumber() const {
if (d_str.empty()) {
return false;
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 5b6dc2b62..1cde53687 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -140,34 +140,41 @@ class CVC4_PUBLIC String {
bool isRepeated() const;
bool tailcmp(const String& y, int& c) const;
+ /**
+ * Return the first position y occurs in this string, or std::string::npos
+ * otherwise.
+ */
std::size_t find(const String& y, const std::size_t start = 0) const;
+ /**
+ * Return the first position y occurs in this string searching from the end,
+ * or std::string::npos otherwise.
+ */
std::size_t rfind(const String& y, const std::size_t start = 0) const;
/** Returns true if y is a prefix of this */
bool hasPrefix(const String& y) const;
/** Returns true if y is a suffix of this */
bool hasSuffix(const String& y) const;
-
+ /** Replace the first occurrence of s in this string with t */
String replace(const String& s, const String& t) const;
+ /** Return the substring of this string starting at index i */
String substr(std::size_t i) const;
+ /** Return the substring of this string starting at index i with size at most
+ * j */
String substr(std::size_t i, std::size_t j) const;
-
+ /** Return the prefix of this string of size at most i */
String prefix(std::size_t i) const { return substr(0, i); }
+ /** Return the suffix of this string of size at most i */
String suffix(std::size_t i) const { return substr(size() - i, i); }
/**
* Checks if there is any overlap between this string and another string. This
- * corresponds to checking whether one string contains the other and wether a
+ * corresponds to checking whether one string contains the other and whether a
* substring of one is a prefix of the other and vice-versa.
*
* @param y The other string
* @return True if there is an overlap, false otherwise
*/
- bool noOverlapWith(const String& y) const
- {
- return y.find(*this) == std::string::npos
- && this->find(y) == std::string::npos && this->overlap(y) == 0
- && y.overlap(*this) == 0;
- }
+ bool noOverlapWith(const String& y) const;
/** string overlap
*
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback