summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2020-09-22 17:41:09 -0700
committerGitHub <noreply@github.com>2020-09-22 19:41:09 -0500
commit115511f6c5732cac1c3718d365d1cf5596541c95 (patch)
tree5510c2e9874c8241ade6218e09e5223f29282031
parentbb0190a3d272e8d3207cfc358f79c647ed67acaf (diff)
[Python API] Conversion to/from Unicode strings (#5120)
Fixes #5024. This commit adds a conversion from constant string terms to native Python Unicode strings in Term.toPythonObj() and improves Solver.mkString() to accept strings containing characters outside of the printable range.
-rw-r--r--src/api/python/cvc4.pxi25
-rw-r--r--src/util/string.cpp6
-rw-r--r--test/unit/api/python/test_to_python_obj.py14
3 files changed, 39 insertions, 6 deletions
diff --git a/src/api/python/cvc4.pxi b/src/api/python/cvc4.pxi
index 9e4102b90..daff390b4 100644
--- a/src/api/python/cvc4.pxi
+++ b/src/api/python/cvc4.pxi
@@ -700,7 +700,9 @@ cdef class Solver:
cdef Term term = Term(self)
cdef vector[unsigned] v
if isinstance(str_or_vec, str):
- term.cterm = self.csolver.mkString(<string &> str_or_vec.encode())
+ for u in str_or_vec:
+ v.push_back(<unsigned> ord(u))
+ term.cterm = self.csolver.mkString(<const vector[unsigned]&> v)
elif isinstance(str_or_vec, list):
for u in str_or_vec:
if not isinstance(u, int):
@@ -1517,6 +1519,7 @@ cdef class Term:
BV -- returns a Python int (treats BV as unsigned)
Array -- returns a Python dict mapping indices to values
-- the constant base is returned as the default value
+ String -- returns a Python Unicode string
'''
if not self.isConst():
@@ -1590,6 +1593,26 @@ cdef class Term:
res = defaultdict(lambda : base_value)
for k, v in zip(keys, values):
res[k] = v
+ elif sort.isString():
+ # Strip leading and trailing double quotes and replace double
+ # double quotes by single quotes
+ string_repr = string_repr[1:-1].replace('""', '"')
+
+ # Convert escape sequences
+ res = ''
+ escape_prefix = '\\u{'
+ i = 0
+ while True:
+ prev_i = i
+ i = string_repr.find(escape_prefix, i)
+ if i == -1:
+ res += string_repr[prev_i:]
+ break
+
+ res += string_repr[prev_i:i]
+ val = string_repr[i + len(escape_prefix):string_repr.find('}', i)]
+ res += chr(int(val, 16))
+ i += len(escape_prefix) + len(val) + 1
else:
raise ValueError("Cannot convert term {}"
" of sort {} to Python object".format(string_repr,
diff --git a/src/util/string.cpp b/src/util/string.cpp
index af16e6a62..0d40ebb05 100644
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -274,9 +274,9 @@ std::size_t String::roverlap(const String &y) const {
std::string String::toString(bool useEscSequences) const {
std::stringstream str;
for (unsigned int i = 0; i < size(); ++i) {
- // we always print forward slash as a code point so that it cannot
- // be interpreted as specifying part of a code point, e.g. the string
- // '\' + 'u' + '0' of length three.
+ // we always print backslash as a code point so that it cannot be
+ // interpreted as specifying part of a code point, e.g. the string '\' +
+ // 'u' + '0' of length three.
if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
{
str << static_cast<char>(d_str[i]);
diff --git a/test/unit/api/python/test_to_python_obj.py b/test/unit/api/python/test_to_python_obj.py
index 69b7b9228..f8cd234ee 100644
--- a/test/unit/api/python/test_to_python_obj.py
+++ b/test/unit/api/python/test_to_python_obj.py
@@ -49,8 +49,6 @@ def testGetArray():
array_dict = stores.toPythonObj()
- print(array_dict)
-
assert array_dict[1] == 2
assert array_dict[2] == 3
assert array_dict[4] == 5
@@ -64,3 +62,15 @@ def testGetSymbol():
with pytest.raises(RuntimeError):
x.toPythonObj()
+
+
+def testGetString():
+ solver = pycvc4.Solver()
+
+ s1 = '"test\n"😃\\u{a}'
+ t1 = solver.mkString(s1)
+ assert s1 == t1.toPythonObj()
+
+ s2 = '❤️CVC4❤️'
+ t2 = solver.mkString(s2)
+ assert s2 == t2.toPythonObj()
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback