24 files changed, 549 insertions, 285 deletions
diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g
index 82c0581ce..033389610 100644
--- a/src/parser/cvc/Cvc.g
+++ b/src/parser/cvc/Cvc.g
@@ -2083,7 +2083,7 @@ stringTerm[CVC4::api::Term& f]
 
     /* string literal */
   | str[s]
-    { f = SOLVER->mkString(s, true); }
+    { f = PARSER_STATE->mkStringConstant(s); }
 
   | setsTerm[f]
   ;
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp
index b36f36a93..5dca92370 100644
--- a/src/parser/parser.cpp
+++ b/src/parser/parser.cpp
@@ -757,5 +757,153 @@ void Parser::attributeNotSupported(const std::string& attr) {
   }
 }
 
+std::vector<unsigned> Parser::processAdHocStringEsc(const std::string& s)
+{
+  std::vector<unsigned> str;
+  unsigned i = 0;
+  while (i < s.size())
+  {
+    // get the current character
+    if (s[i] != '\\')
+    {
+      // don't worry about printable here
+      str.push_back(static_cast<unsigned>(s[i]));
+      ++i;
+      continue;
+    }
+    // slash is always escaped
+    ++i;
+    if (i >= s.size())
+    {
+      // slash cannot be the last character if we are parsing escape sequences
+      std::stringstream serr;
+      serr << "Escape sequence at the end of string: \"" << s
+           << "\" should be handled by lexer";
+      parseError(serr.str());
+    }
+    switch (s[i])
+    {
+      case 'n':
+      {
+        str.push_back(static_cast<unsigned>('\n'));
+        i++;
+      }
+      break;
+      case 't':
+      {
+        str.push_back(static_cast<unsigned>('\t'));
+        i++;
+      }
+      break;
+      case 'v':
+      {
+        str.push_back(static_cast<unsigned>('\v'));
+        i++;
+      }
+      break;
+      case 'b':
+      {
+        str.push_back(static_cast<unsigned>('\b'));
+        i++;
+      }
+      break;
+      case 'r':
+      {
+        str.push_back(static_cast<unsigned>('\r'));
+        i++;
+      }
+      break;
+      case 'f':
+      {
+        str.push_back(static_cast<unsigned>('\f'));
+        i++;
+      }
+      break;
+      case 'a':
+      {
+        str.push_back(static_cast<unsigned>('\a'));
+        i++;
+      }
+      break;
+      case '\\':
+      {
+        str.push_back(static_cast<unsigned>('\\'));
+        i++;
+      }
+      break;
+      case 'x':
+      {
+        bool isValid = false;
+        if (i + 2 < s.size())
+        {
+          if (std::isxdigit(s[i + 1]) && std::isxdigit(s[i + 2]))
+          {
+            std::stringstream shex;
+            shex << s[i + 1] << s[i + 2];
+            unsigned val;
+            shex >> std::hex >> val;
+            str.push_back(val);
+            i += 3;
+            isValid = true;
+          }
+        }
+        if (!isValid)
+        {
+          std::stringstream serr;
+          serr << "Illegal String Literal: \"" << s
+               << "\", must have two digits after \\x";
+          parseError(serr.str());
+        }
+      }
+      break;
+      default:
+      {
+        if (std::isdigit(s[i]))
+        {
+          // octal escape sequences  TODO : revisit (issue #1251).
+          unsigned num = static_cast<unsigned>(s[i]) - 48;
+          bool flag = num < 4;
+          if (i + 1 < s.size() && num < 8 && std::isdigit(s[i + 1])
+              && s[i + 1] < '8')
+          {
+            num = num * 8 + static_cast<unsigned>(s[i + 1]) - 48;
+            if (flag && i + 2 < s.size() && std::isdigit(s[i + 2])
+                && s[i + 2] < '8')
+            {
+              num = num * 8 + static_cast<unsigned>(s[i + 2]) - 48;
+              str.push_back(num);
+              i += 3;
+            }
+            else
+            {
+              str.push_back(num);
+              i += 2;
+            }
+          }
+          else
+          {
+            str.push_back(num);
+            i++;
+          }
+        }
+      }
+    }
+  }
+  return str;
+}
+
+Expr Parser::mkStringConstant(const std::string& s)
+{
+  ExprManager* em = d_solver->getExprManager();
+  if (em->getOptions().getInputLanguage()
+      == language::input::LANG_SMTLIB_V2_6_1)
+  {
+    return d_solver->mkString(s, true).getExpr();
+  }
+  // otherwise, we must process ad-hoc escape sequences
+  std::vector<unsigned> str = processAdHocStringEsc(s);
+  return d_solver->mkString(str).getExpr();
+}
+
 } /* CVC4::parser namespace */
 } /* CVC4 namespace */
diff --git a/src/parser/parser.h b/src/parser/parser.h
index ecea4d3bd..d6c0e0e15 100644
--- a/src/parser/parser.h
+++ b/src/parser/parser.h
@@ -889,6 +889,28 @@ public:
         name, api::sortVectorToTypes(argTypes));
   }
   //------------------------ end operator overloading
+  /**
+   * Make string constant
+   *
+   * This makes the string constant based on the string s. This may involve
+   * processing ad-hoc escape sequences (if the language is not
+   * SMT-LIB 2.6.1 or higher), or otherwise calling the solver to construct
+   * the string.
+   */
+  Expr mkStringConstant(const std::string& s);
+
+ private:
+  /** ad-hoc string escaping
+   *
+   * Returns the (internal) vector of code points corresponding to processing
+   * the escape sequences in string s. This is to support string inputs that
+   * do no comply with the SMT-LIB standard.
+   *
+   * This method handles escape sequences, including \n, \t, \v, \b, \r, \f, \a,
+   * \\, \x[N] and octal escape sequences of the form \[c1]([c2]([c3])?)? where
+   * c1, c2, c3 are digits from 0 to 7.
+   */
+  std::vector<unsigned> processAdHocStringEsc(const std::string& s);
 };/* class Parser */
 
 }/* CVC4::parser namespace */
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g
index ec1eae7da..69f21acb7 100644
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -2091,7 +2091,7 @@ termAtomic[CVC4::api::Term& atomTerm]
     }
 
   // String constant
-  | str[s,false] { atomTerm = SOLVER->mkString(s, true); }
+  | str[s,false] { atomTerm = PARSER_STATE->mkStringConstant(s); }
 
   // NOTE: Theory constants go here
 
diff --git a/src/preprocessing/passes/synth_rew_rules.cpp b/src/preprocessing/passes/synth_rew_rules.cpp
index 7b8e61359..f1e9e39c5 100644
--- a/src/preprocessing/passes/synth_rew_rules.cpp
+++ b/src/preprocessing/passes/synth_rew_rules.cpp
@@ -169,7 +169,7 @@ PreprocessingPassResult SynthRewRulesPass::applyInternal(
       std::stringstream ssv;
       if (varCounter < 26)
       {
-        ssv << String::convertUnsignedIntToChar(varCounter + 32);
+        ssv << static_cast<char>(varCounter + 61);
       }
       else
       {
diff --git a/src/printer/cvc/cvc_printer.cpp b/src/printer/cvc/cvc_printer.cpp
index cad3c4640..1178c7299 100644
--- a/src/printer/cvc/cvc_printer.cpp
+++ b/src/printer/cvc/cvc_printer.cpp
@@ -160,6 +160,11 @@ void CvcPrinter::toStream(
       toStreamRational(out, n, false);
       break;
     }
+    case kind::CONST_STRING:
+    {
+      out << '"' << n.getConst<String>().toString() << '"';
+      break;
+    }
     case kind::TYPE_CONSTANT:
       switch(TypeConstant tc = n.getConst<TypeConstant>()) {
       case REAL_TYPE:
diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp
index 541827f89..6e4fcb63a 100644
--- a/src/printer/smt2/smt2_printer.cpp
+++ b/src/printer/smt2/smt2_printer.cpp
@@ -202,7 +202,7 @@ void Smt2Printer::toStream(std::ostream& out,
     }
 
     case kind::CONST_STRING: {
-      std::string s = n.getConst<String>().toString(true);
+      std::string s = n.getConst<String>().toString();
       out << '"';
       for(size_t i = 0; i < s.size(); ++i) {
         char c = s[i];
diff --git a/src/theory/evaluator.cpp b/src/theory/evaluator.cpp
index b827912d5..646f903f5 100644
--- a/src/theory/evaluator.cpp
+++ b/src/theory/evaluator.cpp
@@ -626,8 +626,7 @@ EvalResult Evaluator::evalInternal(
           const String& s = results[currNode[0]].d_str;
           if (s.size() == 1)
           {
-            results[currNode] = EvalResult(
-                Rational(String::convertUnsignedIntToCode(s.getVec()[0])));
+            results[currNode] = EvalResult(Rational(s.getVec()[0]));
           }
           else
           {
diff --git a/src/theory/quantifiers/sygus_sampler.cpp b/src/theory/quantifiers/sygus_sampler.cpp
index 28cfa69df..e9c858814 100644
--- a/src/theory/quantifiers/sygus_sampler.cpp
+++ b/src/theory/quantifiers/sygus_sampler.cpp
@@ -560,8 +560,7 @@ Node SygusSampler::getRandomValue(TypeNode tn)
       for (unsigned ch : alphas)
       {
         d_rstring_alphabet.push_back(ch);
-        Trace("sygus-sample-str-alpha")
-            << " \"" << String::convertUnsignedIntToChar(ch) << "\"";
+        Trace("sygus-sample-str-alpha") << " \\u" << ch;
       }
       Trace("sygus-sample-str-alpha") << std::endl;
     }
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index d5105a489..9a2091eac 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -739,9 +739,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
       }
       case kind::REGEXP_RANGE: {
         unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
         unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
         Assert(a < b);
         Assert(b < std::numeric_limits<unsigned>::max());
         for (unsigned c = a; c <= b; c++)
@@ -756,7 +754,6 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
           String s = st.getConst<String>();
           if(s.size() != 0) {
             unsigned sc = s.front();
-            sc = String::convertUnsignedIntToCode(sc);
             cset.insert(sc);
           }
         }
@@ -765,7 +762,6 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
           if(st[0].isConst()) {
             String s = st[0].getConst<String>();
             unsigned sc = s.front();
-            sc = String::convertUnsignedIntToCode(sc);
             cset.insert(sc);
           } else {
             vset.insert( st[0] );
@@ -887,13 +883,11 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
       case kind::REGEXP_RANGE: {
         std::vector< Node > vec;
         unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
         unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
         for (unsigned c = a; c <= b; c++)
         {
           std::vector<unsigned> tmpVec;
-          tmpVec.push_back(String::convertCodeToUnsignedInt(c));
+          tmpVec.push_back(c);
           Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate();
           vec.push_back( tmp );
         }
@@ -1522,7 +1516,7 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node >
              ++it)
         {
           std::vector<unsigned> cvec;
-          cvec.push_back(String::convertCodeToUnsignedInt(*it));
+          cvec.push_back(*it);
           String c(cvec);
           Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
           Node r1l = derivativeSingle(r1, c);
diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp
index 716634d5f..b0940b7e1 100644
--- a/src/theory/strings/sequences_rewriter.cpp
+++ b/src/theory/strings/sequences_rewriter.cpp
@@ -1421,11 +1421,8 @@ bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s,
       if (s.size() == index_start + 1)
       {
         unsigned a = r[0].getConst<String>().front();
-        a = String::convertUnsignedIntToCode(a);
         unsigned b = r[1].getConst<String>().front();
-        b = String::convertUnsignedIntToCode(b);
         unsigned c = s.back();
-        c = String::convertUnsignedIntToCode(c);
         return (a <= c && c <= b);
       }
       else
diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp
index 75dfe7432..c7676d049 100644
--- a/src/theory/strings/strings_rewriter.cpp
+++ b/src/theory/strings/strings_rewriter.cpp
@@ -93,7 +93,7 @@ Node StringsRewriter::rewriteStrConvert(Node node)
     std::vector<unsigned> nvec = node[0].getConst<String>().getVec();
     for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++)
     {
-      unsigned newChar = String::convertUnsignedIntToCode(nvec[i]);
+      unsigned newChar = nvec[i];
       // transform it
       // upper 65 ... 90
       // lower 97 ... 122
@@ -111,7 +111,6 @@ Node StringsRewriter::rewriteStrConvert(Node node)
           newChar = newChar + 32;
         }
       }
-      newChar = String::convertCodeToUnsignedInt(newChar);
       nvec[i] = newChar;
     }
     Node retNode = nm->mkConst(String(nvec));
@@ -231,7 +230,7 @@ Node StringsRewriter::rewriteStringToCode(Node n)
     {
       std::vector<unsigned> vec = s.getVec();
       Assert(vec.size() == 1);
-      ret = nm->mkConst(Rational(String::convertUnsignedIntToCode(vec[0])));
+      ret = nm->mkConst(Rational(vec[0]));
     }
     else
     {
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 16183abdd..a81c96318 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -382,7 +382,7 @@ bool TheoryStrings::collectModelInfoType(
                   ctv.getConst<Rational>().getNumerator().toUnsignedInt();
               Trace("strings-model") << "(code: " << cvalue << ") ";
               std::vector<unsigned> vec;
-              vec.push_back(String::convertCodeToUnsignedInt(cvalue));
+              vec.push_back(cvalue);
               Node mv = nm->mkConst(String(vec));
               pure_eq_assign[eqc] = mv;
               m->getEqualityEngine()->addTerm(mv);
@@ -1099,13 +1099,14 @@ void TheoryStrings::registerTerm(Node n, int effort)
   else if (n.getKind() == STRING_TO_CODE)
   {
     d_has_str_code = true;
-    // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 )
+    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
     Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
     Node code_eq_neg1 = n.eqNode(d_neg_one);
     Node code_range = nm->mkNode(
         AND,
         nm->mkNode(GEQ, n, d_zero),
-        nm->mkNode(LT, n, nm->mkConst(Rational(CVC4::String::num_codes()))));
+        nm->mkNode(
+            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
     regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
   }
   else if (n.getKind() == STRING_STRIDOF)
diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h
index 7ef31a92a..93a32f26e 100644
--- a/src/theory/strings/theory_strings_type_rules.h
+++ b/src/theory/strings/theory_strings_type_rules.h
@@ -299,7 +299,7 @@ public:
           throw TypeCheckingExceptionPrivate(n, "expecting a single constant string term in regexp range");
         }
         unsigned ci = (*it).getConst<String>().front();
-        ch[i] = String::convertUnsignedIntToCode(ci);
+        ch[i] = ci;
         ++it;
       }
       if(ch[0] > ch[1]) {
diff --git a/src/theory/strings/type_enumerator.cpp b/src/theory/strings/type_enumerator.cpp
index 12cf899b4..7352ae5de 100644
--- a/src/theory/strings/type_enumerator.cpp
+++ b/src/theory/strings/type_enumerator.cpp
@@ -21,6 +21,44 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
+Node makeStandardModelConstant(const std::vector<unsigned>& vec,
+                               uint32_t cardinality)
+{
+  std::vector<unsigned> mvec;
+  // if we contain all of the printable characters
+  if (cardinality >= 255)
+  {
+    for (unsigned i = 0, vsize = vec.size(); i < vsize; i++)
+    {
+      unsigned curr = vec[i];
+      // convert
+      Assert(vec[i] < cardinality);
+      if (vec[i] <= 61)
+      {
+        // first 62 printable characters [\u{65}-\u{126}]: 'A', 'B', 'C', ...
+        curr = vec[i] + 65;
+      }
+      else if (vec[i] <= 94)
+      {
+        // remaining 33 printable characters [\u{32}-\u{64}]: ' ', '!', '"', ...
+        curr = vec[i] - 30;
+      }
+      else
+      {
+        // the remaining characters, starting with \u{127} and wrapping around
+        // the first 32 non-printable characters.
+        curr = (vec[i] + 32) % cardinality;
+      }
+      mvec.push_back(curr);
+    }
+  }
+  else
+  {
+    mvec = vec;
+  }
+  return NodeManager::currentNM()->mkConst(String(mvec));
+}
+
 WordIter::WordIter(uint32_t startLength) : d_hasEndLength(false), d_endLength(0)
 {
   for (uint32_t i = 0; i < startLength; i++)
@@ -117,7 +155,7 @@ bool StringEnumLen::increment()
 
 void StringEnumLen::mkCurr()
 {
-  d_curr = NodeManager::currentNM()->mkConst(String(d_witer->getData()));
+  d_curr = makeStandardModelConstant(d_witer->getData(), d_cardinality);
 }
 
 StringEnumerator::StringEnumerator(TypeNode type, TypeEnumeratorProperties* tep)
diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h
index 2061628a5..b379ce5c3 100644
--- a/src/theory/strings/type_enumerator.h
+++ b/src/theory/strings/type_enumerator.h
@@ -28,6 +28,26 @@ namespace theory {
 namespace strings {
 
 /**
+ * Make standard model constant
+ *
+ * In our string representation, we represent characters using vectors
+ * of unsigned integers indicating code points for the characters of that
+ * string.
+ *
+ * To make models user-friendly, we make unsigned integer 0 correspond to the
+ * 65th character ("A") in the ASCII alphabet to make models intuitive. In
+ * particular, say if we have a set of string variables that are distinct but
+ * otherwise unconstrained, then the model may assign them "A", "B", "C", ...
+ *
+ * @param vec The code points of the string in a given model,
+ * @param cardinality The cardinality of the alphabet,
+ * @return A string whose characters have the code points corresponding
+ * to vec in the standard model construction described above.
+ */
+Node makeStandardModelConstant(const std::vector<unsigned>& vec,
+                               uint32_t cardinality);
+
+/**
  * Generic iteration over vectors of indices of a given start/end length.
  */
 class WordIter
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 00066edb6..36ba7182b 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -32,38 +32,12 @@ namespace CVC4 {
 
 static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
 
-unsigned String::convertCharToUnsignedInt(unsigned char c)
-{
-  return convertCodeToUnsignedInt(static_cast<unsigned>(c));
-}
-unsigned char String::convertUnsignedIntToChar(unsigned i)
-{
-  Assert(i < num_codes());
-  return static_cast<unsigned char>(convertUnsignedIntToCode(i));
-}
-bool String::isPrintable(unsigned i)
-{
-  Assert(i < num_codes());
-  unsigned char c = convertUnsignedIntToChar(i);
-  return (c >= ' ' && c <= '~');
-}
-unsigned String::convertCodeToUnsignedInt(unsigned c)
-{
-  Assert(c < num_codes());
-  return (c < start_code() ? c + num_codes() : c) - start_code();
-}
-unsigned String::convertUnsignedIntToCode(unsigned i)
-{
-  Assert(i < num_codes());
-  return (i + start_code()) % num_codes();
-}
-
 String::String(const std::vector<unsigned> &s) : d_str(s)
 {
 #ifdef CVC4_ASSERTIONS
   for (unsigned u : d_str)
   {
-    Assert(convertUnsignedIntToCode(u) < num_codes());
+    Assert(u < num_codes());
   }
 #endif
 }
@@ -74,8 +48,8 @@ int String::cmp(const String &y) const {
   }
   for (unsigned int i = 0; i < size(); ++i) {
     if (d_str[i] != y.d_str[i]) {
-      unsigned cp = convertUnsignedIntToCode(d_str[i]);
-      unsigned cpy = convertUnsignedIntToCode(y.d_str[i]);
+      unsigned cp = d_str[i];
+      unsigned cpy = y.d_str[i];
       return cp < cpy ? -1 : 1;
     }
   }
@@ -122,107 +96,143 @@ bool String::rstrncmp(const String& y, std::size_t n) const
   return true;
 }
 
-std::vector<unsigned> String::toInternal(const std::string &s,
-                                         bool useEscSequences) {
+void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
+{
+  // if not a printable character
+  if (ch > 127 || ch < 32)
+  {
+    std::stringstream serr;
+    serr << "Illegal string character: \"" << ch
+         << "\", must use escape sequence";
+    throw CVC4::Exception(serr.str());
+  }
+  else
+  {
+    str.push_back(static_cast<unsigned>(ch));
+  }
+}
+
+std::vector<unsigned> String::toInternal(const std::string& s,
+                                         bool useEscSequences)
+{
   std::vector<unsigned> str;
   unsigned i = 0;
-  while (i < s.size()) {
-    if (s[i] == '\\' && useEscSequences) {
-      i++;
-      if (i < s.size()) {
-        switch (s[i]) {
-          case 'n': {
-            str.push_back(convertCharToUnsignedInt('\n'));
-            i++;
-          } break;
-          case 't': {
-            str.push_back(convertCharToUnsignedInt('\t'));
-            i++;
-          } break;
-          case 'v': {
-            str.push_back(convertCharToUnsignedInt('\v'));
-            i++;
-          } break;
-          case 'b': {
-            str.push_back(convertCharToUnsignedInt('\b'));
-            i++;
-          } break;
-          case 'r': {
-            str.push_back(convertCharToUnsignedInt('\r'));
-            i++;
-          } break;
-          case 'f': {
-            str.push_back(convertCharToUnsignedInt('\f'));
-            i++;
-          } break;
-          case 'a': {
-            str.push_back(convertCharToUnsignedInt('\a'));
-            i++;
-          } break;
-          case '\\': {
-            str.push_back(convertCharToUnsignedInt('\\'));
-            i++;
-          } break;
-          case 'x': {
-            if (i + 2 < s.size()) {
-              if (isxdigit(s[i + 1]) && isxdigit(s[i + 2])) {
-                str.push_back(convertCharToUnsignedInt(hexToDec(s[i + 1]) * 16 +
-                                                       hexToDec(s[i + 2])));
-                i += 3;
-              } else {
-                throw CVC4::Exception("Illegal String Literal: \"" + s + "\"");
-              }
-            } else {
-              throw CVC4::Exception("Illegal String Literal: \"" + s +
-                                    "\", must have two digits after \\x");
-            }
-          } break;
-          default: {
-            if (isdigit(s[i])) {
-              // octal escape sequences  TODO : revisit (issue #1251).
-              int num = (int)s[i] - (int)'0';
-              bool flag = num < 4;
-              if (i + 1 < s.size() && num < 8 && isdigit(s[i + 1]) &&
-                  s[i + 1] < '8') {
-                num = num * 8 + (int)s[i + 1] - (int)'0';
-                if (flag && i + 2 < s.size() && isdigit(s[i + 2]) &&
-                    s[i + 2] < '8') {
-                  num = num * 8 + (int)s[i + 2] - (int)'0';
-                  str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                  i += 3;
-                } else {
-                  str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                  i += 2;
-                }
-              } else {
-                str.push_back(convertCharToUnsignedInt((unsigned char)num));
-                i++;
-              }
-            } else if ((unsigned)s[i] > 127) {
-              throw CVC4::Exception("Illegal String Literal: \"" + s +
-                                    "\", must use escaped sequence");
-            } else {
-              str.push_back(convertCharToUnsignedInt(s[i]));
-              i++;
-            }
+  while (i < s.size())
+  {
+    // get the current character
+    char si = s[i];
+    if (si != '\\' || !useEscSequences)
+    {
+      addCharToInternal(si, str);
+      ++i;
+      continue;
+    }
+    // the vector of characters, in case we fail to read an escape sequence
+    std::vector<unsigned> nonEscCache;
+    // process the '\'
+    addCharToInternal(si, nonEscCache);
+    ++i;
+    // are we an escape sequence?
+    bool isEscapeSequence = true;
+    // the string corresponding to the hexidecimal code point
+    std::stringstream hexString;
+    // is the slash followed by a 'u'? Could be last character.
+    if (i >= s.size() || s[i] != 'u')
+    {
+      isEscapeSequence = false;
+    }
+    else
+    {
+      // process the 'u'
+      addCharToInternal(s[i], nonEscCache);
+      ++i;
+      bool isStart = true;
+      bool isEnd = false;
+      bool hasBrace = false;
+      while (i < s.size())
+      {
+        // add the next character
+        si = s[i];
+        if (isStart)
+        {
+          isStart = false;
+          // possibly read '{'
+          if (si == '{')
+          {
+            hasBrace = true;
+            addCharToInternal(si, nonEscCache);
+            ++i;
+            continue;
           }
         }
-      } else {
-        throw CVC4::Exception("should be handled by lexer: \"" + s + "\"");
-        // str.push_back( convertCharToUnsignedInt('\\') );
+        else if (si == '}')
+        {
+          // can only end if we had an open brace and read at least one digit
+          isEscapeSequence = hasBrace && !hexString.str().empty();
+          isEnd = true;
+          addCharToInternal(si, nonEscCache);
+          ++i;
+          break;
+        }
+        // must be a hex digit at this point
+        if (!isHexDigit(static_cast<unsigned>(si)))
+        {
+          isEscapeSequence = false;
+          break;
+        }
+        hexString << si;
+        addCharToInternal(si, nonEscCache);
+        ++i;
+        if (!hasBrace && hexString.str().size() == 4)
+        {
+          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
+          isEnd = true;
+          break;
+        }
+        else if (hasBrace && hexString.str().size() > 5)
+        {
+          // too many digits enclosed in brace, not an escape sequence
+          isEscapeSequence = false;
+          break;
+        }
+      }
+      if (!isEnd)
+      {
+        // if we were interupted before ending, then this is not a valid
+        // escape sequence
+        isEscapeSequence = false;
+      }
+    }
+    if (isEscapeSequence)
+    {
+      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
+      // Otherwise, we add the escaped character.
+      // This is guaranteed not to overflow due to the length of hstr.
+      uint32_t val;
+      hexString >> std::hex >> val;
+      if (val > num_codes())
+      {
+        // Failed due to being out of range. This can happen for strings of
+        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexidecimal not
+        // in the range [0-2].
+        isEscapeSequence = false;
+      }
+      else
+      {
+        str.push_back(val);
       }
-    } else if ((unsigned)s[i] > 127 && useEscSequences) {
-      throw CVC4::Exception("Illegal String Literal: \"" + s +
-                            "\", must use escaped sequence");
-    } else {
-      str.push_back(convertCharToUnsignedInt(s[i]));
-      i++;
+    }
+    // if we did not successfully parse an escape sequence, we add back all
+    // characters that we cached
+    if (!isEscapeSequence)
+    {
+      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
     }
   }
 #ifdef CVC4_ASSERTIONS
   for (unsigned u : str)
   {
-    Assert(convertUnsignedIntToCode(u) < num_codes());
+    Assert(u < num_codes());
   }
 #endif
   return str;
@@ -265,62 +275,23 @@ std::size_t String::roverlap(const String &y) const {
 }
 
 std::string String::toString(bool useEscSequences) const {
-  std::string str;
+  std::stringstream str;
   for (unsigned int i = 0; i < size(); ++i) {
-    unsigned char c = convertUnsignedIntToChar(d_str[i]);
-    if (!useEscSequences) {
-      str += c;
-    } else if (isprint(c)) {
-      if (c == '\\') {
-        str += "\\\\";
-      }
-      // else if(c == '\"') {
-      //  str += "\\\"";
-      //}
-      else {
-        str += c;
-      }
-    } else {
-      std::string s;
-      switch (c) {
-        case '\a':
-          s = "\\a";
-          break;
-        case '\b':
-          s = "\\b";
-          break;
-        case '\t':
-          s = "\\t";
-          break;
-        case '\r':
-          s = "\\r";
-          break;
-        case '\v':
-          s = "\\v";
-          break;
-        case '\f':
-          s = "\\f";
-          break;
-        case '\n':
-          s = "\\n";
-          break;
-        case '\e':
-          s = "\\e";
-          break;
-        default: {
-          std::stringstream ss;
-          ss << std::setfill('0') << std::setw(2) << std::hex << ((int)c);
-          std::string t = ss.str();
-          t = t.substr(t.size() - 2, 2);
-          s = "\\x" + t;
-          // std::string s2 = static_cast<std::ostringstream*>(
-          // &(std::ostringstream() << (int)c) )->str();
-        }
-      }
-      str += s;
+    // we always print forward slash as a code point so that it cannot
+    // be interpreted as specifying part of a code point, e.g. the string
+    // '\' + 'u' + '0' of length three.
+    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
+    {
+      str << static_cast<char>(d_str[i]);
+    }
+    else
+    {
+      std::stringstream ss;
+      ss << std::hex << d_str[i];
+      str << "\\u{" << ss.str() << "}";
     }
   }
-  return str;
+  return str.str();
 }
 
 bool String::isLeq(const String &y) const
@@ -331,8 +302,8 @@ bool String::isLeq(const String &y) const
     {
       return false;
     }
-    unsigned ci = convertUnsignedIntToCode(d_str[i]);
-    unsigned cyi = convertUnsignedIntToCode(y.d_str[i]);
+    unsigned ci = d_str[i];
+    unsigned cyi = y.d_str[i];
     if (ci > cyi)
     {
       return false;
@@ -484,8 +455,21 @@ bool String::isNumber() const {
 
 bool String::isDigit(unsigned character)
 {
-  unsigned char c = convertUnsignedIntToChar(character);
-  return c >= '0' && c <= '9';
+  // '0' to '9'
+  return 48 <= character && character <= 57;
+}
+
+bool String::isHexDigit(unsigned character)
+{
+  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
+  return isDigit(character) || (65 <= character && character <= 70)
+         || (97 <= character && character <= 102);
+}
+
+bool String::isPrintable(unsigned character)
+{
+  // Unicode 0x00020 (' ') to 0x0007E ('~')
+  return 32 <= character && character <= 126;
 }
 
 size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
@@ -497,17 +481,6 @@ Rational String::toNumber() const
   return Rational(toString());
 }
 
-unsigned char String::hexToDec(unsigned char c) {
-  if (c >= '0' && c <= '9') {
-    return c - '0';
-  } else if (c >= 'a' && c <= 'f') {
-    return c - 'a' + 10;
-  } else {
-    Assert(c >= 'A' && c <= 'F');
-    return c - 'A' + 10;
-  }
-}
-
 std::ostream &operator<<(std::ostream &os, const String &s) {
   return os << "\"" << s.toString(true) << "\"";
 }
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 731736f72..56fb969a3 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -37,60 +37,44 @@ namespace CVC4 {
 class CVC4_PUBLIC String {
  public:
   /**
-   * The start ASCII code. In our string representation below, we represent
-   * characters using a vector d_str of unsigned integers. We refer to this as
-   * the "internal representation" for the string.
-   *
-   * We make unsigned integer 0 correspond to the 65th character ("A") in the
-   * ASCII alphabet to make models intuitive. In particular, say if we have
-   * a set of string variables that are distinct but otherwise unconstrained,
-   * then the model may assign them "A", "B", "C", ...
-   */
-  static inline unsigned start_code() { return 65; }
-  /**
    * This is the cardinality of the alphabet that is representable by this
    * class. Notice that this must be greater than or equal to the cardinality
    * of the alphabet that the string theory reasons about.
    *
    * This must be strictly less than std::numeric_limits<unsigned>::max().
+   *
+   * As per the SMT-LIB standard for strings, we support the first 3 planes of
+   * Unicode characters, where 196608 = 3*16^4.
    */
-  static inline unsigned num_codes() { return 256; }
-  /**
-   * Convert unsigned char to the unsigned used in the internal representation
-   * in d_str below.
-   */
-  static unsigned convertCharToUnsignedInt(unsigned char c);
-  /** Convert the internal unsigned to a unsigned char. */
-  static unsigned char convertUnsignedIntToChar(unsigned i);
-  /** Does the internal unsigned correspond to a printable character? */
-  static bool isPrintable(unsigned i);
-  /** get the internal unsigned for ASCII code c. */
-  static unsigned convertCodeToUnsignedInt(unsigned c);
-  /** get the ASCII code number that internal unsigned i corresponds to. */
-  static unsigned convertUnsignedIntToCode(unsigned i);
-
+  static inline unsigned num_codes() { return 196608; }
   /** constructors for String
-  *
-  * Internally, a CVC4::String is represented by a vector of unsigned
-  * integers (d_str), where the correspondence between C++ characters
-  * to and from unsigned integers is determined by
-  * by convertCharToUnsignedInt and convertUnsignedIntToChar.
-  *
-  * If useEscSequences is true, then the escape sequences in the input
-  * are converted to the corresponding character. This constructor may
-  * throw an exception if the input contains unrecognized escape sequences.
-  * Currently supported escape sequences are \n, \t, \v, \b, \r, \f, \a, \\,
-  * \x[N] where N is a hexidecimal, and octal escape sequences of the
-  * form \[c1]([c2]([c3])?)? where c1, c2, c3 are digits from 0 to 7.
-  *
-  * If useEscSequences is false, then the characters of the constructed
-  * CVC4::String correspond one-to-one with the input string.
-  */
+   *
+   * Internally, a CVC4::String is represented by a vector of unsigned
+   * integers (d_str) representing the code points of the characters.
+   *
+   * To build a string from a C++ string, we may process escape sequences
+   * according to the SMT-LIB standard. In particular, if useEscSequences is
+   * true, we convert unicode escape sequences:
+   *  \u d_3 d_2 d_1 d_0
+   *  \u{d_0}
+   *  \u{d_1 d_0}
+   *  \u{d_2 d_1 d_0}
+   *  \u{d_3 d_2 d_1 d_0}
+   *  \u{d_4 d_3 d_2 d_1 d_0}
+   * where d_0 ... d_4 are hexidecimal digits, to the appropriate character.
+   *
+   * If useEscSequences is false, then the characters of the constructed
+   * CVC4::String correspond one-to-one with the input string.
+   */
   String() = default;
   explicit String(const std::string& s, bool useEscSequences = false)
-      : d_str(toInternal(s, useEscSequences)) {}
+      : d_str(toInternal(s, useEscSequences))
+  {
+  }
   explicit String(const char* s, bool useEscSequences = false)
-      : d_str(toInternal(std::string(s), useEscSequences)) {}
+      : d_str(toInternal(std::string(s), useEscSequences))
+  {
+  }
   explicit String(const std::vector<unsigned>& s);
 
   String& operator=(const String& y) {
@@ -123,20 +107,16 @@ class CVC4_PUBLIC String {
   bool rstrncmp(const String& y, std::size_t n) const;
 
   /* toString
-  * Converts this string to a std::string.
-  *
-  * If useEscSequences is true, then unprintable characters
-  * are converted to escape sequences. The escape sequences
-  * \n, \t, \v, \b, \r, \f, \a, \\ are printed in this way.
-  * For all other unprintable characters, we print \x[N] where
-  * [N] is the 2 digit hexidecimal corresponding to value of
-  * the character.
-  *
-  * If useEscSequences is false, the returned std::string's characters
-  * map one-to-one with the characters in this string.
-  * Notice that for all std::string s, we have that
-  *    CVC4::String( s ).toString() = s.
-  */
+   * Converts this string to a std::string.
+   *
+   * The unprintable characters are converted to unicode escape sequences as
+   * described above.
+   *
+   * If useEscSequences is false, the string's printable characters are
+   * printed as characters. Notice that for all std::string s having only
+   * printable characters, we have that
+   *    CVC4::String( s ).toString() = s.
+   */
   std::string toString(bool useEscSequences = false) const;
   /** is this the empty string? */
   bool empty() const { return d_str.empty(); }
@@ -221,16 +201,32 @@ class CVC4_PUBLIC String {
   bool isNumber() const;
   /** Returns the corresponding rational for the text of this string. */
   Rational toNumber() const;
-  /** get the internal unsigned representation of this string */
+  /** Get the unsigned representation (code points) of this string */
   const std::vector<unsigned>& getVec() const { return d_str; }
-  /** get the internal unsigned value of the first character in this string */
+  /**
+   * Get the unsigned (code point) value of the first character in this string
+   */
   unsigned front() const;
-  /** get the internal unsigned value of the last character in this string */
+  /**
+   * Get the unsigned (code point) value of the last character in this string
+   */
   unsigned back() const;
   /** is the unsigned a digit?
-  * The input should be the same type as the element type of d_str
-  */
+   *
+   * This is true for code points between 48 ('0') and 57 ('9').
+   */
   static bool isDigit(unsigned character);
+  /** is the unsigned a hexidecimal digit?
+   *
+   * This is true for code points between 48 ('0') and 57 ('9'), code points
+   * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
+   */
+  static bool isHexDigit(unsigned character);
+  /** is the unsigned a printable code point?
+   *
+   * This is true for Unicode 32 (' ') to 126 ('~').
+   */
+  static bool isPrintable(unsigned character);
 
   /**
    * Returns the maximum length of string representable by this class.
@@ -238,11 +234,19 @@ class CVC4_PUBLIC String {
    */
   static size_t maxSize();
  private:
-  // guarded
-  static unsigned char hexToDec(unsigned char c);
-
+  /**
+   * Helper for toInternal: add character ch to vector vec, storing a string in
+   * internal format. This throws an error if ch is not a printable character,
+   * since non-printable characters must be escaped in SMT-LIB.
+   */
+  static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
+  /**
+   * Convert the string s to the internal format (vector of code points).
+   * The argument useEscSequences is whether to process unicode escape
+   * sequences.
+   */
   static std::vector<unsigned> toInternal(const std::string& s,
-                                          bool useEscSequences = true);
+                                          bool useEscSequences);
 
   /**
    * Returns a negative number if *this < y, 0 if *this and y are equal and a
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 8fab16b44..8382e40fc 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -922,6 +922,7 @@ set(regress_0_tests
   regress0/strings/escchar.smt2
   regress0/strings/escchar_25.smt2
   regress0/strings/from_code.smt2
+  regress0/strings/gen-esc-seq.smt2
   regress0/strings/hconst-092618.smt2
   regress0/strings/idof-rewrites.smt2
   regress0/strings/idof-sem.smt2
@@ -939,6 +940,8 @@ set(regress_0_tests
   regress0/strings/leadingzero001.smt2
   regress0/strings/loop001.smt2
   regress0/strings/model001.smt2
+  regress0/strings/model-code-point.smt2
+  regress0/strings/model-friendly.smt2
   regress0/strings/ncontrib-rewrites.smt2
   regress0/strings/norn-31.smt2
   regress0/strings/norn-simp-rew.smt2
@@ -967,6 +970,7 @@ set(regress_0_tests
   regress0/strings/tolower-rrs.smt2
   regress0/strings/tolower-simple.smt2
   regress0/strings/type001.smt2
+  regress0/strings/unicode-esc.smt2
   regress0/strings/unsound-0908.smt2
   regress0/strings/unsound-repl-rewrite.smt2
   regress0/sygus/General_plus10.sy
diff --git a/test/regress/regress0/strings/gen-esc-seq.smt2 b/test/regress/regress0/strings/gen-esc-seq.smt2
new file mode 100644
index 000000000..59f66046f
--- /dev/null
+++ b/test/regress/regress0/strings/gen-esc-seq.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --produce-models --lang=smt2.6.1
+; EXPECT: sat
+; EXPECT: ((x "\u{5c}u1000"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-const x String)
+(assert (= x (str.++ "\u" "1000")))
+(check-sat)
+(get-value (x))
diff --git a/test/regress/regress0/strings/model-code-point.smt2 b/test/regress/regress0/strings/model-code-point.smt2
new file mode 100644
index 000000000..1200ae704
--- /dev/null
+++ b/test/regress/regress0/strings/model-code-point.smt2
@@ -0,0 +1,13 @@
+; COMMAND-LINE: --lang=smt2.6.1 --produce-models
+; EXPECT: sat
+; EXPECT: ((x "\u{a}"))
+; EXPECT: ((y "\u{7f}"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(assert (= (str.to_code x) 10))
+(assert (= (str.to_code y) 127))
+(check-sat)
+(get-value (x))
+(get-value (y))
diff --git a/test/regress/regress0/strings/model-friendly.smt2 b/test/regress/regress0/strings/model-friendly.smt2
new file mode 100644
index 000000000..985ffaa62
--- /dev/null
+++ b/test/regress/regress0/strings/model-friendly.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --lang=smt2.6.1 --produce-models
+; EXPECT: sat
+; EXPECT: ((x "AAAAA"))
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (= (str.len x) 5))
+(check-sat)
+(get-value (x))
diff --git a/test/regress/regress0/strings/unicode-esc.smt2 b/test/regress/regress0/strings/unicode-esc.smt2
new file mode 100644
index 000000000..01f5f30ab
--- /dev/null
+++ b/test/regress/regress0/strings/unicode-esc.smt2
@@ -0,0 +1,30 @@
+; COMMAND-LINE: --strings-exp --lang=smt2.6.1
+; EXPECT: sat
+(set-logic ALL)
+
+(assert (= "\u{14}" "\u0014"))
+(assert (= "\u{00}" "\u{0}"))
+(assert (= "\u0000" "\u{0}"))
+(assert (= (str.len "\u1234") 1))
+(assert (= (str.len "\u{1}") 1))
+(assert (= (str.len "\u{99}") 1))
+(assert (= (str.len "\u{779}") 1))
+(assert (= (str.len "\u{0779}") 1))
+(assert (= (str.len "\u{01779}") 1))
+(assert (= (str.len "\u{001779}") 10))
+(assert (= (str.len "\u{0vv79}") 9))
+(assert (= (str.len "\u{11\u1234}") 7))
+(assert (= (str.len "\u12345") 2))
+(assert (= (str.len "\uu") 3))
+(assert (= (str.len "\u{123}\u{567}") 2))
+(assert (= (str.len "\u{0017") 7))
+(assert (= (str.len "\\u00178") 3))
+(assert (= (str.len "2\u{}") 5))
+(assert (= (str.len "\uaaaa") 1))
+(assert (= (str.len "\uAAAA") 1))
+(assert (= (str.len "\u{0AbC}") 1))
+(assert (= (str.len "\u{E}") 1))
+(assert (= (str.len "\u{44444}") 9))
+(assert (= (str.len "\u") 2))
+
+(check-sat)
diff --git a/test/unit/api/solver_black.h b/test/unit/api/solver_black.h
index 27f5aca12..0eefde700 100644
--- a/test/unit/api/solver_black.h
+++ b/test/unit/api/solver_black.h
@@ -556,9 +556,9 @@ void SolverBlack::testMkString()
   TS_ASSERT_THROWS_NOTHING(d_solver->mkString(""));
   TS_ASSERT_THROWS_NOTHING(d_solver->mkString("asdfasdf"));
   TS_ASSERT_EQUALS(d_solver->mkString("asdf\\nasdf").toString(),
-                   "\"asdf\\\\nasdf\"");
-  TS_ASSERT_EQUALS(d_solver->mkString("asdf\\nasdf", true).toString(),
-                   "\"asdf\\nasdf\"");
+                   "\"asdf\\u{5c}nasdf\"");
+  TS_ASSERT_EQUALS(d_solver->mkString("asdf\\u{005c}nasdf", true).toString(),
+                    "\"asdf\\u{5c}nasdf\"");
 }
 
 void SolverBlack::testMkTerm()