12 files changed, 929 insertions, 772 deletions
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index abcd8189a..e574e4fa9 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -30,10 +30,13 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
-CoreSolver::CoreSolver(context::Context* c, context::UserContext* u, SolverState& s, InferenceManager& im, SkolemCache& skc, BaseSolver& bs) :
-d_state(s), d_im(im), d_skCache(skc),
-d_bsolver(bs),
-d_nf_pairs(c)
+CoreSolver::CoreSolver(context::Context* c,
+                       context::UserContext* u,
+                       SolverState& s,
+                       InferenceManager& im,
+                       TermRegistry& tr,
+                       BaseSolver& bs)
+    : d_state(s), d_im(im), d_termReg(tr), d_bsolver(bs), d_nf_pairs(c)
 {
   d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
   d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
@@ -1252,7 +1255,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           // If the equality rewrites to a constant, we must use the
           // purify variable for this string to communicate that
           // we have inferred whether it is empty.
-          Node p = d_skCache.mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym");
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node p = skc->mkSkolemCached(nc, SkolemCache::SK_PURIFY, "lsym");
           Node pEq = p.eqNode(emp);
           // should not be constant
           Assert(!Rewriter::rewrite(pEq).isConst());
@@ -1326,7 +1330,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
           Node prea = p == straLen ? stra
                                    : (isRev ? Word::suffix(stra, p)
                                             : Word::prefix(stra, p));
-          Node sk = d_skCache.mkSkolemCached(
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node sk = skc->mkSkolemCached(
               nc,
               prea,
               isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT,
@@ -1352,7 +1357,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       Node firstChar = straLen == 1 ? stra
                                     : (isRev ? Word::suffix(stra, 1)
                                              : Word::prefix(stra, 1));
-      Node sk = d_skCache.mkSkolemCached(
+      SkolemCache* skc = d_termReg.getSkolemCache();
+      Node sk = skc->mkSkolemCached(
           nc,
           isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT,
           "c_spt");
@@ -1425,7 +1431,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
         info.d_antn.push_back(tnz);
       }
     }
-    Node sk = d_skCache.mkSkolemCached(
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk = skc->mkSkolemCached(
         x,
         y,
         isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT,
@@ -1664,10 +1671,11 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi,
     Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking."
                           << std::endl;
     // right
-    Node sk_w = d_skCache.mkSkolem("w_loop");
-    Node sk_y = d_skCache.mkSkolem("y_loop");
-    d_im.registerTermAtomic(sk_y, LENGTH_GEQ_ONE);
-    Node sk_z = d_skCache.mkSkolem("z_loop");
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk_w = skc->mkSkolem("w_loop");
+    Node sk_y = skc->mkSkolem("y_loop");
+    d_termReg.registerTermAtomic(sk_y, LENGTH_GEQ_ONE);
+    Node sk_z = skc->mkSkolem("z_loop");
     // t1 * ... * tn = y * z
     Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z));
     // s1 * ... * sk = z * y * r
@@ -1838,10 +1846,11 @@ void CoreSolver::processDeq(Node ni, Node nj)
           //
           // E.g. x ++ x' ++ ... != "abc" ++ y' ++ ... ^ len(x) != "" --->
           //      x = k1 ++ k2 ^ len(k1) = 1 ^ (k1 != "a" v x = "a" ++  k2)
-          Node sk = d_skCache.mkSkolemCached(
-              nck, SkolemCache::SK_ID_DC_SPT, "dc_spt");
-          d_im.registerTermAtomic(sk, LENGTH_ONE);
-          Node skr = d_skCache.mkSkolemCached(
+          SkolemCache* skc = d_termReg.getSkolemCache();
+          Node sk =
+              skc->mkSkolemCached(nck, SkolemCache::SK_ID_DC_SPT, "dc_spt");
+          d_termReg.registerTermAtomic(sk, LENGTH_ONE);
+          Node skr = skc->mkSkolemCached(
               nck, SkolemCache::SK_ID_DC_SPT_REM, "dc_spt_rem");
           Node eq1 = nck.eqNode(nm->mkNode(kind::STRING_CONCAT, sk, skr));
           eq1 = Rewriter::rewrite(eq1);
@@ -1884,13 +1893,14 @@ void CoreSolver::processDeq(Node ni, Node nj)
         antecNewLits.push_back(xLenTerm.eqNode(yLenTerm).negate());
 
         std::vector<Node> conc;
-        Node sk1 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit");
-        Node sk2 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
-        Node sk3 = d_skCache.mkSkolemCached(
-            x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit");
-        d_im.registerTermAtomic(sk3, LENGTH_GEQ_ONE);
+        SkolemCache* skc = d_termReg.getSkolemCache();
+        Node sk1 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_X, "x_dsplit");
+        Node sk2 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit");
+        Node sk3 =
+            skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit");
+        d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE);
         Node sk1Len = utils::mkNLength(sk1);
         conc.push_back(sk1Len.eqNode(xLenTerm));
         Node sk2Len = utils::mkNLength(sk2);
@@ -2294,7 +2304,7 @@ void CoreSolver::doInferInfo(const InferInfo& ii)
   {
     for (const Node& n : sks.second)
     {
-      d_im.registerTermAtomic(n, sks.first);
+      d_termReg.registerTermAtomic(n, sks.first);
     }
   }
 }
diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h
index b7b487ff7..cef5bd3be 100644
--- a/src/theory/strings/core_solver.h
+++ b/src/theory/strings/core_solver.h
@@ -24,8 +24,8 @@
 #include "theory/strings/infer_info.h"
 #include "theory/strings/inference_manager.h"
 #include "theory/strings/normal_form.h"
-#include "theory/strings/skolem_cache.h"
 #include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
 
 namespace CVC4 {
 namespace theory {
@@ -46,7 +46,7 @@ class CoreSolver
              context::UserContext* u,
              SolverState& s,
              InferenceManager& im,
-             SkolemCache& skc,
+             TermRegistry& tr,
              BaseSolver& bs);
   ~CoreSolver();
 
@@ -370,8 +370,8 @@ class CoreSolver
   SolverState& d_state;
   /** The (custom) output channel of the theory of strings */
   InferenceManager& d_im;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
   /** reference to the base solver, used for certain queries */
   BaseSolver& d_bsolver;
   /** Commonly used constants */
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index 0f4af2458..1bcd67cb4 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -31,7 +31,7 @@ ExtfSolver::ExtfSolver(context::Context* c,
                        context::UserContext* u,
                        SolverState& s,
                        InferenceManager& im,
-                       SkolemCache& skc,
+                       TermRegistry& tr,
                        StringsRewriter& rewriter,
                        BaseSolver& bs,
                        CoreSolver& cs,
@@ -39,13 +39,13 @@ ExtfSolver::ExtfSolver(context::Context* c,
                        SequencesStatistics& statistics)
     : d_state(s),
       d_im(im),
-      d_skCache(skc),
+      d_termReg(tr),
       d_rewriter(rewriter),
       d_bsolver(bs),
       d_csolver(cs),
       d_extt(et),
       d_statistics(statistics),
-      d_preproc(&skc, u, statistics),
+      d_preproc(d_termReg.getSkolemCache(), u, statistics),
       d_hasExtf(c, false),
       d_extfInferCache(c),
       d_reduced(u)
@@ -160,10 +160,9 @@ bool ExtfSolver::doReduction(int effort, Node n)
     Node x = n[0];
     Node s = n[1];
     // positive contains reduces to a equality
-    Node sk1 =
-        d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
-    Node sk2 =
-        d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
+    SkolemCache* skc = d_termReg.getSkolemCache();
+    Node sk1 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1");
+    Node sk2 = skc->mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2");
     Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2)));
     std::vector<Node> exp_vec;
     exp_vec.push_back(n);
@@ -305,7 +304,7 @@ void ExtfSolver::checkExtfEval(int effort)
           // only use symbolic definitions if option is set
           if (options::stringInferSym())
           {
-            nrs = d_im.getSymbolicDefinition(sn, exps);
+            nrs = d_termReg.getSymbolicDefinition(sn, exps);
           }
           if (!nrs.isNull())
           {
diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h
index f0f0d790b..c5dd24f70 100644
--- a/src/theory/strings/extf_solver.h
+++ b/src/theory/strings/extf_solver.h
@@ -87,7 +87,7 @@ class ExtfSolver
              context::UserContext* u,
              SolverState& s,
              InferenceManager& im,
-             SkolemCache& skc,
+             TermRegistry& tr,
              StringsRewriter& rewriter,
              BaseSolver& bs,
              CoreSolver& cs,
@@ -181,8 +181,8 @@ class ExtfSolver
   SolverState& d_state;
   /** The (custom) output channel of the theory of strings */
   InferenceManager& d_im;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
   /** The theory rewriter for this theory. */
   StringsRewriter& d_rewriter;
   /** reference to the base solver, used for certain queries */
diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp
index d19ce538d..b6bbcb7df 100644
--- a/src/theory/strings/inference_manager.cpp
+++ b/src/theory/strings/inference_manager.cpp
@@ -34,18 +34,15 @@ InferenceManager::InferenceManager(TheoryStrings& p,
                                    context::Context* c,
                                    context::UserContext* u,
                                    SolverState& s,
-                                   SkolemCache& skc,
+                                   TermRegistry& tr,
                                    OutputChannel& out,
                                    SequencesStatistics& statistics)
     : d_parent(p),
       d_state(s),
-      d_skCache(skc),
+      d_termReg(tr),
       d_out(out),
       d_statistics(statistics),
-      d_keep(c),
-      d_proxyVar(u),
-      d_proxyVarToLength(u),
-      d_lengthLemmaTermsCache(u)
+      d_keep(c)
 {
   NodeManager* nm = NodeManager::currentNM();
   d_zero = nm->mkConst(Rational(0));
@@ -233,7 +230,7 @@ void InferenceManager::sendInfer(Node eq_exp, Node eq, Inference infer)
     std::vector<Node> vars;
     std::vector<Node> subs;
     std::vector<Node> unproc;
-    inferSubstitutionProxyVars(eq_exp, vars, subs, unproc);
+    d_termReg.inferSubstitutionProxyVars(eq_exp, vars, subs, unproc);
     if (unproc.empty())
     {
       Node eqs =
@@ -297,235 +294,6 @@ void InferenceManager::sendPhaseRequirement(Node lit, bool pol)
   d_pendingReqPhase[lit] = pol;
 }
 
-Node InferenceManager::getProxyVariableFor(Node n) const
-{
-  NodeNodeMap::const_iterator it = d_proxyVar.find(n);
-  if (it != d_proxyVar.end())
-  {
-    return (*it).second;
-  }
-  return Node::null();
-}
-
-Node InferenceManager::getSymbolicDefinition(Node n,
-                                             std::vector<Node>& exp) const
-{
-  if (n.getNumChildren() == 0)
-  {
-    Node pn = getProxyVariableFor(n);
-    if (pn.isNull())
-    {
-      return Node::null();
-    }
-    Node eq = n.eqNode(pn);
-    eq = Rewriter::rewrite(eq);
-    if (std::find(exp.begin(), exp.end(), eq) == exp.end())
-    {
-      exp.push_back(eq);
-    }
-    return pn;
-  }
-  std::vector<Node> children;
-  if (n.getMetaKind() == metakind::PARAMETERIZED)
-  {
-    children.push_back(n.getOperator());
-  }
-  for (const Node& nc : n)
-  {
-    if (n.getType().isRegExp())
-    {
-      children.push_back(nc);
-    }
-    else
-    {
-      Node ns = getSymbolicDefinition(nc, exp);
-      if (ns.isNull())
-      {
-        return Node::null();
-      }
-      else
-      {
-        children.push_back(ns);
-      }
-    }
-  }
-  return NodeManager::currentNM()->mkNode(n.getKind(), children);
-}
-
-Node InferenceManager::registerTerm(Node n)
-{
-  Assert(n.getType().isStringLike());
-  NodeManager* nm = NodeManager::currentNM();
-  // register length information:
-  //  for variables, split on empty vs positive length
-  //  for concat/const/replace, introduce proxy var and state length relation
-  Node lsum;
-  if (n.getKind() != STRING_CONCAT && !n.isConst())
-  {
-    Node lsumb = nm->mkNode(STRING_LENGTH, n);
-    lsum = Rewriter::rewrite(lsumb);
-    // can register length term if it does not rewrite
-    if (lsum == lsumb)
-    {
-      registerTermAtomic(n, LENGTH_SPLIT);
-      return Node::null();
-    }
-  }
-  Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
-  StringsProxyVarAttribute spva;
-  sk.setAttribute(spva, true);
-  Node eq = Rewriter::rewrite(sk.eqNode(n));
-  d_proxyVar[n] = sk;
-  // If we are introducing a proxy for a constant or concat term, we do not
-  // need to send lemmas about its length, since its length is already
-  // implied.
-  if (n.isConst() || n.getKind() == STRING_CONCAT)
-  {
-    // do not send length lemma for sk.
-    registerTermAtomic(sk, LENGTH_IGNORE);
-  }
-  Node skl = nm->mkNode(STRING_LENGTH, sk);
-  if (n.getKind() == STRING_CONCAT)
-  {
-    std::vector<Node> nodeVec;
-    for (const Node& nc : n)
-    {
-      if (nc.getAttribute(StringsProxyVarAttribute()))
-      {
-        Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
-        nodeVec.push_back(d_proxyVarToLength[nc]);
-      }
-      else
-      {
-        Node lni = nm->mkNode(STRING_LENGTH, nc);
-        nodeVec.push_back(lni);
-      }
-    }
-    lsum = nm->mkNode(PLUS, nodeVec);
-    lsum = Rewriter::rewrite(lsum);
-  }
-  else if (n.isConst())
-  {
-    lsum = nm->mkConst(Rational(Word::getLength(n)));
-  }
-  Assert(!lsum.isNull());
-  d_proxyVarToLength[sk] = lsum;
-  Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
-
-  return nm->mkNode(AND, eq, ceq);
-}
-
-void InferenceManager::registerTermAtomic(Node n, LengthStatus s)
-{
-  if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
-  {
-    return;
-  }
-  d_lengthLemmaTermsCache.insert(n);
-
-  if (s == LENGTH_IGNORE)
-  {
-    // ignore it
-    return;
-  }
-  std::map<Node, bool> reqPhase;
-  Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
-  if (!lenLem.isNull())
-  {
-    Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
-    ++(d_statistics.d_lemmasRegisterTermAtomic);
-    d_out.lemma(lenLem);
-  }
-  for (const std::pair<const Node, bool>& rp : reqPhase)
-  {
-    d_out.requirePhase(rp.first, rp.second);
-  }
-}
-
-Node InferenceManager::getRegisterTermAtomicLemma(
-    Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
-
-  if (s == LENGTH_GEQ_ONE)
-  {
-    Node emp = Word::mkEmptyWord(n.getType());
-    Node neq_empty = n.eqNode(emp).negate();
-    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
-    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
-    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
-    return len_geq_one;
-  }
-
-  if (s == LENGTH_ONE)
-  {
-    Node len_one = n_len.eqNode(d_one);
-    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
-    return len_one;
-  }
-  Assert(s == LENGTH_SPLIT);
-
-  Node emp = Word::mkEmptyWord(n.getType());
-  std::vector<Node> lems;
-  // split whether the string is empty
-  Node n_len_eq_z = n_len.eqNode(d_zero);
-  Node n_len_eq_z_2 = n.eqNode(emp);
-  Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
-  case_empty = Rewriter::rewrite(case_empty);
-  Node case_nempty = nm->mkNode(GT, n_len, d_zero);
-  if (!case_empty.isConst())
-  {
-    Node lem = nm->mkNode(OR, case_empty, case_nempty);
-    lems.push_back(lem);
-    Trace("strings-lemma") << "Strings::Lemma LENGTH >= 0 : " << lem
-                           << std::endl;
-    // prefer trying the empty case first
-    // notice that requirePhase must only be called on rewritten literals that
-    // occur in the CNF stream.
-    n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
-    Assert(!n_len_eq_z.isConst());
-    reqPhase[n_len_eq_z] = true;
-    n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
-    Assert(!n_len_eq_z_2.isConst());
-    reqPhase[n_len_eq_z_2] = true;
-  }
-  else if (!case_empty.getConst<bool>())
-  {
-    // the rewriter knows that n is non-empty
-    Trace("strings-lemma") << "Strings::Lemma LENGTH > 0 (non-empty): "
-                           << case_nempty << std::endl;
-    lems.push_back(case_nempty);
-  }
-  else
-  {
-    // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
-    // n ---> "". Since this method is only called on non-constants n, it must
-    // be that n = "" ^ len( n ) = 0 does not rewrite to true.
-    Assert(false);
-  }
-
-  // additionally add len( x ) >= 0 ?
-  if (options::stringLenGeqZ())
-  {
-    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
-    n_len_geq = Rewriter::rewrite(n_len_geq);
-    lems.push_back(n_len_geq);
-  }
-
-  if (lems.empty())
-  {
-    return Node::null();
-  }
-  return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
-}
-
 void InferenceManager::addToExplanation(Node a,
                                         Node b,
                                         std::vector<Node>& exp) const
@@ -601,88 +369,6 @@ bool InferenceManager::hasProcessed() const
   return d_state.isInConflict() || !d_pendingLem.empty() || !d_pending.empty();
 }
 
-void InferenceManager::inferSubstitutionProxyVars(
-    Node n,
-    std::vector<Node>& vars,
-    std::vector<Node>& subs,
-    std::vector<Node>& unproc) const
-{
-  if (n.getKind() == AND)
-  {
-    for (const Node& nc : n)
-    {
-      inferSubstitutionProxyVars(nc, vars, subs, unproc);
-    }
-    return;
-  }
-  if (n.getKind() == EQUAL)
-  {
-    Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
-    ns = Rewriter::rewrite(ns);
-    if (ns.getKind() == EQUAL)
-    {
-      Node s;
-      Node v;
-      for (unsigned i = 0; i < 2; i++)
-      {
-        Node ss;
-        // determine whether this side has a proxy variable
-        if (ns[i].getAttribute(StringsProxyVarAttribute()))
-        {
-          // it is a proxy variable
-          ss = ns[i];
-        }
-        else if (ns[i].isConst())
-        {
-          ss = getProxyVariableFor(ns[i]);
-        }
-        if (!ss.isNull())
-        {
-          v = ns[1 - i];
-          // if the other side is a constant or variable
-          if (v.getNumChildren() == 0)
-          {
-            if (s.isNull())
-            {
-              s = ss;
-            }
-            else
-            {
-              // both sides of the equality correspond to a proxy variable
-              if (ss == s)
-              {
-                // it is a trivial equality, e.g. between a proxy variable
-                // and its definition
-                return;
-              }
-              else
-              {
-                // equality between proxy variables, non-trivial
-                s = Node::null();
-              }
-            }
-          }
-        }
-      }
-      if (!s.isNull())
-      {
-        // the equality can be turned into a substitution
-        subs.push_back(s);
-        vars.push_back(v);
-        return;
-      }
-    }
-    else
-    {
-      n = ns;
-    }
-  }
-  if (n != d_true)
-  {
-    unproc.push_back(n);
-  }
-}
-
 Node InferenceManager::mkExplain(const std::vector<Node>& a) const
 {
   std::vector<Node> an;
diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h
index 60139ca83..c1550328c 100644
--- a/src/theory/strings/inference_manager.h
+++ b/src/theory/strings/inference_manager.h
@@ -26,8 +26,8 @@
 #include "theory/output_channel.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/sequences_stats.h"
-#include "theory/strings/skolem_cache.h"
 #include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
 #include "theory/uf/equality_engine.h"
 
 namespace CVC4 {
@@ -76,7 +76,7 @@ class InferenceManager
                    context::Context* c,
                    context::UserContext* u,
                    SolverState& s,
-                   SkolemCache& skc,
+                   TermRegistry& tr,
                    OutputChannel& out,
                    SequencesStatistics& statistics);
   ~InferenceManager() {}
@@ -176,60 +176,6 @@ class InferenceManager
    */
   void sendPhaseRequirement(Node lit, bool pol);
 
-  //---------------------------- proxy variables and length elaboration
-  /** Get symbolic definition
-   *
-   * This method returns the "symbolic definition" of n, call it n', and
-   * populates the vector exp with an explanation such that exp => n = n'.
-   *
-   * The symbolic definition of n is the term where (maximal) subterms of n
-   * are replaced by their proxy variables. For example, if we introduced
-   * proxy variable v for x ++ y, then given input x ++ y = w, this method
-   * returns v = w and adds v = x ++ y to exp.
-   */
-  Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const;
-  /** Get proxy variable
-   *
-   * If this method returns the proxy variable for (string) term n if one
-   * exists, otherwise it returns null.
-   */
-  Node getProxyVariableFor(Node n) const;
-  /** register term
-   *
-   * This method is called on non-constant string terms n. It returns a lemma
-   * that should be sent on the output channel of theory of strings upon
-   * registration of this term, or null if no lemma is necessary.
-   *
-   * If n is an atomic term, the method registerTermAtomic is called for n
-   * and s = LENGTH_SPLIT and no lemma is returned.
-   */
-  Node registerTerm(Node n);
-  /** register length
-   *
-   * This method is called on non-constant string terms n that are "atomic"
-   * with respect to length. That is, the rewritten form of len(n) is itself.
-   *
-   * It sends a lemma on the output channel that ensures that the length n
-   * satisfies its assigned status (given by argument s).
-   *
-   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
-   *
-   * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0.
-   *
-   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
-   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
-   * This method also ensures that, when applicable, the left branch is taken
-   * first via calls to requirePhase.
-   *
-   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
-   * e.g. when the length of n is already implied by other constraints.
-   *
-   * In contrast to the above functions, it makes immediate calls to the output
-   * channel instead of adding them to pending lists.
-   */
-  void registerTermAtomic(Node n, LengthStatus s);
-  //---------------------------- end proxy variables and length elaboration
-
   //----------------------------constructing antecedants
   /**
    * Adds equality a = b to the vector exp if a and b are distinct terms. It
@@ -326,32 +272,15 @@ class InferenceManager
    * equality engine of this class.
    */
   void sendInfer(Node eq_exp, Node eq, Inference infer);
-  /**
-   * Get the lemma required for registering the length information for
-   * atomic term n given length status s. For details, see registerTermAtomic.
-   *
-   * Additionally, this method may map literals to a required polarity in the
-   * argument reqPhase, which should be processed by a call to requiredPhase by
-   * the caller of this method.
-   */
-  Node getRegisterTermAtomicLemma(Node n,
-                                  LengthStatus s,
-                                  std::map<Node, bool>& reqPhase);
 
   /** the parent theory of strings object */
   TheoryStrings& d_parent;
-  /**
-   * This is a reference to the solver state of the theory of strings.
-   */
+  /** Reference to the solver state of the theory of strings. */
   SolverState& d_state;
-  /** cache of all skolems */
-  SkolemCache& d_skCache;
-  /** the output channel
-   *
-   * This is a reference to the output channel of the theory of strings.
-   */
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
+  /** Reference to the output channel of the theory of strings. */
   OutputChannel& d_out;
-
   /** Reference to the statistics for the theory of strings/sequences. */
   SequencesStatistics& d_statistics;
 
@@ -375,58 +304,6 @@ class InferenceManager
    * SAT-context-dependent.
    */
   NodeSet d_keep;
-  /**
-   * Map string terms to their "proxy variables". Proxy variables are used are
-   * intermediate variables so that length information can be communicated for
-   * constants. For example, to communicate that "ABC" has length 3, we
-   * introduce a proxy variable v_{"ABC"} for "ABC", and assert:
-   *   v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3
-   * Notice this is required since we cannot directly write len( "ABC" ) = 3,
-   * which rewrites to 3 = 3.
-   * In the above example, we store "ABC" -> v_{"ABC"} in this map.
-   */
-  NodeNodeMap d_proxyVar;
-  /**
-   * Map from proxy variables to their normalized length. In the above example,
-   * we store "ABC" -> 3.
-   */
-  NodeNodeMap d_proxyVarToLength;
-  /** List of terms that we have register length for */
-  NodeSet d_lengthLemmaTermsCache;
-
-  /** infer substitution proxy vars
-   *
-   * This method attempts to (partially) convert the formula n into a
-   * substitution of the form:
-   *   v1 -> s1, ..., vn -> sn
-   * where s1 ... sn are proxy variables and v1 ... vn are either variables
-   * or constants.
-   *
-   * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent
-   * to P ^ n, where P is the conjunction of equalities corresponding to the
-   * definition of all proxy variables introduced by the theory of strings.
-   *
-   * For example, say that v1 was introduced as a proxy variable for "ABC", and
-   * v2 was introduced as a proxy variable for "AA".
-   *
-   * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets:
-   * vars = { x },
-   * subs = { v2 },
-   * unproc = {}.
-   * In particular, this says that the information content of n is essentially
-   * x = v2. The first and third conjunctions can be dropped from the
-   * explanation since these equalities simply correspond to definitions
-   * of proxy variables.
-   *
-   * This method is used as a performance heuristic. It can infer when the
-   * explanation of a fact depends only trivially on equalities corresponding
-   * to definitions of proxy variables, which can be omitted since they are
-   * assumed to hold globally.
-   */
-  void inferSubstitutionProxyVars(Node n,
-                                  std::vector<Node>& vars,
-                                  std::vector<Node>& subs,
-                                  std::vector<Node>& unproc) const;
 };
 
 }  // namespace strings
diff --git a/src/theory/strings/strings_fmf.cpp b/src/theory/strings/strings_fmf.cpp
index 02af3949e..87ce5e7c6 100644
--- a/src/theory/strings/strings_fmf.cpp
+++ b/src/theory/strings/strings_fmf.cpp
@@ -26,45 +26,26 @@ namespace strings {
 StringsFmf::StringsFmf(context::Context* c,
                        context::UserContext* u,
                        Valuation valuation,
-                       SkolemCache& skc)
+                       TermRegistry& tr)
     : d_sslds(nullptr),
       d_satContext(c),
       d_userContext(u),
       d_valuation(valuation),
-      d_skCache(skc),
-      d_inputVars(u)
+      d_termReg(tr)
 {
 }
 
 StringsFmf::~StringsFmf() {}
 
-void StringsFmf::preRegisterTerm(TNode n)
-{
-  if (!n.getType().isStringLike())
-  {
-    return;
-  }
-  Kind k = n.getKind();
-  // Our decision strategy will minimize the length of this term if it is a
-  // variable but not an internally generated Skolem, or a term that does
-  // not belong to this theory.
-  if (n.isVar() ? !d_skCache.isSkolem(n) : kindToTheoryId(k) != THEORY_STRINGS)
-  {
-    d_inputVars.insert(n);
-    Trace("strings-dstrat-reg") << "input variable: " << n << std::endl;
-  }
-}
-
 void StringsFmf::presolve()
 {
   d_sslds.reset(new StringSumLengthDecisionStrategy(
       d_satContext, d_userContext, d_valuation));
   Trace("strings-dstrat-reg")
       << "presolve: register decision strategy." << std::endl;
+  const NodeSet& ivars = d_termReg.getInputVars();
   std::vector<Node> inputVars;
-  for (NodeSet::const_iterator itr = d_inputVars.begin();
-       itr != d_inputVars.end();
-       ++itr)
+  for (NodeSet::const_iterator itr = ivars.begin(); itr != ivars.end(); ++itr)
   {
     inputVars.push_back(*itr);
   }
diff --git a/src/theory/strings/strings_fmf.h b/src/theory/strings/strings_fmf.h
index 375a100ff..9dfb1f590 100644
--- a/src/theory/strings/strings_fmf.h
+++ b/src/theory/strings/strings_fmf.h
@@ -22,7 +22,7 @@
 #include "context/context.h"
 #include "expr/node.h"
 #include "theory/decision_strategy.h"
-#include "theory/strings/skolem_cache.h"
+#include "theory/strings/term_registry.h"
 #include "theory/valuation.h"
 
 namespace CVC4 {
@@ -42,15 +42,8 @@ class StringsFmf
   StringsFmf(context::Context* c,
              context::UserContext* u,
              Valuation valuation,
-             SkolemCache& skc);
+             TermRegistry& tr);
   ~StringsFmf();
-  /** preRegister term
-   *
-   * This determines if the term n should be added to d_inputVars, the set
-   * of terms of type string whose length we are minimizing with this decision
-   * strategy.
-   */
-  void preRegisterTerm(TNode n);
   /** presolve
    *
    * This initializes a (new copy) of the decision strategy d_sslds.
@@ -105,15 +98,10 @@ class StringsFmf
   context::Context* d_satContext;
   /** The user level assertion context for the theory of strings. */
   context::UserContext* d_userContext;
-  /** The valuation object */
+  /** The valuation object of theory of strings */
   Valuation d_valuation;
-  /** reference to the skolem cache */
-  SkolemCache& d_skCache;
-  /**
-   * The set of terms of type string whose length we are minimizing
-   * with this decision strategy.
-   */
-  NodeSet d_inputVars;
+  /** The term registry of theory of strings */
+  TermRegistry& d_termReg;
 };
 
 }  // namespace strings
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
new file mode 100644
index 000000000..4f1e41ebb
--- /dev/null
+++ b/src/theory/strings/term_registry.cpp
@@ -0,0 +1,578 @@
+/*********************                                                        */
+/*! \file term_registry.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Tianyi Liang, Morgan Deters
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of term registry for the theory of strings.
+ **/
+
+#include "theory/strings/term_registry.h"
+
+#include "expr/attribute.h"
+#include "options/strings_options.h"
+#include "smt/logic_exception.h"
+#include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+
+using namespace std;
+using namespace CVC4::context;
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+struct StringsProxyVarAttributeId
+{
+};
+typedef expr::Attribute<StringsProxyVarAttributeId, bool>
+    StringsProxyVarAttribute;
+
+TermRegistry::TermRegistry(context::Context* c,
+                           context::UserContext* u,
+                           eq::EqualityEngine& ee,
+                           OutputChannel& out,
+                           SequencesStatistics& statistics)
+    : d_ee(ee),
+      d_out(out),
+      d_statistics(statistics),
+      d_hasStrCode(false),
+      d_functionsTerms(c),
+      d_inputVars(u),
+      d_preregisteredTerms(u),
+      d_registeredTerms(u),
+      d_registeredTypes(u),
+      d_proxyVar(u),
+      d_proxyVarToLength(u),
+      d_lengthLemmaTermsCache(u)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  d_zero = nm->mkConst(Rational(0));
+  d_one = nm->mkConst(Rational(1));
+  d_negOne = NodeManager::currentNM()->mkConst(Rational(-1));
+  d_cardSize = utils::getAlphabetCardinality();
+}
+
+TermRegistry::~TermRegistry() {}
+
+void TermRegistry::preRegisterTerm(TNode n)
+{
+  if (d_preregisteredTerms.find(n) != d_preregisteredTerms.end())
+  {
+    return;
+  }
+  d_preregisteredTerms.insert(n);
+  Trace("strings-preregister")
+      << "TheoryString::preregister : " << n << std::endl;
+  // check for logic exceptions
+  Kind k = n.getKind();
+  if (!options::stringExp())
+  {
+    if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
+        || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN
+        || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
+        || k == STRING_REV)
+    {
+      std::stringstream ss;
+      ss << "Term of kind " << k
+         << " not supported in default mode, try --strings-exp";
+      throw LogicException(ss.str());
+    }
+  }
+  if (k == EQUAL)
+  {
+    d_ee.addTriggerEquality(n);
+    return;
+  }
+  else if (k == STRING_IN_REGEXP)
+  {
+    d_out.requirePhase(n, true);
+    d_ee.addTriggerPredicate(n);
+    d_ee.addTerm(n[0]);
+    d_ee.addTerm(n[1]);
+    return;
+  }
+  else if (k == STRING_TO_CODE)
+  {
+    d_hasStrCode = true;
+  }
+  registerTerm(n, 0);
+  TypeNode tn = n.getType();
+  if (tn.isRegExp() && n.isVar())
+  {
+    std::stringstream ss;
+    ss << "Regular expression variables are not supported.";
+    throw LogicException(ss.str());
+  }
+  if (tn.isString())
+  {
+    // all characters of constants should fall in the alphabet
+    if (n.isConst())
+    {
+      std::vector<unsigned> vec = n.getConst<String>().getVec();
+      for (unsigned u : vec)
+      {
+        if (u >= d_cardSize)
+        {
+          std::stringstream ss;
+          ss << "Characters in string \"" << n
+             << "\" are outside of the given alphabet.";
+          throw LogicException(ss.str());
+        }
+      }
+    }
+    d_ee.addTerm(n);
+  }
+  else if (tn.isBoolean())
+  {
+    // Get triggered for both equal and dis-equal
+    d_ee.addTriggerPredicate(n);
+  }
+  else
+  {
+    // Function applications/predicates
+    d_ee.addTerm(n);
+  }
+  // Set d_functionsTerms stores all function applications that are
+  // relevant to theory combination. Notice that this is a subset of
+  // the applications whose kinds are function kinds in the equality
+  // engine. This means it does not include applications of operators
+  // like re.++, which is not a function kind in the equality engine.
+  // Concatenation terms do not need to be considered here because
+  // their arguments have string type and do not introduce any shared
+  // terms.
+  if (n.hasOperator() && d_ee.isFunctionKind(k) && k != STRING_CONCAT)
+  {
+    d_functionsTerms.push_back(n);
+  }
+  if (options::stringFMF())
+  {
+    if (tn.isStringLike())
+    {
+      // Our decision strategy will minimize the length of this term if it is a
+      // variable but not an internally generated Skolem, or a term that does
+      // not belong to this theory.
+      if (n.isVar() ? !d_skCache.isSkolem(n)
+                    : kindToTheoryId(k) != THEORY_STRINGS)
+      {
+        d_inputVars.insert(n);
+        Trace("strings-preregister") << "input variable: " << n << std::endl;
+      }
+    }
+  }
+}
+
+void TermRegistry::registerTerm(Node n, int effort)
+{
+  TypeNode tn = n.getType();
+  bool do_register = true;
+  if (!tn.isStringLike())
+  {
+    if (options::stringEagerLen())
+    {
+      do_register = effort == 0;
+    }
+    else
+    {
+      do_register = effort > 0 || n.getKind() != STRING_CONCAT;
+    }
+  }
+  if (!do_register)
+  {
+    return;
+  }
+  if (d_registeredTerms.find(n) != d_registeredTerms.end())
+  {
+    return;
+  }
+  d_registeredTerms.insert(n);
+  // ensure the type is registered
+  registerType(tn);
+  NodeManager* nm = NodeManager::currentNM();
+  Debug("strings-register") << "TheoryStrings::registerTerm() " << n
+                            << ", effort = " << effort << std::endl;
+  Node regTermLem;
+  if (tn.isStringLike())
+  {
+    // register length information:
+    //  for variables, split on empty vs positive length
+    //  for concat/const/replace, introduce proxy var and state length relation
+    regTermLem = getRegisterTermLemma(n);
+  }
+  else if (n.getKind() == STRING_TO_CODE)
+  {
+    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
+    Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
+    Node code_eq_neg1 = n.eqNode(d_negOne);
+    Node code_range = nm->mkNode(
+        AND,
+        nm->mkNode(GEQ, n, d_zero),
+        nm->mkNode(
+            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
+    regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
+  }
+  else if (n.getKind() == STRING_STRIDOF)
+  {
+    Node len = utils::mkNLength(n[0]);
+    regTermLem = nm->mkNode(AND,
+                            nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
+                            nm->mkNode(LEQ, n, len));
+  }
+  if (!regTermLem.isNull())
+  {
+    Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
+    ++(d_statistics.d_lemmasRegisterTerm);
+    d_out.lemma(regTermLem);
+  }
+}
+
+void TermRegistry::registerType(TypeNode tn)
+{
+  if (d_registeredTypes.find(tn) != d_registeredTypes.end())
+  {
+    return;
+  }
+  d_registeredTypes.insert(tn);
+  if (tn.isStringLike())
+  {
+    // preregister the empty word for the type
+    Node emp = Word::mkEmptyWord(tn);
+    if (!d_ee.hasTerm(emp))
+    {
+      preRegisterTerm(emp);
+    }
+  }
+}
+
+Node TermRegistry::getRegisterTermLemma(Node n)
+{
+  Assert(n.getType().isStringLike());
+  NodeManager* nm = NodeManager::currentNM();
+  // register length information:
+  //  for variables, split on empty vs positive length
+  //  for concat/const/replace, introduce proxy var and state length relation
+  Node lsum;
+  if (n.getKind() != STRING_CONCAT && !n.isConst())
+  {
+    Node lsumb = nm->mkNode(STRING_LENGTH, n);
+    lsum = Rewriter::rewrite(lsumb);
+    // can register length term if it does not rewrite
+    if (lsum == lsumb)
+    {
+      registerTermAtomic(n, LENGTH_SPLIT);
+      return Node::null();
+    }
+  }
+  Node sk = d_skCache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym");
+  StringsProxyVarAttribute spva;
+  sk.setAttribute(spva, true);
+  Node eq = Rewriter::rewrite(sk.eqNode(n));
+  d_proxyVar[n] = sk;
+  // If we are introducing a proxy for a constant or concat term, we do not
+  // need to send lemmas about its length, since its length is already
+  // implied.
+  if (n.isConst() || n.getKind() == STRING_CONCAT)
+  {
+    // do not send length lemma for sk.
+    registerTermAtomic(sk, LENGTH_IGNORE);
+  }
+  Node skl = nm->mkNode(STRING_LENGTH, sk);
+  if (n.getKind() == STRING_CONCAT)
+  {
+    std::vector<Node> nodeVec;
+    for (const Node& nc : n)
+    {
+      if (nc.getAttribute(StringsProxyVarAttribute()))
+      {
+        Assert(d_proxyVarToLength.find(nc) != d_proxyVarToLength.end());
+        nodeVec.push_back(d_proxyVarToLength[nc]);
+      }
+      else
+      {
+        Node lni = nm->mkNode(STRING_LENGTH, nc);
+        nodeVec.push_back(lni);
+      }
+    }
+    lsum = nm->mkNode(PLUS, nodeVec);
+    lsum = Rewriter::rewrite(lsum);
+  }
+  else if (n.isConst())
+  {
+    lsum = nm->mkConst(Rational(Word::getLength(n)));
+  }
+  Assert(!lsum.isNull());
+  d_proxyVarToLength[sk] = lsum;
+  Node ceq = Rewriter::rewrite(skl.eqNode(lsum));
+
+  return nm->mkNode(AND, eq, ceq);
+}
+
+void TermRegistry::registerTermAtomic(Node n, LengthStatus s)
+{
+  if (d_lengthLemmaTermsCache.find(n) != d_lengthLemmaTermsCache.end())
+  {
+    return;
+  }
+  d_lengthLemmaTermsCache.insert(n);
+
+  if (s == LENGTH_IGNORE)
+  {
+    // ignore it
+    return;
+  }
+  std::map<Node, bool> reqPhase;
+  Node lenLem = getRegisterTermAtomicLemma(n, s, reqPhase);
+  if (!lenLem.isNull())
+  {
+    Trace("strings-lemma") << "Strings::Lemma REGISTER-TERM-ATOMIC : " << lenLem
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << lenLem << ")" << std::endl;
+    ++(d_statistics.d_lemmasRegisterTermAtomic);
+    d_out.lemma(lenLem);
+  }
+  for (const std::pair<const Node, bool>& rp : reqPhase)
+  {
+    d_out.requirePhase(rp.first, rp.second);
+  }
+}
+
+SkolemCache* TermRegistry::getSkolemCache() { return &d_skCache; }
+
+const context::CDList<TNode>& TermRegistry::getFunctionTerms() const
+{
+  return d_functionsTerms;
+}
+
+const context::CDHashSet<Node, NodeHashFunction>& TermRegistry::getInputVars()
+    const
+{
+  return d_inputVars;
+}
+
+bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
+
+Node TermRegistry::getRegisterTermAtomicLemma(Node n,
+                                              LengthStatus s,
+                                              std::map<Node, bool>& reqPhase)
+{
+  Assert(n.getType().isStringLike());
+  NodeManager* nm = NodeManager::currentNM();
+  Node n_len = nm->mkNode(kind::STRING_LENGTH, n);
+  Node emp = Word::mkEmptyWord(n.getType());
+  if (s == LENGTH_GEQ_ONE)
+  {
+    Node neq_empty = n.eqNode(emp).negate();
+    Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero);
+    Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z);
+    Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl;
+    return len_geq_one;
+  }
+
+  if (s == LENGTH_ONE)
+  {
+    Node len_one = n_len.eqNode(d_one);
+    Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one
+                           << std::endl;
+    Trace("strings-assert") << "(assert " << len_one << ")" << std::endl;
+    return len_one;
+  }
+  Assert(s == LENGTH_SPLIT);
+
+  std::vector<Node> lems;
+  // split whether the string is empty
+  Node n_len_eq_z = n_len.eqNode(d_zero);
+  Node n_len_eq_z_2 = n.eqNode(emp);
+  Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2);
+  case_empty = Rewriter::rewrite(case_empty);
+  Node case_nempty = nm->mkNode(GT, n_len, d_zero);
+  if (!case_empty.isConst())
+  {
+    Node lem = nm->mkNode(OR, case_empty, case_nempty);
+    lems.push_back(lem);
+    // prefer trying the empty case first
+    // notice that requirePhase must only be called on rewritten literals that
+    // occur in the CNF stream.
+    n_len_eq_z = Rewriter::rewrite(n_len_eq_z);
+    Assert(!n_len_eq_z.isConst());
+    reqPhase[n_len_eq_z] = true;
+    n_len_eq_z_2 = Rewriter::rewrite(n_len_eq_z_2);
+    Assert(!n_len_eq_z_2.isConst());
+    reqPhase[n_len_eq_z_2] = true;
+  }
+  else if (!case_empty.getConst<bool>())
+  {
+    // the rewriter knows that n is non-empty
+    lems.push_back(case_nempty);
+  }
+  else
+  {
+    // If n = "" ---> true or len( n ) = 0 ----> true, then we expect that
+    // n ---> "". Since this method is only called on non-constants n, it must
+    // be that n = "" ^ len( n ) = 0 does not rewrite to true.
+    Assert(false);
+  }
+
+  // additionally add len( x ) >= 0 ?
+  if (options::stringLenGeqZ())
+  {
+    Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero);
+    n_len_geq = Rewriter::rewrite(n_len_geq);
+    lems.push_back(n_len_geq);
+  }
+
+  if (lems.empty())
+  {
+    return Node::null();
+  }
+  return lems.size() == 1 ? lems[0] : nm->mkNode(AND, lems);
+}
+
+Node TermRegistry::getSymbolicDefinition(Node n, std::vector<Node>& exp) const
+{
+  if (n.getNumChildren() == 0)
+  {
+    Node pn = getProxyVariableFor(n);
+    if (pn.isNull())
+    {
+      return Node::null();
+    }
+    Node eq = n.eqNode(pn);
+    eq = Rewriter::rewrite(eq);
+    if (std::find(exp.begin(), exp.end(), eq) == exp.end())
+    {
+      exp.push_back(eq);
+    }
+    return pn;
+  }
+  std::vector<Node> children;
+  if (n.getMetaKind() == metakind::PARAMETERIZED)
+  {
+    children.push_back(n.getOperator());
+  }
+  for (const Node& nc : n)
+  {
+    if (n.getType().isRegExp())
+    {
+      children.push_back(nc);
+    }
+    else
+    {
+      Node ns = getSymbolicDefinition(nc, exp);
+      if (ns.isNull())
+      {
+        return Node::null();
+      }
+      else
+      {
+        children.push_back(ns);
+      }
+    }
+  }
+  return NodeManager::currentNM()->mkNode(n.getKind(), children);
+}
+
+Node TermRegistry::getProxyVariableFor(Node n) const
+{
+  NodeNodeMap::const_iterator it = d_proxyVar.find(n);
+  if (it != d_proxyVar.end())
+  {
+    return (*it).second;
+  }
+  return Node::null();
+}
+
+void TermRegistry::inferSubstitutionProxyVars(Node n,
+                                              std::vector<Node>& vars,
+                                              std::vector<Node>& subs,
+                                              std::vector<Node>& unproc) const
+{
+  if (n.getKind() == AND)
+  {
+    for (const Node& nc : n)
+    {
+      inferSubstitutionProxyVars(nc, vars, subs, unproc);
+    }
+    return;
+  }
+  if (n.getKind() == EQUAL)
+  {
+    Node ns = n.substitute(vars.begin(), vars.end(), subs.begin(), subs.end());
+    ns = Rewriter::rewrite(ns);
+    if (ns.getKind() == EQUAL)
+    {
+      Node s;
+      Node v;
+      for (unsigned i = 0; i < 2; i++)
+      {
+        Node ss;
+        // determine whether this side has a proxy variable
+        if (ns[i].getAttribute(StringsProxyVarAttribute()))
+        {
+          // it is a proxy variable
+          ss = ns[i];
+        }
+        else if (ns[i].isConst())
+        {
+          ss = getProxyVariableFor(ns[i]);
+        }
+        if (!ss.isNull())
+        {
+          v = ns[1 - i];
+          // if the other side is a constant or variable
+          if (v.getNumChildren() == 0)
+          {
+            if (s.isNull())
+            {
+              s = ss;
+            }
+            else
+            {
+              // both sides of the equality correspond to a proxy variable
+              if (ss == s)
+              {
+                // it is a trivial equality, e.g. between a proxy variable
+                // and its definition
+                return;
+              }
+              else
+              {
+                // equality between proxy variables, non-trivial
+                s = Node::null();
+              }
+            }
+          }
+        }
+      }
+      if (!s.isNull())
+      {
+        // the equality can be turned into a substitution
+        subs.push_back(s);
+        vars.push_back(v);
+        return;
+      }
+    }
+    else
+    {
+      n = ns;
+    }
+  }
+  if (!n.isConst() || !n.getConst<bool>())
+  {
+    unproc.push_back(n);
+  }
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h
new file mode 100644
index 000000000..b68e44b81
--- /dev/null
+++ b/src/theory/strings/term_registry.h
@@ -0,0 +1,261 @@
+/*********************                                                        */
+/*! \file theory_strings.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds, Tianyi Liang, Tim King
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Term registry for the theory of strings.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__STRINGS__TERM_REGISTRY_H
+#define CVC4__THEORY__STRINGS__TERM_REGISTRY_H
+
+#include "context/cdhashset.h"
+#include "context/cdlist.h"
+#include "theory/output_channel.h"
+#include "theory/strings/infer_info.h"
+#include "theory/strings/sequences_stats.h"
+#include "theory/strings/skolem_cache.h"
+#include "theory/uf/equality_engine.h"
+
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+/**
+ * This class manages all the (pre)registration tasks for terms. These tasks
+ * include:
+ * (1) Sending out preregistration lemmas for terms,
+ * (2) Add terms to the equality engine,
+ * (3) Maintaining a list of terms d_functionsTerms (for theory combination),
+ * (4) Maintaining a list of input variables d_inputVars (for fmf).
+ * (5) Maintaining a skolem cache. Notice that this skolem cache is the
+ * official skolem cache that should be used by all modules in TheoryStrings.
+ */
+class TermRegistry
+{
+  typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
+  typedef context::CDHashSet<TypeNode, TypeNodeHashFunction> TypeNodeSet;
+  typedef context::CDHashMap<Node, Node, NodeHashFunction> NodeNodeMap;
+
+ public:
+  TermRegistry(context::Context* c,
+               context::UserContext* u,
+               eq::EqualityEngine& ee,
+               OutputChannel& out,
+               SequencesStatistics& statistics);
+  ~TermRegistry();
+  /**
+   * Preregister term, called when TheoryStrings::preRegisterTerm(n) is called.
+   * This does the following:
+   * - Checks for illegal terms and throws a LogicException if any term is
+   * not handled.
+   * - Adds the appropriate terms and triggers to the equality engine.
+   * - Updates information about which terms exist, including
+   * d_functionsTerms and d_hasStrCode. If we are using strings finite model
+   * finding (options::stringsFmf), we determine if the term n should be
+   * added to d_inputVars, the set of terms of type string whose length we are
+   * minimizing with its decision strategy.
+   * - Setting phase requirements on n if it is a formula and we prefer
+   * decisions with a particular polarity (e.g. positive regular expression
+   * memberships).
+   */
+  void preRegisterTerm(TNode n);
+  /** Register term
+   *
+   * This performs SAT-context-independent registration for a term n, which
+   * may cause lemmas to be sent on the output channel that involve
+   * "initial refinement lemmas" for n. This includes introducing proxy
+   * variables for string terms and asserting that str.code terms are within
+   * proper bounds.
+   *
+   * Effort is one of the following (TODO make enum #1881):
+   * 0 : upon preregistration or internal assertion
+   * 1 : upon occurrence in length term
+   * 2 : before normal form computation
+   * 3 : called on normal form terms
+   *
+   * Based on the strategy, we may choose to add these initial refinement
+   * lemmas at one of the following efforts, where if it is not the given
+   * effort, the call to this method does nothing.
+   */
+  void registerTerm(Node n, int effort);
+  /** register length
+   *
+   * This method is called on non-constant string terms n that are "atomic"
+   * with respect to length. That is, the rewritten form of len(n) is itself.
+   *
+   * It sends a lemma on the output channel that ensures that the length n
+   * satisfies its assigned status (given by argument s).
+   *
+   * If the status is LENGTH_ONE, we send the lemma len( n ) = 1.
+   *
+   * If the status is LENGTH_GEQ_ONE, we send a lemma n != "" ^ len( n ) > 0.
+   *
+   * If the status is LENGTH_SPLIT, we send a send a lemma of the form:
+   *   ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0
+   * This method also ensures that, when applicable, the left branch is taken
+   * first via calls to requirePhase.
+   *
+   * If the status is LENGTH_IGNORE, then no lemma is sent. This status is used
+   * e.g. when the length of n is already implied by other constraints.
+   *
+   * In contrast to the above functions, it makes immediate calls to the output
+   * channel instead of adding them to pending lists.
+   */
+  void registerTermAtomic(Node n, LengthStatus s);
+  //---------------------------- queries
+  /** Get the skolem cache of this object */
+  SkolemCache* getSkolemCache();
+  /** Get all function terms that have been preregistered to this object */
+  const context::CDList<TNode>& getFunctionTerms() const;
+  /**
+   * Get the "input variables", corresponding to the set of leaf nodes of
+   * string-like type that have been preregistered as terms to this object.
+   */
+  const context::CDHashSet<Node, NodeHashFunction>& getInputVars() const;
+  /** Returns true if any str.code terms have been preregistered */
+  bool hasStringCode() const;
+  //---------------------------- end queries
+  //---------------------------- proxy variables
+  /** Get symbolic definition
+   *
+   * This method returns the "symbolic definition" of n, call it n', and
+   * populates the vector exp with an explanation such that exp => n = n'.
+   *
+   * The symbolic definition of n is the term where (maximal) subterms of n
+   * are replaced by their proxy variables. For example, if we introduced
+   * proxy variable v for x ++ y, then given input x ++ y = w, this method
+   * returns v = w and adds v = x ++ y to exp.
+   */
+  Node getSymbolicDefinition(Node n, std::vector<Node>& exp) const;
+  /** Get proxy variable
+   *
+   * If this method returns the proxy variable for (string) term n if one
+   * exists, otherwise it returns null.
+   */
+  Node getProxyVariableFor(Node n) const;
+
+  /** infer substitution proxy vars
+   *
+   * This method attempts to (partially) convert the formula n into a
+   * substitution of the form:
+   *   v1 -> s1, ..., vn -> sn
+   * where s1 ... sn are proxy variables and v1 ... vn are either variables
+   * or constants.
+   *
+   * This method ensures that P ^ v1 = s1 ^ ... ^ vn = sn ^ unproc is equivalent
+   * to P ^ n, where P is the conjunction of equalities corresponding to the
+   * definition of all proxy variables introduced by the theory of strings.
+   *
+   * For example, say that v1 was introduced as a proxy variable for "ABC", and
+   * v2 was introduced as a proxy variable for "AA".
+   *
+   * Given the input n := v1 = "ABC" ^ v2 = x ^ x = "AA", this method sets:
+   * vars = { x },
+   * subs = { v2 },
+   * unproc = {}.
+   * In particular, this says that the information content of n is essentially
+   * x = v2. The first and third conjunctions can be dropped from the
+   * explanation since these equalities simply correspond to definitions
+   * of proxy variables.
+   *
+   * This method is used as a performance heuristic. It can infer when the
+   * explanation of a fact depends only trivially on equalities corresponding
+   * to definitions of proxy variables, which can be omitted since they are
+   * assumed to hold globally.
+   */
+  void inferSubstitutionProxyVars(Node n,
+                                  std::vector<Node>& vars,
+                                  std::vector<Node>& subs,
+                                  std::vector<Node>& unproc) const;
+  //---------------------------- end proxy variables
+ private:
+  /** Common constants */
+  Node d_zero;
+  Node d_one;
+  Node d_negOne;
+  /** the cardinality of the alphabet */
+  uint32_t d_cardSize;
+  /** Reference to equality engine of the theory of strings. */
+  eq::EqualityEngine& d_ee;
+  /** Reference to the output channel of the theory of strings. */
+  OutputChannel& d_out;
+  /** Reference to the statistics for the theory of strings/sequences. */
+  SequencesStatistics& d_statistics;
+  /** have we asserted any str.code terms? */
+  bool d_hasStrCode;
+  /** The cache of all skolems, which is owned by this class. */
+  SkolemCache d_skCache;
+  /** All function terms that the theory has seen in the current SAT context */
+  context::CDList<TNode> d_functionsTerms;
+  /**
+   * The set of terms of type string that are abstracted as leaf nodes.
+   */
+  NodeSet d_inputVars;
+  /** The user-context dependent cache of terms that have been preregistered */
+  NodeSet d_preregisteredTerms;
+  /** The user-context dependent cache of terms that have been registered */
+  NodeSet d_registeredTerms;
+  /** The types that we have preregistered */
+  TypeNodeSet d_registeredTypes;
+  /**
+   * Map string terms to their "proxy variables". Proxy variables are used are
+   * intermediate variables so that length information can be communicated for
+   * constants. For example, to communicate that "ABC" has length 3, we
+   * introduce a proxy variable v_{"ABC"} for "ABC", and assert:
+   *   v_{"ABC"} = "ABC" ^ len( v_{"ABC"} ) = 3
+   * Notice this is required since we cannot directly write len( "ABC" ) = 3,
+   * which rewrites to 3 = 3.
+   * In the above example, we store "ABC" -> v_{"ABC"} in this map.
+   */
+  NodeNodeMap d_proxyVar;
+  /**
+   * Map from proxy variables to their normalized length. In the above example,
+   * we store "ABC" -> 3.
+   */
+  NodeNodeMap d_proxyVarToLength;
+  /** List of terms that we have register length for */
+  NodeSet d_lengthLemmaTermsCache;
+  /** Register type
+   *
+   * Ensures the theory solver is setup to handle string-like type tn. In
+   * particular, this includes:
+   * - Calling preRegisterTerm on the empty word for tn
+   */
+  void registerType(TypeNode tn);
+  /** register term
+   *
+   * This method is called on non-constant string terms n. It returns a lemma
+   * that should be sent on the output channel of theory of strings upon
+   * registration of this term, or null if no lemma is necessary.
+   *
+   * If n is an atomic term, the method registerTermAtomic is called for n
+   * and s = LENGTH_SPLIT and no lemma is returned.
+   */
+  Node getRegisterTermLemma(Node n);
+  /**
+   * Get the lemma required for registering the length information for
+   * atomic term n given length status s. For details, see registerTermAtomic.
+   *
+   * Additionally, this method may map literals to a required polarity in the
+   * argument reqPhase, which should be processed by a call to requiredPhase by
+   * the caller of this method.
+   */
+  Node getRegisterTermAtomicLemma(Node n,
+                                  LengthStatus s,
+                                  std::map<Node, bool>& reqPhase);
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__STRINGS__TERM_REGISTRY_H */
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index 81e43c595..3e490f285 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -72,18 +72,14 @@ TheoryStrings::TheoryStrings(context::Context* c,
       d_statistics(),
       d_equalityEngine(d_notify, c, "theory::strings::ee", true),
       d_state(c, d_equalityEngine, d_valuation),
-      d_im(*this, c, u, d_state, d_sk_cache, out, d_statistics),
-      d_pregistered_terms_cache(u),
-      d_registered_terms_cache(u),
-      d_registeredTypesCache(u),
-      d_functionsTerms(c),
-      d_has_str_code(false),
+      d_termReg(c, u, d_equalityEngine, out, d_statistics),
+      d_im(*this, c, u, d_state, d_termReg, out, d_statistics),
       d_rewriter(&d_statistics.d_rewrites),
       d_bsolver(c, u, d_state, d_im),
-      d_csolver(c, u, d_state, d_im, d_sk_cache, d_bsolver),
+      d_csolver(c, u, d_state, d_im, d_termReg, d_bsolver),
       d_esolver(nullptr),
       d_rsolver(nullptr),
-      d_stringsFmf(c, u, valuation, d_sk_cache),
+      d_stringsFmf(c, u, valuation, d_termReg),
       d_strategy_init(false)
 {
   setupExtTheory();
@@ -92,7 +88,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
                                  u,
                                  d_state,
                                  d_im,
-                                 d_sk_cache,
+                                 d_termReg,
                                  d_rewriter,
                                  d_bsolver,
                                  d_csolver,
@@ -374,7 +370,7 @@ bool TheoryStrings::collectModelInfoType(
         {
           // does it have a code and the length of these equivalence classes are
           // one?
-          if (d_has_str_code && lts_values[i] == d_one)
+          if (d_termReg.hasStringCode() && lts_values[i] == d_one)
           {
             EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
             if (eip && !eip->d_codeTerm.get().isNull())
@@ -550,94 +546,11 @@ bool TheoryStrings::collectModelInfoType(
 // MAIN SOLVER
 /////////////////////////////////////////////////////////////////////////////
 
-
-void TheoryStrings::preRegisterTerm(TNode n) {
-  if( d_pregistered_terms_cache.find(n) == d_pregistered_terms_cache.end() ) {
-    d_pregistered_terms_cache.insert(n);
-    Trace("strings-preregister")
-        << "TheoryString::preregister : " << n << std::endl;
-    //check for logic exceptions
-    Kind k = n.getKind();
-    if( !options::stringExp() ){
-      if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS
-          || k == kind::STRING_STOI || k == kind::STRING_STRREPL
-          || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN
-          || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
-          || k == STRING_REV)
-      {
-        std::stringstream ss;
-        ss << "Term of kind " << k
-           << " not supported in default mode, try --strings-exp";
-        throw LogicException(ss.str());
-      }
-    }
-    switch (k)
-    {
-      case kind::EQUAL: {
-        d_equalityEngine.addTriggerEquality(n);
-        break;
-      }
-      case kind::STRING_IN_REGEXP: {
-        d_out->requirePhase(n, true);
-        d_equalityEngine.addTriggerPredicate(n);
-        d_equalityEngine.addTerm(n[0]);
-        d_equalityEngine.addTerm(n[1]);
-        break;
-      }
-      default: {
-        registerTerm(n, 0);
-        TypeNode tn = n.getType();
-        if (tn.isRegExp() && n.isVar())
-        {
-          std::stringstream ss;
-          ss << "Regular expression variables are not supported.";
-          throw LogicException(ss.str());
-        }
-        if( tn.isString() ) {
-          // all characters of constants should fall in the alphabet
-          if (n.isConst())
-          {
-            std::vector<unsigned> vec = n.getConst<String>().getVec();
-            for (unsigned u : vec)
-            {
-              if (u >= d_cardSize)
-              {
-                std::stringstream ss;
-                ss << "Characters in string \"" << n
-                   << "\" are outside of the given alphabet.";
-                throw LogicException(ss.str());
-              }
-            }
-          }
-          d_equalityEngine.addTerm(n);
-        } else if (tn.isBoolean()) {
-          // Get triggered for both equal and dis-equal
-          d_equalityEngine.addTriggerPredicate(n);
-        } else {
-          // Function applications/predicates
-          d_equalityEngine.addTerm(n);
-        }
-        // Set d_functionsTerms stores all function applications that are
-        // relevant to theory combination. Notice that this is a subset of
-        // the applications whose kinds are function kinds in the equality
-        // engine. This means it does not include applications of operators
-        // like re.++, which is not a function kind in the equality engine.
-        // Concatenation terms do not need to be considered here because
-        // their arguments have string type and do not introduce any shared
-        // terms.
-        if (n.hasOperator() && d_equalityEngine.isFunctionKind(k)
-            && k != kind::STRING_CONCAT)
-        {
-          d_functionsTerms.push_back( n );
-        }
-      }
-    }
-    // register with finite model finding
-    if (options::stringFMF())
-    {
-      d_stringsFmf.preRegisterTerm(n);
-    }
-  }
+void TheoryStrings::preRegisterTerm(TNode n)
+{
+  Trace("strings-preregister")
+      << "TheoryStrings::preRegisterTerm: " << n << std::endl;
+  d_termReg.preRegisterTerm(n);
 }
 
 Node TheoryStrings::expandDefinition(Node node)
@@ -799,7 +712,7 @@ void TheoryStrings::eqNotifyNewClass(TNode t){
   {
     Trace("strings-debug") << "New length eqc : " << t << std::endl;
     //we care about the length of this string
-    registerTerm( t[0], 1 );
+    d_termReg.registerTerm(t[0], 1);
   }
   d_state.eqNotifyNewClass(t);
 }
@@ -887,9 +800,10 @@ void TheoryStrings::computeCareGraph(){
   // since operators are polymorphic, taking strings/sequences.
   std::map<std::pair<TypeNode, Node>, TNodeTrie> index;
   std::map< Node, unsigned > arity;
-  unsigned functionTerms = d_functionsTerms.size();
+  const context::CDList<TNode>& fterms = d_termReg.getFunctionTerms();
+  size_t functionTerms = fterms.size();
   for (unsigned i = 0; i < functionTerms; ++ i) {
-    TNode f1 = d_functionsTerms[i];
+    TNode f1 = fterms[i];
     Trace("strings-cg") << "...build for " << f1 << std::endl;
     Node op = f1.getOperator();
     std::vector< TNode > reps;
@@ -926,7 +840,7 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) {
       if (!d_equalityEngine.hasTerm(atom[j])
           && atom[j].getType().isStringLike())
       {
-        registerTerm( atom[j], 0 );
+        d_termReg.registerTerm(atom[j], 0);
       }
     }
     Trace("strings-pending-debug") << "  Now assert equality" << std::endl;
@@ -980,7 +894,7 @@ void TheoryStrings::checkRegisterTermsPreNormalForm()
       Node n = (*eqc_i);
       if (!d_bsolver.isCongruent(n))
       {
-        registerTerm(n, 2);
+        d_termReg.registerTerm(n, 2);
       }
       ++eqc_i;
     }
@@ -991,7 +905,7 @@ void TheoryStrings::checkCodes()
 {
   // ensure that lemmas regarding str.code been added for each constant string
   // of length one
-  if (d_has_str_code)
+  if (d_termReg.hasStringCode())
   {
     NodeManager* nm = NodeManager::currentNM();
     // str.code applied to the code term for each equivalence class that has a
@@ -1012,7 +926,7 @@ void TheoryStrings::checkCodes()
         Node cc = nm->mkNode(kind::STRING_TO_CODE, c);
         cc = Rewriter::rewrite(cc);
         Assert(cc.isConst());
-        Node cp = d_im.getProxyVariableFor(c);
+        Node cp = d_termReg.getProxyVariableFor(c);
         AlwaysAssert(!cp.isNull());
         Node vc = nm->mkNode(STRING_TO_CODE, cp);
         if (!d_state.areEqual(cc, vc))
@@ -1062,91 +976,6 @@ void TheoryStrings::checkCodes()
   }
 }
 
-void TheoryStrings::registerTerm(Node n, int effort)
-{
-  TypeNode tn = n.getType();
-  bool do_register = true;
-  if (!tn.isStringLike())
-  {
-    if (options::stringEagerLen())
-    {
-      do_register = effort == 0;
-    }
-    else
-    {
-      do_register = effort > 0 || n.getKind() != STRING_CONCAT;
-    }
-  }
-  if (!do_register)
-  {
-    return;
-  }
-  if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end())
-  {
-    return;
-  }
-  d_registered_terms_cache.insert(n);
-  // ensure the type is registered
-  registerType(tn);
-  NodeManager* nm = NodeManager::currentNM();
-  Debug("strings-register") << "TheoryStrings::registerTerm() " << n
-                            << ", effort = " << effort << std::endl;
-  Node regTermLem;
-  if (tn.isStringLike())
-  {
-    // register length information:
-    //  for variables, split on empty vs positive length
-    //  for concat/const/replace, introduce proxy var and state length relation
-    regTermLem = d_im.registerTerm(n);
-  }
-  else if (n.getKind() == STRING_TO_CODE)
-  {
-    d_has_str_code = true;
-    // ite( str.len(s)==1, 0 <= str.code(s) < |A|, str.code(s)=-1 )
-    Node code_len = utils::mkNLength(n[0]).eqNode(d_one);
-    Node code_eq_neg1 = n.eqNode(d_neg_one);
-    Node code_range = nm->mkNode(
-        AND,
-        nm->mkNode(GEQ, n, d_zero),
-        nm->mkNode(
-            LT, n, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
-    regTermLem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
-  }
-  else if (n.getKind() == STRING_STRIDOF)
-  {
-    Node len = utils::mkNLength(n[0]);
-    regTermLem = nm->mkNode(AND,
-                            nm->mkNode(GEQ, n, nm->mkConst(Rational(-1))),
-                            nm->mkNode(LEQ, n, len));
-  }
-  if (!regTermLem.isNull())
-  {
-    Trace("strings-lemma") << "Strings::Lemma REG-TERM : " << regTermLem
-                           << std::endl;
-    Trace("strings-assert") << "(assert " << regTermLem << ")" << std::endl;
-    ++(d_statistics.d_lemmasRegisterTerm);
-    d_out->lemma(regTermLem);
-  }
-}
-
-void TheoryStrings::registerType(TypeNode tn)
-{
-  if (d_registeredTypesCache.find(tn) != d_registeredTypesCache.end())
-  {
-    return;
-  }
-  d_registeredTypesCache.insert(tn);
-  if (tn.isStringLike())
-  {
-    // preregister the empty word for the type
-    Node emp = Word::mkEmptyWord(tn);
-    if (!d_equalityEngine.hasTerm(emp))
-    {
-      preRegisterTerm(emp);
-    }
-  }
-}
-
 void TheoryStrings::checkRegisterTermsNormalForms()
 {
   const std::vector<Node>& seqc = d_bsolver.getStringEqc();
@@ -1159,7 +988,7 @@ void TheoryStrings::checkRegisterTermsNormalForms()
     if (lt.isNull())
     {
       Node c = utils::mkNConcat(nfi.d_nf, eqc.getType());
-      registerTerm(c, 3);
+      d_termReg.registerTerm(c, 3);
     }
   }
 }
diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h
index 453e4eca9..4a61730f4 100644
--- a/src/theory/strings/theory_strings.h
+++ b/src/theory/strings/theory_strings.h
@@ -24,7 +24,6 @@
 
 #include "context/cdhashset.h"
 #include "context/cdlist.h"
-#include "expr/attribute.h"
 #include "expr/node_trie.h"
 #include "theory/strings/base_solver.h"
 #include "theory/strings/core_solver.h"
@@ -40,6 +39,7 @@
 #include "theory/strings/solver_state.h"
 #include "theory/strings/strings_fmf.h"
 #include "theory/strings/strings_rewriter.h"
+#include "theory/strings/term_registry.h"
 #include "theory/theory.h"
 #include "theory/uf/equality_engine.h"
 
@@ -93,9 +93,6 @@ enum InferStep
 };
 std::ostream& operator<<(std::ostream& out, Inference i);
 
-struct StringsProxyVarAttributeId {};
-typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute;
-
 class TheoryStrings : public Theory {
   friend class InferenceManager;
   typedef context::CDList<Node> NodeList;
@@ -238,13 +235,10 @@ class TheoryStrings : public Theory {
   eq::EqualityEngine d_equalityEngine;
   /** The solver state object */
   SolverState d_state;
+  /** The term registry for this theory */
+  TermRegistry d_termReg;
   /** The (custom) output channel of the theory of strings */
   InferenceManager d_im;
-  // preReg cache
-  NodeSet d_pregistered_terms_cache;
-  NodeSet d_registered_terms_cache;
-  /** The types that we have preregistered */
-  TypeNodeSet d_registeredTypesCache;
   std::vector< Node > d_empty_vec;
 private:
 
@@ -266,24 +260,6 @@ private:
   EqualityStatus getEqualityStatus(TNode a, TNode b) override;
 
  private:
-  /** All the function terms that the theory has seen */
-  context::CDList<TNode> d_functionsTerms;
-private:
-  /** have we asserted any str.code terms? */
-  bool d_has_str_code;
-  // static information about extf
-  class ExtfInfo {
-  public:
-    //all variables in this term
-    std::vector< Node > d_vars;
-  };
-
- private:
-
-  /** cache of all skolems */
-  SkolemCache d_sk_cache;
-
- private:
   void addCarePairs(TNodeTrie* t1,
                     TNodeTrie* t2,
                     unsigned arity,
@@ -334,38 +310,10 @@ private:
    */
   void assertPendingFact(Node atom, bool polarity, Node exp);
 
-  /** Register term
-   *
-   * This performs SAT-context-independent registration for a term n, which
-   * may cause lemmas to be sent on the output channel that involve
-   * "initial refinement lemmas" for n. This includes introducing proxy
-   * variables for string terms and asserting that str.code terms are within
-   * proper bounds.
-   *
-   * Effort is one of the following (TODO make enum #1881):
-   * 0 : upon preregistration or internal assertion
-   * 1 : upon occurrence in length term
-   * 2 : before normal form computation
-   * 3 : called on normal form terms
-   *
-   * Based on the strategy, we may choose to add these initial refinement
-   * lemmas at one of the following efforts, where if it is not the given
-   * effort, the call to this method does nothing.
-   */
-  void registerTerm(Node n, int effort);
-  /** Register type
-   *
-   * Ensures the theory solver is setup to handle string-like type tn. In
-   * particular, this includes:
-   * - Calling preRegisterTerm on the empty word for tn
-   */
-  void registerType(TypeNode tn);
-
   // Symbolic Regular Expression
  private:
   /** The theory rewriter for this theory. */
   StringsRewriter d_rewriter;
-
   /**
    * The base solver, responsible for reasoning about congruent terms and
    * inferring constants for equivalence classes.