Add LFSC signature for strings (#7523)

This includes the currently supported rules and cases, although it is highly incomplete.
author: Andrew Reynolds <andrew.j.reynolds@gmail.com> 2021-11-09 09:58:30 -0600
committer: GitHub <noreply@github.com> 2021-11-09 15:58:30 +0000
commit: 29dfa6b2c58887b44d9f89ce67435e75e84f12b9 (patch)
tree: c67860af3177e0f1d4584af0c1ac1ddea7f6ae8a
parent: 1d145ff68ba6e4abaf77adfe97b072f63c4d7231 (diff)
2 files changed, 469 insertions, 0 deletions
diff --git a/proofs/lfsc/signatures/strings_programs.plf b/proofs/lfsc/signatures/strings_programs.plf
new file mode 100644
index 000000000..6a152772f
--- /dev/null
+++ b/proofs/lfsc/signatures/strings_programs.plf
@@ -0,0 +1,345 @@
+; depends: nary_programs.plf theory_def.plf
+
+; Make empty string of the given string-like sort s.
+(program sc_mk_emptystr ((s sort)) term
+  (match s
+    ((Seq t) (seq.empty s))
+    (String emptystr)))
+
+; Get the term corresponding to the prefix of term s of fixed length n.
+(program sc_skolem_prefix ((s term) (n term)) term 
+  (str.substr s (int 0) n)
+)
+
+; Get the term corresponding to the suffix of term s of fixed length n.
+(program sc_skolem_suffix_rem ((s term) (n term)) term
+  (str.substr s n (a.- (str.len s) n))
+)
+
+; Get the term corresponding to the prefix of s before the first occurrence of
+; t in s.
+(program sc_skolem_first_ctn_pre ((s term) (t term)) term 
+  (sc_skolem_prefix s (str.indexof s t (int 0))))
+
+; Get the term corresponding to the suffix of s after the first occurrence of
+; t in s.
+(program sc_skolem_first_ctn_post ((s term) (t term)) term 
+  (sc_skolem_suffix_rem s (a.+ (str.len (sc_skolem_first_ctn_pre s t)) (a.+ (str.len t) (int 0)))))
+
+; Head and tail for string concatenation.
+(program sc_string_head ((t term)) term (nary_head f_str.++ t))
+(program sc_string_tail ((t term)) term (nary_tail f_str.++ t))
+
+; Concatenation str.++ applications t1 and t2.
+(program sc_string_concat ((t1 term) (t2 term)) term (nary_concat f_str.++ t1 t2 emptystr))
+
+; Decompose str.++ term t into a head and tail.
+(program sc_string_decompose ((t term)) termPair (nary_decompose f_str.++ t emptystr))
+
+; Insert a string into str.++ term t.
+(program sc_string_insert ((elem term) (t term)) term (nary_insert f_str.++ elem t emptystr))
+
+; Return reverse of t if rev = tt, return t unchanged otherwise.
+(program sc_string_rev ((t term) (rev flag)) term (ifequal rev tt (nary_reverse f_str.++ t emptystr) t))
+
+; Convert a str.++ application t into its element, if it is a singleton, return t unchanged otherwise.
+(program sc_string_nary_elim ((t term)) term (nary_elim f_str.++ t emptystr))
+
+; Convert t into a str.++ application, if it is not already in n-ary form.
+(program sc_string_nary_intro ((t term)) term (nary_intro f_str.++ t emptystr))
+
+; In the following, a "reduction predicate" refers to a formula that is used
+; to axiomatize an extended string function in terms of (simpler) functions.
+
+; Compute the reduction predicate for (str.substr x n m) of sort s.
+(program sc_string_reduction_substr ((x term) (n term) (m term) (s sort)) term
+  (let k (sc_mk_skolem (str.substr x n m))
+  (let npm (a.+ n (a.+ m (int 0)))
+  (let k1 (sc_mk_skolem (sc_skolem_prefix x n))
+  (let k2 (sc_mk_skolem (sc_skolem_suffix_rem x npm))
+  (ite
+    ; condition
+    (and (a.>= n (int 0))
+      (and (a.> (str.len x) n)
+        (and (a.> m (int 0))
+          true)))
+    ; if branch
+    (and (= x (str.++ k1 (str.++ k (str.++ k2 (sc_mk_emptystr s)))))
+      (and (= (str.len k1) n)
+        (and (or (= (str.len k2) (a.- (str.len x) npm))
+                (or (= (str.len k2) (int 0))
+                  false))
+          (and (a.<= (str.len k) m)
+            true))))
+    ; else branch
+    (= k (sc_mk_emptystr s))
+    )))))
+)
+
+; Compute the reduction predicate for (str.indexof x y n) of sort s.
+(program sc_string_reduction_indexof ((x term) (y term) (n term) (s sort)) term
+  (let k (sc_mk_skolem (str.indexof x y n))
+  (let xn (str.substr x n (a.- (str.len x) n))
+  (let k1 (sc_mk_skolem (sc_skolem_first_ctn_pre xn y))
+  (let k2 (sc_mk_skolem (sc_skolem_first_ctn_post xn y))
+  (ite
+    (or (not (str.contains xn y))
+      (or (a.> n (str.len x))
+        (or (a.> (int 0) n)
+          false)))
+    (= k (int (~ 1)))
+    (ite
+      (= y (sc_mk_emptystr s))
+      (= k n)
+      (and (= xn (str.++ k1 (str.++ y (str.++ k2 (sc_mk_emptystr s)))))
+        (and (not (str.contains
+                    (str.++ k1
+                      (str.++ (str.substr y (int 0) (a.- (str.len y) (int 1)))
+                        (sc_mk_emptystr s))) y))
+          (and (= k (a.+ n (a.+ (str.len k1) (int 0))))
+            true)))
+)))))))
+
+; Compute the reduction predicate for term t of sort s. Note that the operators
+; listed in comments are missing from the signature currently.
+(program sc_string_reduction_pred ((t term) (s sort)) term
+  (match t 
+    ((apply t1 t2)
+      (match t1
+        ((apply t11 t12)
+          (match t11
+            ; str.contains
+            ((apply t111 t112) 
+              (match t111
+                (f_str.substr (sc_string_reduction_substr t112 t12 t2 s))
+                (f_str.indexof (sc_string_reduction_indexof t112 t12 t2 s))
+                ; str.replace
+            ; str.update
+            ; str.from_int
+            ; str.to_int
+            ; seq.nth
+            ; str.replaceall
+            ; str.replace_re
+            ; str.replace_re_all
+            ; str.tolower
+            ; str.toupper
+            ; str.rev
+            ; str.leq
+))))))))
+
+; Returns the reduction predicate and purification equality for term t
+; of sort s.
+(program sc_string_reduction ((t term) (s sort)) term
+  (and (sc_string_reduction_pred t s) (and (= t (sc_mk_skolem t)) true))
+)
+
+; Compute the eager reduction predicate for (str.contains t r) where s
+; is the sort of t and r.
+; This is the formula:
+;    (ite (str.contains t r) (= t (str.++ sk1 r sk2)) (not (= t r)))
+(program sc_string_eager_reduction_contains ((t term) (r term) (s sort)) term
+  (let k1 (sc_mk_skolem (sc_skolem_first_ctn_pre t r))
+  (let k2 (sc_mk_skolem (sc_skolem_first_ctn_post t r))
+  (ite
+    (str.contains t r)
+    (= t (str.++ k1 (str.++ r (str.++ k2 (sc_mk_emptystr s)))))
+    (not (= t r)))
+  ))
+)
+
+; Compute the eager reduction predicate for (str.code s), which is the formula:
+;   (ite (= (str.len s) 1) 
+;     (and (<= 0 (str.code s)) (< (str.code s) A))
+;     (= (str.code s) (- 1)))
+(program sc_string_eager_reduction_to_code ((s term)) term
+  (let t (str.to_code s)
+  (ite
+    (= (str.len s) (int 1))
+    (and (a.>= t (int 0)) (and (a.< t (int 196608)) true))
+    (= t (int (~ 1)))))
+)
+
+; Compute the eager reduction predicate for (str.indexof x y n), which is the
+; formula: 
+; (and 
+;   (or (= (str.indexof x y n) (- 1)) (>= (str.indexof x y n) n)) 
+;   (<= (str.indexof x y n) (str.len x)))
+(program sc_string_eager_reduction_indexof ((x term) (y term) (n term)) term
+  (let t (str.indexof x y n)
+  (and (or (= t (int (~ 1))) (or (a.>= t n) false))
+    (and (a.<= t (str.len x))
+      true)))
+)
+
+; Compute the eager reduction predicate of term t of sort s.
+(program sc_string_eager_reduction ((t term) (s sort)) term
+  (match t 
+    ((apply t1 t2)
+      (match t1
+        (f_str.to_code (sc_string_eager_reduction_to_code t2))
+        ((apply t11 t12)
+          (match t11
+            (f_str.contains (sc_string_eager_reduction_contains t12 t2 s))
+            ((apply t111 t112)
+              (match t111
+                (f_str.indexof (sc_string_eager_reduction_indexof t112 t12 t2)))))))))
+)
+
+; A helper method for computing the conclusion of PfRule::RE_UNFOLD_POS.
+; For a regular expression (re.++ R1 ... Rn), this returns a pair of terms
+; where the first term is a concatentation (str.++ t1 ... tn) and the second
+; is a conjunction M of literals of the form (str.in_re ti Ri), such that
+;   (str.in_re x (re.++ R1 ... Rn))
+; is equivalent to
+;   (and (= x (str.++ t1 ... tn)) M)
+; We use the optimization that Rj may be (str.to_re tj); otherwise tj is an
+; application of the unfolding skolem function skolem_re_unfold_pos.
+(program sc_re_unfold_pos_concat ((t term) (r term) (ro term) (i mpz)) termPair
+  (match r
+    ((apply r1 r2)
+      (match (sc_re_unfold_pos_concat t r2 ro (mp_add i 1))
+        ((pair p1 p2)
+        (let r12 (getarg f_re.++ r1)
+        (let r122 (try_getarg f_str.to_re r12)
+        (ifequal r122 r12 
+          (let k (skolem_re_unfold_pos t ro i)
+          (pair (str.++ k p1) (and (str.in_re k r12) p2)))
+          (pair (str.++ r122 p1) p2)))))))
+    (default (pair emptystr true))
+))
+
+; Returns a formula corresponding to a conjunction saying that each of the
+; elements of str.++ application t is empty. For example for
+;   (str.++ x (str.++ y ""))
+; this returns:
+;  (and (= x "") (and (= y "") true))
+(program sc_non_empty_concats ((t term)) term
+  (match t
+    ((apply t1 t2)
+      (and (not (= (getarg f_str.++ t1) emptystr)) (sc_non_empty_concats t2)))
+    (emptystr true)))
+
+; Get first character or empty string from term t.
+; If t is of the form (str.++ (char n) ...), return (char n).
+; If t is of the form emptystr, return emptystr.
+; Otherwise, this side condition fails
+(program sc_string_first_char_or_empty ((t term)) term
+  (match t
+    ((apply t1 t2)
+      (let t12 (getarg f_str.++ t1)
+      (match t12
+        ((char n) t12))))
+    (emptystr t)))
+
+
+; Flatten constants in str.++ application s. Notice that the rewritten form
+; of strings in cvc5 are such that constants are grouped into constants of
+; length >=1 which we call "word" constants. For example, the cvc5 rewritten
+; form of (str.++ "A" "B" x) is (str.++ "AB" x) which in LFSC is represented as:
+;    (str.++ (str.++ (char 65) (str.++ (char 66) emptystr)) (str.++ x emptystr))
+; For convenience, in this documentation, we will write this simply as:
+;    (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; e.g. we assume that word constants are represented using char and emptystr.
+; Many string rules rely on processing the prefix of strings, which in LFSC
+; involves reasoning about the characters one-by-one. Since the above term
+; has a level of nesting when word constants of size > 1 are involved, this
+; method is used to "flatten" str.++ applications so that we have a uniform
+; way of reasoning about them in proof rules. In this method, we take a
+; str.++ application corresponding to a string term in cvc5 rewritten form.
+; It returns the flattened form such that there are no nested applications of
+; str.++. For example, given input:
+;    (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; we return:
+;    (str.++ "A" (str.++ "B" (str.++ x "")))
+; Notice that this is done for all word constants in the chain recursively.
+; It does not insist that the nested concatenations are over characters only.
+; This rule may fail if s is not a str.++ application corresponding to a term
+; in cvc5 rewritten form.
+(program sc_string_flatten ((s term)) term
+  (match s
+    ((apply s1 s2)
+      (let s12 (getarg f_str.++ s1)
+        ; Must handle nested concatenation for word constant. We know there is no nested concatenation within s12, so we don't need to flatten it.
+        ; Since s12 may not be a concat term, we must use n-ary intro to ensure it is in n-ary form
+        (sc_string_concat (sc_string_nary_intro s12) (sc_string_flatten s2))))
+    (emptystr s))
+)
+
+; Helper for collecting adjacent constants. This side condition takes as input
+; a str.++ application s. It returns a pair whose concatenation is equal to s,
+; whose first component corresponds to a word constant, and whose second
+; component is a str.++ application whose first element is not a character.
+; For example, for:
+;   (str.++ "A" (str.++ "B" (str.++ x "")))
+; We return:
+;   (pair (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+(program sc_string_collect_acc ((s term)) termPair
+  (match s
+    ((apply s1 s2)
+      (match (getarg f_str.++ s1)
+        ((char n)
+          (match (sc_string_collect_acc s2)
+            ((pair ssc1 ssc2) (pair (apply s1 ssc1) ssc2))))
+        (default (pair emptystr s))))
+    (emptystr (pair emptystr s)))
+)
+
+; Collect adjacent constants for the prefix of string s. For example:
+;    (str.++ "A" (str.++ "B" (str.++ x "")))
+; we return:
+;    (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; This side condition may fail if s is not a str.++ application.
+; Notice that the collection of constants is done for all word constants in the
+; term s recursively.
+(program sc_string_collect ((s term)) term
+  (match (sc_string_collect_acc s)
+    ((pair sc1 sc2)
+      (match sc1
+        ; did not strip a constant prefix
+        (emptystr 
+          (match s
+            ((apply s1 s2) (apply s1 (sc_string_collect s2)))
+            (emptystr s)))
+        ; stripped a constant prefix, must eliminate singleton
+        (default (str.++ (sc_string_nary_elim sc1) (sc_string_collect sc2))))))
+)
+
+; Strip equal prefix of s and t. This returns the pair corresponding to s and
+; t after dropping the maximal equal prefix removed. For example, for:
+;   (str.++ x (str.++ y (str.++ z "")))
+;   (str.++ x (str.++ w ""))
+; This method will return:
+;   (pair (str.++ y (str.++ z "")) (str.++ w ""))
+; This side condition may fail if s or t is not a str.++ application.
+(program sc_strip_prefix ((s term) (t term)) termPair
+  (match s
+    ((apply s1 s2)
+      (let s12 (getarg f_str.++ s1)
+        (match t
+          ((apply t1 t2)
+            (let t12 (getarg f_str.++ t1)
+              (ifequal s12 t12
+                (sc_strip_prefix s2 t2)
+                (pair s t))))
+          (emptystr (pair s t)))))
+    (emptystr (pair s t)))
+)
+
+; Converts a str.++ application into "flat form" so that we are ready to
+; process its prefix. This consists of the following steps:
+; (1) convert s to n-ary form if it is not already a str.++ application,
+; (2) flatten so that its constant prefix,
+; (3) (optionally) reverse.
+(program sc_string_to_flat_form ((s term) (rev flag)) term
+  ; intro, flatten, reverse
+  (sc_string_rev (sc_string_flatten (sc_string_nary_intro s)) rev))
+
+; Converts a term in "flat form" to a term that is in a form that corresponds
+; to one in cvc5 rewritten form. This is the dual method to
+; sc_string_to_flat_form. This consists of the following steps:
+; (1) (optionally) reverse,
+; (2) collect constants 
+; (3) eliminate n-ary form to its element if the term is a singleton list.
+(program sc_string_from_flat_form ((s term) (rev flag)) term
+  ; reverse, collect, elim
+  (sc_string_nary_elim (sc_string_collect (sc_string_rev s rev))))
diff --git a/proofs/lfsc/signatures/strings_rules.plf b/proofs/lfsc/signatures/strings_rules.plf
new file mode 100644
index 000000000..af981096a
--- /dev/null
+++ b/proofs/lfsc/signatures/strings_rules.plf
@@ -0,0 +1,124 @@
+; depends: strings_programs.plf
+
+(declare string_length_non_empty (! s term (! p (holds (not (= s emptystr))) (holds (not (= (str.len s) (int 0)))))))
+
+; Computes the conclusion of the PfRule::CONCAT_EQ rule
+(program sc_concat_eq ((s term) (t term) (rev flag)) term
+  (match (sc_strip_prefix
+           (sc_string_to_flat_form s rev) 
+           (sc_string_to_flat_form t rev))
+    ((pair ss ts)
+      (= 
+        (sc_string_from_flat_form ss rev)
+        (sc_string_from_flat_form ts rev)))))
+
+(declare concat_eq (! s term 
+                   (! t term 
+                   (! res term 
+                   (! rev flag
+                   (! p (holds (= s t))
+                   (! r (^ (sc_concat_eq s t rev) res) 
+                      (holds res))))))))
+
+; Computes whether the heads of the premise (= s t) match s1 and t1 for the
+; PfRule::CONCAT_UNIFY rule
+(program sc_concat_unify ((s term) (t term) (s1 term) (t1 term) (rev flag)) Ok
+  (ifequal (sc_string_head (sc_string_rev s rev)) s1
+    (ifequal (sc_string_head (sc_string_rev t rev)) t1
+      ok
+      (fail Ok))
+    (fail Ok)))
+
+(declare concat_unify (! s term
+                      (! t term
+                      (! s1 term
+                      (! t1 term
+                      (! rev flag
+                      (! p (holds (= s t))
+                      (! p1 (holds (= (str.len s1) (str.len t1)))
+                      (! r (^ (sc_concat_unify s t s1 t1 rev) ok)
+                          (holds (= s1 t1)))))))))))
+
+; Computes the conclusion of the PfRule::CONCAT_CSPLIT rule
+(program sc_concat_csplit ((thead term) (t term) (s term) (rev flag)) term
+  (match (sc_string_to_flat_form t rev)
+    ((apply t1 t2)
+      (ifequal (getarg f_str.++ t1) thead
+        (match (sc_string_to_flat_form s rev)
+          ((apply s1 s2)
+            (let s12 (getarg f_str.++ s1)
+            (match s12
+              ((char n)
+                (= thead
+                (ifequal rev ff
+                  (str.++ s12 (str.++ (sc_mk_skolem (sc_skolem_suffix_rem thead (int 1))) emptystr))
+                  (str.++ (sc_mk_skolem (sc_skolem_prefix thead (a.- (str.len thead) (int 1)))) (str.++ s12 emptystr)))))))))
+          (fail term))))
+)
+
+(declare concat_csplit 
+  (! t1 term
+  (! t term
+  (! s term
+  (! res term
+  (! rev flag
+  (! p1 (holds (= t s))
+  (! p2 (holds (not (= (str.len t1) (int 0))))
+  (! r (^ (sc_concat_csplit t1 t s rev) res)
+    (holds res))))))))))
+
+; Checks whether the conditions on the premise (= s t) is satisfied for the
+; PfRule::CONCAT_CONFLICT rule
+(program sc_concat_conflict ((s term) (t term) (rev flag)) Ok
+  (match (sc_strip_prefix
+           (sc_string_to_flat_form s rev)
+           (sc_string_to_flat_form t rev))
+    ((pair ss ts)
+      (ifequal
+        (sc_string_first_char_or_empty ss)
+        (sc_string_first_char_or_empty ts)
+        (fail Ok)
+        ok))))
+
+(declare concat_conflict (! s term
+                         (! t term
+                         (! rev flag
+                         (! p (holds (= s t))
+                         (! r (^ (sc_concat_conflict s t rev) ok)
+                            (holds false)))))))
+
+(declare string_length_pos (! t term 
+  (holds (or (and (= (str.len t) (int 0)) (and (= t emptystr) true)) (or (a.> (str.len t) (int 0)) false)))))
+
+(declare re_inter (! x term (! s term (! t term (! p1 (holds (str.in_re x s)) (! p2 (holds (str.in_re x t)) 
+  (holds (str.in_re x (re.inter s t)))))))))
+
+(declare string_reduction (! r term (! t term (! s sort (! u (^ (sc_string_reduction t s) r)
+  (holds r))))))))
+
+(declare string_eager_reduction (! r term (! t term (! s sort (! u (^ (sc_string_eager_reduction t s) r)
+  (holds r))))))
+
+; Computes the conclusion of PfRule::RE_UNFOLD_POS
+(program sc_re_unfold_pos ((t term) (r term)) term
+  (match r
+    ((apply r1 r2)
+      (match r1
+        ; case for star
+        (f_re.*
+          (let rr (re.++ r2 (re.++ r (re.++ r2 re.empty)))
+          (match (sc_re_unfold_pos_concat t rr rr 0)
+            ((pair p1 p2)
+              (or (= t emptystr)
+                (or (str.in_re t r2)
+                  (or (and (sc_string_nary_elim (and (= t p1) p2)) (sc_non_empty_concats p1))
+                    false)))))))
+        ((apply r11 r12)
+          (match r11
+            ; case for concatenation
+            (f_re.++
+              (match (sc_re_unfold_pos_concat t r r 0)
+                ((pair p1 p2) (sc_string_nary_elim (and (= t p1) p2)))))
+))))))
+
+(declare re_unfold_pos (! t term (! r term (! s term (! f (holds (str.in_re t r)) (! u (^ (sc_re_unfold_pos t r) s) (holds s)))))))
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>	2021-11-09 09:58:30 -0600
committer	GitHub <noreply@github.com>	2021-11-09 15:58:30 +0000
commit	29dfa6b2c58887b44d9f89ce67435e75e84f12b9 (patch)
tree	c67860af3177e0f1d4584af0c1ac1ddea7f6ae8a
parent	1d145ff68ba6e4abaf77adfe97b072f63c4d7231 (diff)