summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>2021-11-09 09:58:30 -0600
committerGitHub <noreply@github.com>2021-11-09 15:58:30 +0000
commit29dfa6b2c58887b44d9f89ce67435e75e84f12b9 (patch)
treec67860af3177e0f1d4584af0c1ac1ddea7f6ae8a
parent1d145ff68ba6e4abaf77adfe97b072f63c4d7231 (diff)
Add LFSC signature for strings (#7523)
This includes the currently supported rules and cases, although it is highly incomplete.
-rw-r--r--proofs/lfsc/signatures/strings_programs.plf345
-rw-r--r--proofs/lfsc/signatures/strings_rules.plf124
2 files changed, 469 insertions, 0 deletions
diff --git a/proofs/lfsc/signatures/strings_programs.plf b/proofs/lfsc/signatures/strings_programs.plf
new file mode 100644
index 000000000..6a152772f
--- /dev/null
+++ b/proofs/lfsc/signatures/strings_programs.plf
@@ -0,0 +1,345 @@
+; depends: nary_programs.plf theory_def.plf
+
+; Make empty string of the given string-like sort s.
+(program sc_mk_emptystr ((s sort)) term
+ (match s
+ ((Seq t) (seq.empty s))
+ (String emptystr)))
+
+; Get the term corresponding to the prefix of term s of fixed length n.
+(program sc_skolem_prefix ((s term) (n term)) term
+ (str.substr s (int 0) n)
+)
+
+; Get the term corresponding to the suffix of term s of fixed length n.
+(program sc_skolem_suffix_rem ((s term) (n term)) term
+ (str.substr s n (a.- (str.len s) n))
+)
+
+; Get the term corresponding to the prefix of s before the first occurrence of
+; t in s.
+(program sc_skolem_first_ctn_pre ((s term) (t term)) term
+ (sc_skolem_prefix s (str.indexof s t (int 0))))
+
+; Get the term corresponding to the suffix of s after the first occurrence of
+; t in s.
+(program sc_skolem_first_ctn_post ((s term) (t term)) term
+ (sc_skolem_suffix_rem s (a.+ (str.len (sc_skolem_first_ctn_pre s t)) (a.+ (str.len t) (int 0)))))
+
+; Head and tail for string concatenation.
+(program sc_string_head ((t term)) term (nary_head f_str.++ t))
+(program sc_string_tail ((t term)) term (nary_tail f_str.++ t))
+
+; Concatenation str.++ applications t1 and t2.
+(program sc_string_concat ((t1 term) (t2 term)) term (nary_concat f_str.++ t1 t2 emptystr))
+
+; Decompose str.++ term t into a head and tail.
+(program sc_string_decompose ((t term)) termPair (nary_decompose f_str.++ t emptystr))
+
+; Insert a string into str.++ term t.
+(program sc_string_insert ((elem term) (t term)) term (nary_insert f_str.++ elem t emptystr))
+
+; Return reverse of t if rev = tt, return t unchanged otherwise.
+(program sc_string_rev ((t term) (rev flag)) term (ifequal rev tt (nary_reverse f_str.++ t emptystr) t))
+
+; Convert a str.++ application t into its element, if it is a singleton, return t unchanged otherwise.
+(program sc_string_nary_elim ((t term)) term (nary_elim f_str.++ t emptystr))
+
+; Convert t into a str.++ application, if it is not already in n-ary form.
+(program sc_string_nary_intro ((t term)) term (nary_intro f_str.++ t emptystr))
+
+; In the following, a "reduction predicate" refers to a formula that is used
+; to axiomatize an extended string function in terms of (simpler) functions.
+
+; Compute the reduction predicate for (str.substr x n m) of sort s.
+(program sc_string_reduction_substr ((x term) (n term) (m term) (s sort)) term
+ (let k (sc_mk_skolem (str.substr x n m))
+ (let npm (a.+ n (a.+ m (int 0)))
+ (let k1 (sc_mk_skolem (sc_skolem_prefix x n))
+ (let k2 (sc_mk_skolem (sc_skolem_suffix_rem x npm))
+ (ite
+ ; condition
+ (and (a.>= n (int 0))
+ (and (a.> (str.len x) n)
+ (and (a.> m (int 0))
+ true)))
+ ; if branch
+ (and (= x (str.++ k1 (str.++ k (str.++ k2 (sc_mk_emptystr s)))))
+ (and (= (str.len k1) n)
+ (and (or (= (str.len k2) (a.- (str.len x) npm))
+ (or (= (str.len k2) (int 0))
+ false))
+ (and (a.<= (str.len k) m)
+ true))))
+ ; else branch
+ (= k (sc_mk_emptystr s))
+ )))))
+)
+
+; Compute the reduction predicate for (str.indexof x y n) of sort s.
+(program sc_string_reduction_indexof ((x term) (y term) (n term) (s sort)) term
+ (let k (sc_mk_skolem (str.indexof x y n))
+ (let xn (str.substr x n (a.- (str.len x) n))
+ (let k1 (sc_mk_skolem (sc_skolem_first_ctn_pre xn y))
+ (let k2 (sc_mk_skolem (sc_skolem_first_ctn_post xn y))
+ (ite
+ (or (not (str.contains xn y))
+ (or (a.> n (str.len x))
+ (or (a.> (int 0) n)
+ false)))
+ (= k (int (~ 1)))
+ (ite
+ (= y (sc_mk_emptystr s))
+ (= k n)
+ (and (= xn (str.++ k1 (str.++ y (str.++ k2 (sc_mk_emptystr s)))))
+ (and (not (str.contains
+ (str.++ k1
+ (str.++ (str.substr y (int 0) (a.- (str.len y) (int 1)))
+ (sc_mk_emptystr s))) y))
+ (and (= k (a.+ n (a.+ (str.len k1) (int 0))))
+ true)))
+)))))))
+
+; Compute the reduction predicate for term t of sort s. Note that the operators
+; listed in comments are missing from the signature currently.
+(program sc_string_reduction_pred ((t term) (s sort)) term
+ (match t
+ ((apply t1 t2)
+ (match t1
+ ((apply t11 t12)
+ (match t11
+ ; str.contains
+ ((apply t111 t112)
+ (match t111
+ (f_str.substr (sc_string_reduction_substr t112 t12 t2 s))
+ (f_str.indexof (sc_string_reduction_indexof t112 t12 t2 s))
+ ; str.replace
+ ; str.update
+ ; str.from_int
+ ; str.to_int
+ ; seq.nth
+ ; str.replaceall
+ ; str.replace_re
+ ; str.replace_re_all
+ ; str.tolower
+ ; str.toupper
+ ; str.rev
+ ; str.leq
+))))))))
+
+; Returns the reduction predicate and purification equality for term t
+; of sort s.
+(program sc_string_reduction ((t term) (s sort)) term
+ (and (sc_string_reduction_pred t s) (and (= t (sc_mk_skolem t)) true))
+)
+
+; Compute the eager reduction predicate for (str.contains t r) where s
+; is the sort of t and r.
+; This is the formula:
+; (ite (str.contains t r) (= t (str.++ sk1 r sk2)) (not (= t r)))
+(program sc_string_eager_reduction_contains ((t term) (r term) (s sort)) term
+ (let k1 (sc_mk_skolem (sc_skolem_first_ctn_pre t r))
+ (let k2 (sc_mk_skolem (sc_skolem_first_ctn_post t r))
+ (ite
+ (str.contains t r)
+ (= t (str.++ k1 (str.++ r (str.++ k2 (sc_mk_emptystr s)))))
+ (not (= t r)))
+ ))
+)
+
+; Compute the eager reduction predicate for (str.code s), which is the formula:
+; (ite (= (str.len s) 1)
+; (and (<= 0 (str.code s)) (< (str.code s) A))
+; (= (str.code s) (- 1)))
+(program sc_string_eager_reduction_to_code ((s term)) term
+ (let t (str.to_code s)
+ (ite
+ (= (str.len s) (int 1))
+ (and (a.>= t (int 0)) (and (a.< t (int 196608)) true))
+ (= t (int (~ 1)))))
+)
+
+; Compute the eager reduction predicate for (str.indexof x y n), which is the
+; formula:
+; (and
+; (or (= (str.indexof x y n) (- 1)) (>= (str.indexof x y n) n))
+; (<= (str.indexof x y n) (str.len x)))
+(program sc_string_eager_reduction_indexof ((x term) (y term) (n term)) term
+ (let t (str.indexof x y n)
+ (and (or (= t (int (~ 1))) (or (a.>= t n) false))
+ (and (a.<= t (str.len x))
+ true)))
+)
+
+; Compute the eager reduction predicate of term t of sort s.
+(program sc_string_eager_reduction ((t term) (s sort)) term
+ (match t
+ ((apply t1 t2)
+ (match t1
+ (f_str.to_code (sc_string_eager_reduction_to_code t2))
+ ((apply t11 t12)
+ (match t11
+ (f_str.contains (sc_string_eager_reduction_contains t12 t2 s))
+ ((apply t111 t112)
+ (match t111
+ (f_str.indexof (sc_string_eager_reduction_indexof t112 t12 t2)))))))))
+)
+
+; A helper method for computing the conclusion of PfRule::RE_UNFOLD_POS.
+; For a regular expression (re.++ R1 ... Rn), this returns a pair of terms
+; where the first term is a concatentation (str.++ t1 ... tn) and the second
+; is a conjunction M of literals of the form (str.in_re ti Ri), such that
+; (str.in_re x (re.++ R1 ... Rn))
+; is equivalent to
+; (and (= x (str.++ t1 ... tn)) M)
+; We use the optimization that Rj may be (str.to_re tj); otherwise tj is an
+; application of the unfolding skolem function skolem_re_unfold_pos.
+(program sc_re_unfold_pos_concat ((t term) (r term) (ro term) (i mpz)) termPair
+ (match r
+ ((apply r1 r2)
+ (match (sc_re_unfold_pos_concat t r2 ro (mp_add i 1))
+ ((pair p1 p2)
+ (let r12 (getarg f_re.++ r1)
+ (let r122 (try_getarg f_str.to_re r12)
+ (ifequal r122 r12
+ (let k (skolem_re_unfold_pos t ro i)
+ (pair (str.++ k p1) (and (str.in_re k r12) p2)))
+ (pair (str.++ r122 p1) p2)))))))
+ (default (pair emptystr true))
+))
+
+; Returns a formula corresponding to a conjunction saying that each of the
+; elements of str.++ application t is empty. For example for
+; (str.++ x (str.++ y ""))
+; this returns:
+; (and (= x "") (and (= y "") true))
+(program sc_non_empty_concats ((t term)) term
+ (match t
+ ((apply t1 t2)
+ (and (not (= (getarg f_str.++ t1) emptystr)) (sc_non_empty_concats t2)))
+ (emptystr true)))
+
+; Get first character or empty string from term t.
+; If t is of the form (str.++ (char n) ...), return (char n).
+; If t is of the form emptystr, return emptystr.
+; Otherwise, this side condition fails
+(program sc_string_first_char_or_empty ((t term)) term
+ (match t
+ ((apply t1 t2)
+ (let t12 (getarg f_str.++ t1)
+ (match t12
+ ((char n) t12))))
+ (emptystr t)))
+
+
+; Flatten constants in str.++ application s. Notice that the rewritten form
+; of strings in cvc5 are such that constants are grouped into constants of
+; length >=1 which we call "word" constants. For example, the cvc5 rewritten
+; form of (str.++ "A" "B" x) is (str.++ "AB" x) which in LFSC is represented as:
+; (str.++ (str.++ (char 65) (str.++ (char 66) emptystr)) (str.++ x emptystr))
+; For convenience, in this documentation, we will write this simply as:
+; (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; e.g. we assume that word constants are represented using char and emptystr.
+; Many string rules rely on processing the prefix of strings, which in LFSC
+; involves reasoning about the characters one-by-one. Since the above term
+; has a level of nesting when word constants of size > 1 are involved, this
+; method is used to "flatten" str.++ applications so that we have a uniform
+; way of reasoning about them in proof rules. In this method, we take a
+; str.++ application corresponding to a string term in cvc5 rewritten form.
+; It returns the flattened form such that there are no nested applications of
+; str.++. For example, given input:
+; (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; we return:
+; (str.++ "A" (str.++ "B" (str.++ x "")))
+; Notice that this is done for all word constants in the chain recursively.
+; It does not insist that the nested concatenations are over characters only.
+; This rule may fail if s is not a str.++ application corresponding to a term
+; in cvc5 rewritten form.
+(program sc_string_flatten ((s term)) term
+ (match s
+ ((apply s1 s2)
+ (let s12 (getarg f_str.++ s1)
+ ; Must handle nested concatenation for word constant. We know there is no nested concatenation within s12, so we don't need to flatten it.
+ ; Since s12 may not be a concat term, we must use n-ary intro to ensure it is in n-ary form
+ (sc_string_concat (sc_string_nary_intro s12) (sc_string_flatten s2))))
+ (emptystr s))
+)
+
+; Helper for collecting adjacent constants. This side condition takes as input
+; a str.++ application s. It returns a pair whose concatenation is equal to s,
+; whose first component corresponds to a word constant, and whose second
+; component is a str.++ application whose first element is not a character.
+; For example, for:
+; (str.++ "A" (str.++ "B" (str.++ x "")))
+; We return:
+; (pair (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+(program sc_string_collect_acc ((s term)) termPair
+ (match s
+ ((apply s1 s2)
+ (match (getarg f_str.++ s1)
+ ((char n)
+ (match (sc_string_collect_acc s2)
+ ((pair ssc1 ssc2) (pair (apply s1 ssc1) ssc2))))
+ (default (pair emptystr s))))
+ (emptystr (pair emptystr s)))
+)
+
+; Collect adjacent constants for the prefix of string s. For example:
+; (str.++ "A" (str.++ "B" (str.++ x "")))
+; we return:
+; (str.++ (str.++ "A" (str.++ "B" "")) (str.++ x ""))
+; This side condition may fail if s is not a str.++ application.
+; Notice that the collection of constants is done for all word constants in the
+; term s recursively.
+(program sc_string_collect ((s term)) term
+ (match (sc_string_collect_acc s)
+ ((pair sc1 sc2)
+ (match sc1
+ ; did not strip a constant prefix
+ (emptystr
+ (match s
+ ((apply s1 s2) (apply s1 (sc_string_collect s2)))
+ (emptystr s)))
+ ; stripped a constant prefix, must eliminate singleton
+ (default (str.++ (sc_string_nary_elim sc1) (sc_string_collect sc2))))))
+)
+
+; Strip equal prefix of s and t. This returns the pair corresponding to s and
+; t after dropping the maximal equal prefix removed. For example, for:
+; (str.++ x (str.++ y (str.++ z "")))
+; (str.++ x (str.++ w ""))
+; This method will return:
+; (pair (str.++ y (str.++ z "")) (str.++ w ""))
+; This side condition may fail if s or t is not a str.++ application.
+(program sc_strip_prefix ((s term) (t term)) termPair
+ (match s
+ ((apply s1 s2)
+ (let s12 (getarg f_str.++ s1)
+ (match t
+ ((apply t1 t2)
+ (let t12 (getarg f_str.++ t1)
+ (ifequal s12 t12
+ (sc_strip_prefix s2 t2)
+ (pair s t))))
+ (emptystr (pair s t)))))
+ (emptystr (pair s t)))
+)
+
+; Converts a str.++ application into "flat form" so that we are ready to
+; process its prefix. This consists of the following steps:
+; (1) convert s to n-ary form if it is not already a str.++ application,
+; (2) flatten so that its constant prefix,
+; (3) (optionally) reverse.
+(program sc_string_to_flat_form ((s term) (rev flag)) term
+ ; intro, flatten, reverse
+ (sc_string_rev (sc_string_flatten (sc_string_nary_intro s)) rev))
+
+; Converts a term in "flat form" to a term that is in a form that corresponds
+; to one in cvc5 rewritten form. This is the dual method to
+; sc_string_to_flat_form. This consists of the following steps:
+; (1) (optionally) reverse,
+; (2) collect constants
+; (3) eliminate n-ary form to its element if the term is a singleton list.
+(program sc_string_from_flat_form ((s term) (rev flag)) term
+ ; reverse, collect, elim
+ (sc_string_nary_elim (sc_string_collect (sc_string_rev s rev))))
diff --git a/proofs/lfsc/signatures/strings_rules.plf b/proofs/lfsc/signatures/strings_rules.plf
new file mode 100644
index 000000000..af981096a
--- /dev/null
+++ b/proofs/lfsc/signatures/strings_rules.plf
@@ -0,0 +1,124 @@
+; depends: strings_programs.plf
+
+(declare string_length_non_empty (! s term (! p (holds (not (= s emptystr))) (holds (not (= (str.len s) (int 0)))))))
+
+; Computes the conclusion of the PfRule::CONCAT_EQ rule
+(program sc_concat_eq ((s term) (t term) (rev flag)) term
+ (match (sc_strip_prefix
+ (sc_string_to_flat_form s rev)
+ (sc_string_to_flat_form t rev))
+ ((pair ss ts)
+ (=
+ (sc_string_from_flat_form ss rev)
+ (sc_string_from_flat_form ts rev)))))
+
+(declare concat_eq (! s term
+ (! t term
+ (! res term
+ (! rev flag
+ (! p (holds (= s t))
+ (! r (^ (sc_concat_eq s t rev) res)
+ (holds res))))))))
+
+; Computes whether the heads of the premise (= s t) match s1 and t1 for the
+; PfRule::CONCAT_UNIFY rule
+(program sc_concat_unify ((s term) (t term) (s1 term) (t1 term) (rev flag)) Ok
+ (ifequal (sc_string_head (sc_string_rev s rev)) s1
+ (ifequal (sc_string_head (sc_string_rev t rev)) t1
+ ok
+ (fail Ok))
+ (fail Ok)))
+
+(declare concat_unify (! s term
+ (! t term
+ (! s1 term
+ (! t1 term
+ (! rev flag
+ (! p (holds (= s t))
+ (! p1 (holds (= (str.len s1) (str.len t1)))
+ (! r (^ (sc_concat_unify s t s1 t1 rev) ok)
+ (holds (= s1 t1)))))))))))
+
+; Computes the conclusion of the PfRule::CONCAT_CSPLIT rule
+(program sc_concat_csplit ((thead term) (t term) (s term) (rev flag)) term
+ (match (sc_string_to_flat_form t rev)
+ ((apply t1 t2)
+ (ifequal (getarg f_str.++ t1) thead
+ (match (sc_string_to_flat_form s rev)
+ ((apply s1 s2)
+ (let s12 (getarg f_str.++ s1)
+ (match s12
+ ((char n)
+ (= thead
+ (ifequal rev ff
+ (str.++ s12 (str.++ (sc_mk_skolem (sc_skolem_suffix_rem thead (int 1))) emptystr))
+ (str.++ (sc_mk_skolem (sc_skolem_prefix thead (a.- (str.len thead) (int 1)))) (str.++ s12 emptystr)))))))))
+ (fail term))))
+)
+
+(declare concat_csplit
+ (! t1 term
+ (! t term
+ (! s term
+ (! res term
+ (! rev flag
+ (! p1 (holds (= t s))
+ (! p2 (holds (not (= (str.len t1) (int 0))))
+ (! r (^ (sc_concat_csplit t1 t s rev) res)
+ (holds res))))))))))
+
+; Checks whether the conditions on the premise (= s t) is satisfied for the
+; PfRule::CONCAT_CONFLICT rule
+(program sc_concat_conflict ((s term) (t term) (rev flag)) Ok
+ (match (sc_strip_prefix
+ (sc_string_to_flat_form s rev)
+ (sc_string_to_flat_form t rev))
+ ((pair ss ts)
+ (ifequal
+ (sc_string_first_char_or_empty ss)
+ (sc_string_first_char_or_empty ts)
+ (fail Ok)
+ ok))))
+
+(declare concat_conflict (! s term
+ (! t term
+ (! rev flag
+ (! p (holds (= s t))
+ (! r (^ (sc_concat_conflict s t rev) ok)
+ (holds false)))))))
+
+(declare string_length_pos (! t term
+ (holds (or (and (= (str.len t) (int 0)) (and (= t emptystr) true)) (or (a.> (str.len t) (int 0)) false)))))
+
+(declare re_inter (! x term (! s term (! t term (! p1 (holds (str.in_re x s)) (! p2 (holds (str.in_re x t))
+ (holds (str.in_re x (re.inter s t)))))))))
+
+(declare string_reduction (! r term (! t term (! s sort (! u (^ (sc_string_reduction t s) r)
+ (holds r))))))))
+
+(declare string_eager_reduction (! r term (! t term (! s sort (! u (^ (sc_string_eager_reduction t s) r)
+ (holds r))))))
+
+; Computes the conclusion of PfRule::RE_UNFOLD_POS
+(program sc_re_unfold_pos ((t term) (r term)) term
+ (match r
+ ((apply r1 r2)
+ (match r1
+ ; case for star
+ (f_re.*
+ (let rr (re.++ r2 (re.++ r (re.++ r2 re.empty)))
+ (match (sc_re_unfold_pos_concat t rr rr 0)
+ ((pair p1 p2)
+ (or (= t emptystr)
+ (or (str.in_re t r2)
+ (or (and (sc_string_nary_elim (and (= t p1) p2)) (sc_non_empty_concats p1))
+ false)))))))
+ ((apply r11 r12)
+ (match r11
+ ; case for concatenation
+ (f_re.++
+ (match (sc_re_unfold_pos_concat t r r 0)
+ ((pair p1 p2) (sc_string_nary_elim (and (= t p1) p2)))))
+))))))
+
+(declare re_unfold_pos (! t term (! r term (! s term (! f (holds (str.in_re t r)) (! u (^ (sc_re_unfold_pos t r) s) (holds s)))))))
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback