-rwxr-xr-x  config/tap-driver.sh                                     | 651
-rw-r--r--  configure.ac                                             |   1
-rw-r--r--  test/regress/Makefile.am                                 |   8
-rw-r--r--  test/regress/regress0/arrays/incorrect10.smt             |   4
-rw-r--r--  test/regress/regress0/expect/scrub.02.smt                |  10
-rw-r--r--  test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt  |   4
-rw-r--r--  test/regress/regress0/fmf/QEpres-uf.855035.smt           |   4
-rw-r--r--  test/regress/regress1/fmf/Hoare-z3.931718.smt            |   4
-rwxr-xr-x  test/regress/run_regression                             | 424
-rwxr-xr-x  test/regress/run_regression.py                          | 287
10 files changed, 957 insertions, 440 deletions
diff --git a/config/tap-driver.sh b/config/tap-driver.sh
new file mode 100755
index 000000000..d3e68bdc1
--- /dev/null
+++ b/config/tap-driver.sh
@@ -0,0 +1,651 @@
+#! /bin/sh
+# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+scriptversion=2013-12-23.17; # UTC
+
+# Make unconditional expansion of undefined variables an error. This
+# helps a lot in preventing typo-related bugs.
+set -u
+
+me=tap-driver.sh
+
+fatal ()
+{
+ echo "$me: fatal: $*" >&2
+ exit 1
+}
+
+usage_error ()
+{
+ echo "$me: $*" >&2
+ print_usage >&2
+ exit 2
+}
+
+print_usage ()
+{
+ cat <<END
+Usage:
+ tap-driver.sh --test-name=NAME --log-file=PATH --trs-file=PATH
+ [--expect-failure={yes|no}] [--color-tests={yes|no}]
+ [--enable-hard-errors={yes|no}] [--ignore-exit]
+ [--diagnostic-string=STRING] [--merge|--no-merge]
+ [--comments|--no-comments] [--] TEST-COMMAND
+The '--test-name', '--log-file' and '--trs-file' options are mandatory.
+END
+}
+
+# TODO: better error handling in option parsing (in particular, ensure
+# TODO: $log_file, $trs_file and $test_name are defined).
+test_name= # Used for reporting.
+log_file= # Where to save the result and output of the test script.
+trs_file= # Where to save the metadata of the test run.
+expect_failure=0
+color_tests=0
+merge=0
+ignore_exit=0
+comments=0
+diag_string='#'
+while test $# -gt 0; do
+ case $1 in
+ --help) print_usage; exit $?;;
+ --version) echo "$me $scriptversion"; exit $?;;
+ --test-name) test_name=$2; shift;;
+ --log-file) log_file=$2; shift;;
+ --trs-file) trs_file=$2; shift;;
+ --color-tests) color_tests=$2; shift;;
+ --expect-failure) expect_failure=$2; shift;;
+ --enable-hard-errors) shift;; # No-op.
+ --merge) merge=1;;
+ --no-merge) merge=0;;
+ --ignore-exit) ignore_exit=1;;
+ --comments) comments=1;;
+ --no-comments) comments=0;;
+ --diagnostic-string) diag_string=$2; shift;;
+ --) shift; break;;
+ -*) usage_error "invalid option: '$1'";;
+ esac
+ shift
+done
+
+test $# -gt 0 || usage_error "missing test command"
+
+case $expect_failure in
+ yes) expect_failure=1;;
+ *) expect_failure=0;;
+esac
+
+if test $color_tests = yes; then
+ init_colors='
+    color_map["red"]="\033[0;31m" # Red.
+    color_map["grn"]="\033[0;32m" # Green.
+    color_map["lgn"]="\033[1;32m" # Light green.
+    color_map["blu"]="\033[1;34m" # Blue.
+    color_map["mgn"]="\033[0;35m" # Magenta.
+    color_map["std"]="\033[m" # No color.
+ color_for_result["ERROR"] = "mgn"
+ color_for_result["PASS"] = "grn"
+ color_for_result["XPASS"] = "red"
+ color_for_result["FAIL"] = "red"
+ color_for_result["XFAIL"] = "lgn"
+ color_for_result["SKIP"] = "blu"'
+else
+ init_colors=''
+fi
+
+# :; is there to work around a bug in bash 3.2 (and earlier) which
+# does not always set '$?' properly on redirection failure.
+# See the Autoconf manual for more details.
+:;{
+ (
+ # Ignore common signals (in this subshell only!), to avoid potential
+ # problems with Korn shells. Some Korn shells are known to propagate
+ # to themselves signals that have killed a child process they were
+ # waiting for; this is done at least for SIGINT (and usually only for
+ # it, in truth). Without the `trap' below, such a behaviour could
+ # cause a premature exit in the current subshell, e.g., in case the
+ # test command it runs gets terminated by a SIGINT. Thus, the awk
+    # script we are piping into would never see the exit status it
+ # expects on its last input line (which is displayed below by the
+ # last `echo $?' statement), and would thus die reporting an internal
+ # error.
+ # For more information, see the Autoconf manual and the threads:
+ # <https://lists.gnu.org/archive/html/bug-autoconf/2011-09/msg00004.html>
+ # <http://mail.opensolaris.org/pipermail/ksh93-integration-discuss/2009-February/004121.html>
+ trap : 1 3 2 13 15
+ if test $merge -gt 0; then
+ exec 2>&1
+ else
+ exec 2>&3
+ fi
+ "$@"
+ echo $?
+ ) | LC_ALL=C ${AM_TAP_AWK-awk} \
+ -v me="$me" \
+ -v test_script_name="$test_name" \
+ -v log_file="$log_file" \
+ -v trs_file="$trs_file" \
+ -v expect_failure="$expect_failure" \
+ -v merge="$merge" \
+ -v ignore_exit="$ignore_exit" \
+ -v comments="$comments" \
+ -v diag_string="$diag_string" \
+'
+# TODO: the usages of "cat >&3" below could be optimized when using
+# GNU awk, and/or on systems that support /dev/fd/.
+
+# Implementation note: in what follows, `result_obj` will be an
+# associative array that (partly) simulates a TAP result object
+# from the `TAP::Parser` perl module.
+
+## ----------- ##
+## FUNCTIONS ##
+## ----------- ##
+
+function fatal(msg)
+{
+ print me ": " msg | "cat >&2"
+ exit 1
+}
+
+function abort(where)
+{
+ fatal("internal error " where)
+}
+
+# Convert a boolean to a "yes"/"no" string.
+function yn(bool)
+{
+ return bool ? "yes" : "no";
+}
+
+function add_test_result(result)
+{
+ if (!test_results_index)
+ test_results_index = 0
+ test_results_list[test_results_index] = result
+ test_results_index += 1
+ test_results_seen[result] = 1;
+}
+
+# Whether the test script should be re-run by "make recheck".
+function must_recheck()
+{
+ for (k in test_results_seen)
+ if (k != "XFAIL" && k != "PASS" && k != "SKIP")
+ return 1
+ return 0
+}
+
+# Whether the content of the log file associated to this test should
+# be copied into the "global" test-suite.log.
+function copy_in_global_log()
+{
+ for (k in test_results_seen)
+ if (k != "PASS")
+ return 1
+ return 0
+}
+
+function get_global_test_result()
+{
+ if ("ERROR" in test_results_seen)
+ return "ERROR"
+ if ("FAIL" in test_results_seen || "XPASS" in test_results_seen)
+ return "FAIL"
+ all_skipped = 1
+ for (k in test_results_seen)
+ if (k != "SKIP")
+ all_skipped = 0
+ if (all_skipped)
+ return "SKIP"
+ return "PASS";
+}
+
+function stringify_result_obj(result_obj)
+{
+ if (result_obj["is_unplanned"] || result_obj["number"] != testno)
+ return "ERROR"
+
+ if (plan_seen == LATE_PLAN)
+ return "ERROR"
+
+ if (result_obj["directive"] == "TODO")
+ return result_obj["is_ok"] ? "XPASS" : "XFAIL"
+
+ if (result_obj["directive"] == "SKIP")
+ return result_obj["is_ok"] ? "SKIP" : COOKED_FAIL;
+
+ if (length(result_obj["directive"]))
+ abort("in function stringify_result_obj()")
+
+ return result_obj["is_ok"] ? COOKED_PASS : COOKED_FAIL
+}
+
+function decorate_result(result)
+{
+ color_name = color_for_result[result]
+ if (color_name)
+ return color_map[color_name] "" result "" color_map["std"]
+ # If we are not using colorized output, or if we do not know how
+ # to colorize the given result, we should return it unchanged.
+ return result
+}
+
+function report(result, details)
+{
+ if (result ~ /^(X?(PASS|FAIL)|SKIP|ERROR)/)
+ {
+ msg = ": " test_script_name
+ add_test_result(result)
+ }
+ else if (result == "#")
+ {
+ msg = " " test_script_name ":"
+ }
+ else
+ {
+ abort("in function report()")
+ }
+ if (length(details))
+ msg = msg " " details
+ # Output on console might be colorized.
+ print decorate_result(result) msg
+ # Log the result in the log file too, to help debugging (this is
+ # especially true when said result is a TAP error or "Bail out!").
+ print result msg | "cat >&3";
+}
+
+function testsuite_error(error_message)
+{
+ report("ERROR", "- " error_message)
+}
+
+function handle_tap_result()
+{
+ details = result_obj["number"];
+ if (length(result_obj["description"]))
+ details = details " " result_obj["description"]
+
+ if (plan_seen == LATE_PLAN)
+ {
+ details = details " # AFTER LATE PLAN";
+ }
+ else if (result_obj["is_unplanned"])
+ {
+ details = details " # UNPLANNED";
+ }
+ else if (result_obj["number"] != testno)
+ {
+ details = sprintf("%s # OUT-OF-ORDER (expecting %d)",
+ details, testno);
+ }
+ else if (result_obj["directive"])
+ {
+ details = details " # " result_obj["directive"];
+ if (length(result_obj["explanation"]))
+ details = details " " result_obj["explanation"]
+ }
+
+ report(stringify_result_obj(result_obj), details)
+}
+
+# `skip_reason` should be empty whenever planned > 0.
+function handle_tap_plan(planned, skip_reason)
+{
+ planned += 0 # Avoid getting confused if, say, `planned` is "00"
+ if (length(skip_reason) && planned > 0)
+ abort("in function handle_tap_plan()")
+ if (plan_seen)
+ {
+ # Error, only one plan per stream is acceptable.
+ testsuite_error("multiple test plans")
+ return;
+ }
+ planned_tests = planned
+ # The TAP plan can come before or after *all* the TAP results; we speak
+ # respectively of an "early" or a "late" plan. If we see the plan line
+ # after at least one TAP result has been seen, assume we have a late
+ # plan; in this case, any further test result seen after the plan will
+ # be flagged as an error.
+ plan_seen = (testno >= 1 ? LATE_PLAN : EARLY_PLAN)
+ # If testno > 0, we have an error ("too many tests run") that will be
+ # automatically dealt with later, so do not worry about it here. If
+ # $plan_seen is true, we have an error due to a repeated plan, and that
+ # has already been dealt with above. Otherwise, we have a valid "plan
+ # with SKIP" specification, and should report it as a particular kind
+ # of SKIP result.
+ if (planned == 0 && testno == 0)
+ {
+ if (length(skip_reason))
+ skip_reason = "- " skip_reason;
+ report("SKIP", skip_reason);
+ }
+}
+
+function extract_tap_comment(line)
+{
+ if (index(line, diag_string) == 1)
+ {
+ # Strip leading `diag_string` from `line`.
+ line = substr(line, length(diag_string) + 1)
+ # And strip any leading and trailing whitespace left.
+ sub("^[ \t]*", "", line)
+ sub("[ \t]*$", "", line)
+ # Return what is left (if any).
+ return line;
+ }
+ return "";
+}
+
+# When this function is called, we know that line is a TAP result line,
+# so that it matches the (perl) RE "^(not )?ok\b".
+function setup_result_obj(line)
+{
+ # Get the result, and remove it from the line.
+ result_obj["is_ok"] = (substr(line, 1, 2) == "ok" ? 1 : 0)
+ sub("^(not )?ok[ \t]*", "", line)
+
+ # If the result has an explicit number, get it and strip it; otherwise,
+  # automatically assign the next progressive number to it.
+ if (line ~ /^[0-9]+$/ || line ~ /^[0-9]+[^a-zA-Z0-9_]/)
+ {
+ match(line, "^[0-9]+")
+ # The final `+ 0` is to normalize numbers with leading zeros.
+ result_obj["number"] = substr(line, 1, RLENGTH) + 0
+ line = substr(line, RLENGTH + 1)
+ }
+ else
+ {
+ result_obj["number"] = testno
+ }
+
+ if (plan_seen == LATE_PLAN)
+ # No further test results are acceptable after a "late" TAP plan
+ # has been seen.
+ result_obj["is_unplanned"] = 1
+ else if (plan_seen && testno > planned_tests)
+ result_obj["is_unplanned"] = 1
+ else
+ result_obj["is_unplanned"] = 0
+
+ # Strip trailing and leading whitespace.
+ sub("^[ \t]*", "", line)
+ sub("[ \t]*$", "", line)
+
+ # This will have to be corrected if we have a "TODO"/"SKIP" directive.
+ result_obj["description"] = line
+ result_obj["directive"] = ""
+ result_obj["explanation"] = ""
+
+ if (index(line, "#") == 0)
+ return # No possible directive, nothing more to do.
+
+ # Directives are case-insensitive.
+ rx = "[ \t]*#[ \t]*([tT][oO][dD][oO]|[sS][kK][iI][pP])[ \t]*"
+
+ # See whether we have the directive, and if yes, where.
+ pos = match(line, rx "$")
+ if (!pos)
+ pos = match(line, rx "[^a-zA-Z0-9_]")
+
+ # If there was no TAP directive, we have nothing more to do.
+ if (!pos)
+ return
+
+  # Let's now see if the TAP directive has been escaped. For example:
+ # escaped: ok \# SKIP
+ # not escaped: ok \\# SKIP
+ # escaped: ok \\\\\# SKIP
+ # not escaped: ok \ # SKIP
+ if (substr(line, pos, 1) == "#")
+ {
+ bslash_count = 0
+ for (i = pos; i > 1 && substr(line, i - 1, 1) == "\\"; i--)
+ bslash_count += 1
+ if (bslash_count % 2)
+ return # Directive was escaped.
+ }
+
+ # Strip the directive and its explanation (if any) from the test
+ # description.
+ result_obj["description"] = substr(line, 1, pos - 1)
+ # Now remove the test description from the line, that has been dealt
+ # with already.
+ line = substr(line, pos)
+ # Strip the directive, and save its value (normalized to upper case).
+ sub("^[ \t]*#[ \t]*", "", line)
+ result_obj["directive"] = toupper(substr(line, 1, 4))
+ line = substr(line, 5)
+ # Now get the explanation for the directive (if any), with leading
+ # and trailing whitespace removed.
+ sub("^[ \t]*", "", line)
+ sub("[ \t]*$", "", line)
+ result_obj["explanation"] = line
+}
+
+function get_test_exit_message(status)
+{
+ if (status == 0)
+ return ""
+ if (status !~ /^[1-9][0-9]*$/)
+ abort("getting exit status")
+ if (status < 127)
+ exit_details = ""
+ else if (status == 127)
+ exit_details = " (command not found?)"
+ else if (status >= 128 && status <= 255)
+ exit_details = sprintf(" (terminated by signal %d?)", status - 128)
+ else if (status > 256 && status <= 384)
+ # We used to report an "abnormal termination" here, but some Korn
+    # shells, when a child process dies due to signal number n, can leave
+ # in $? an exit status of 256+n instead of the more standard 128+n.
+ # Apparently, both behaviours are allowed by POSIX (2008), so be
+    # prepared to handle them both. See also Austin Group report ID
+ # 0000051 <http://www.austingroupbugs.net/view.php?id=51>
+ exit_details = sprintf(" (terminated by signal %d?)", status - 256)
+ else
+ # Never seen in practice.
+ exit_details = " (abnormal termination)"
+ return sprintf("exited with status %d%s", status, exit_details)
+}
+
+function write_test_results()
+{
+ print ":global-test-result: " get_global_test_result() > trs_file
+ print ":recheck: " yn(must_recheck()) > trs_file
+ print ":copy-in-global-log: " yn(copy_in_global_log()) > trs_file
+ for (i = 0; i < test_results_index; i += 1)
+ print ":test-result: " test_results_list[i] > trs_file
+ close(trs_file);
+}
+
+BEGIN {
+
+## ------- ##
+## SETUP ##
+## ------- ##
+
+'"$init_colors"'
+
+# Properly initialized once the TAP plan is seen.
+planned_tests = 0
+
+COOKED_PASS = expect_failure ? "XPASS": "PASS";
+COOKED_FAIL = expect_failure ? "XFAIL": "FAIL";
+
+# Enumeration-like constants to remember which kind of plan (if any)
+# has been seen. It is important that NO_PLAN evaluates "false" as
+# a boolean.
+NO_PLAN = 0
+EARLY_PLAN = 1
+LATE_PLAN = 2
+
+testno = 0 # Number of test results seen so far.
+bailed_out = 0 # Whether a "Bail out!" directive has been seen.
+
+# Whether the TAP plan has been seen or not, and if yes, which kind
+# it is ("early" is seen before any test result, "late" otherwise).
+plan_seen = NO_PLAN
+
+## --------- ##
+## PARSING ##
+## --------- ##
+
+is_first_read = 1
+
+while (1)
+ {
+ # Involutions required so that we are able to read the exit status
+ # from the last input line.
+ st = getline
+ if (st < 0) # I/O error.
+ fatal("I/O error while reading from input stream")
+ else if (st == 0) # End-of-input
+ {
+ if (is_first_read)
+ abort("in input loop: only one input line")
+ break
+ }
+ if (is_first_read)
+ {
+ is_first_read = 0
+ nextline = $0
+ continue
+ }
+ else
+ {
+ curline = nextline
+ nextline = $0
+ $0 = curline
+ }
+ # Copy any input line verbatim into the log file.
+ print | "cat >&3"
+ # Parsing of TAP input should stop after a "Bail out!" directive.
+ if (bailed_out)
+ continue
+
+ # TAP test result.
+ if ($0 ~ /^(not )?ok$/ || $0 ~ /^(not )?ok[^a-zA-Z0-9_]/)
+ {
+ testno += 1
+ setup_result_obj($0)
+ handle_tap_result()
+ }
+ # TAP plan (normal or "SKIP" without explanation).
+ else if ($0 ~ /^1\.\.[0-9]+[ \t]*$/)
+ {
+ # The next two lines will put the number of planned tests in $0.
+ sub("^1\\.\\.", "")
+ sub("[^0-9]*$", "")
+ handle_tap_plan($0, "")
+ continue
+ }
+ # TAP "SKIP" plan, with an explanation.
+ else if ($0 ~ /^1\.\.0+[ \t]*#/)
+ {
+ # The next lines will put the skip explanation in $0, stripping
+ # any leading and trailing whitespace. This is a little more
+ # tricky in truth, since we want to also strip a potential leading
+ # "SKIP" string from the message.
+ sub("^[^#]*#[ \t]*(SKIP[: \t][ \t]*)?", "")
+ sub("[ \t]*$", "");
+ handle_tap_plan(0, $0)
+ }
+ # "Bail out!" magic.
+ # Older versions of prove and TAP::Harness (e.g., 3.17) did not
+ # recognize a "Bail out!" directive when preceded by leading
+ # whitespace, but more modern versions (e.g., 3.23) do. So we
+ # emulate the latter, "more modern" behaviour.
+ else if ($0 ~ /^[ \t]*Bail out!/)
+ {
+ bailed_out = 1
+ # Get the bailout message (if any), with leading and trailing
+ # whitespace stripped. The message remains stored in `$0`.
+ sub("^[ \t]*Bail out![ \t]*", "");
+ sub("[ \t]*$", "");
+      # Format the error message for the report.
+ bailout_message = "Bail out!"
+ if (length($0))
+ bailout_message = bailout_message " " $0
+ testsuite_error(bailout_message)
+ }
+  # Maybe we have to look for diagnostic comments too.
+ else if (comments != 0)
+ {
+ comment = extract_tap_comment($0);
+ if (length(comment))
+ report("#", comment);
+ }
+ }
+
+## -------- ##
+## FINISH ##
+## -------- ##
+
+# A "Bail out!" directive should cause us to ignore any following TAP
+# error, as well as a non-zero exit status from the TAP producer.
+if (!bailed_out)
+ {
+ if (!plan_seen)
+ {
+ testsuite_error("missing test plan")
+ }
+ else if (planned_tests != testno)
+ {
+ bad_amount = testno > planned_tests ? "many" : "few"
+ testsuite_error(sprintf("too %s tests run (expected %d, got %d)",
+ bad_amount, planned_tests, testno))
+ }
+ if (!ignore_exit)
+ {
+ # Fetch exit status from the last line.
+ exit_message = get_test_exit_message(nextline)
+ if (exit_message)
+ testsuite_error(exit_message)
+ }
+ }
+
+write_test_results()
+
+exit 0
+
+} # End of "BEGIN" block.
+'
+
+# TODO: document that we consume the file descriptor 3 :-(
+} 3>"$log_file"
+
+test $? -eq 0 || fatal "I/O or internal error"
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
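
For orientation, the result-line grammar the awk program above implements is: "ok" or "not ok", an optional test number, an optional description, and an optional "# TODO" or "# SKIP" directive with an explanation. The following is a minimal Python restatement of that classification, included here purely as an illustration (it is not part of this commit, and it omits the escaped-"#" corner case that setup_result_obj() handles):

import re

# One regex approximating the TAP result-line grammar sketched above.
TAP_RESULT = re.compile(
    r'^(?P<not>not )?ok\b[ \t]*(?P<num>\d+)?[ \t]*'
    r'(?P<desc>[^#]*?)[ \t]*'
    r'(?:#[ \t]*(?P<directive>TODO|SKIP)[ \t]*(?P<explanation>.*))?$',
    re.IGNORECASE)

def classify(line, expected_number):
    """Map one TAP result line to PASS/FAIL/XPASS/XFAIL/SKIP/ERROR."""
    m = TAP_RESULT.match(line)
    if not m:
        return 'ERROR'                     # not a result line at all
    ok = m.group('not') is None
    number = int(m.group('num') or expected_number)
    if number != expected_number:
        return 'ERROR'                     # out-of-order result
    directive = (m.group('directive') or '').upper()
    if directive == 'TODO':
        return 'XPASS' if ok else 'XFAIL'  # TODO inverts the expectation
    if directive == 'SKIP':
        return 'SKIP' if ok else 'FAIL'
    return 'PASS' if ok else 'FAIL'

assert classify('ok 1 - basics', 1) == 'PASS'
assert classify('not ok 2 - edge case # TODO flaky', 2) == 'XFAIL'
assert classify('ok 3 # SKIP no solver built', 3) == 'SKIP'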
diff --git a/configure.ac b/configure.ac
index fb225b688..92053daf6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -14,6 +14,7 @@ AC_PREREQ([2.61])
AC_INIT([cvc4], _CVC4_RELEASE_STRING, [cvc4-bugs@cs.stanford.edu])
AC_CONFIG_SRCDIR([src/include/cvc4_public.h])
AC_CONFIG_AUX_DIR([config])
+AC_REQUIRE_AUX_FILE([tap-driver.sh])
AC_CONFIG_MACRO_DIR([config])
AC_CONFIG_LIBOBJ_DIR([src/lib])
diff --git a/test/regress/Makefile.am b/test/regress/Makefile.am
index f068ce9ad..19cbbae4a 100644
--- a/test/regress/Makefile.am
+++ b/test/regress/Makefile.am
@@ -9,7 +9,9 @@ TESTS = $(REG0_TESTS) $(REG1_TESTS) $(REG2_TESTS) $(REG3_TESTS) $(REG4_TESTS)
@mk_empty@BINARY = cvc4
end@mk_if@
-LOG_COMPILER = $(top_srcdir)/test/regress/run_regression
+LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
+ $(top_srcdir)/config/tap-driver.sh --comments
+LOG_COMPILER = @abs_top_srcdir@/test/regress/run_regression.py
AM_LOG_FLAGS = $(RUN_REGRESSION_ARGS) @abs_top_builddir@/src/main/$(BINARY)$(EXEEXT)
if AUTOMAKE_1_11
@@ -55,5 +57,5 @@ EXTRA_DIST = \
regress0/tptp/Axioms/SYN000_0.ax \
Makefile.levels \
Makefile.tests \
- run_regression \
- README.md
+ run_regression.py \
+	README.md
\ No newline at end of file
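
With these settings, Automake's parallel test harness routes each regression through the TAP driver. The sketch below reconstructs the per-test command that `make check` assembles from LOG_DRIVER, LOG_COMPILER and AM_LOG_FLAGS; the paths, the test name, and the extra flags Automake adds on its own (such as --color-tests) are illustrative assumptions, not taken from this commit:

import subprocess

# Hypothetical reconstruction of a single `make check` test invocation.
cmd = [
    'env', 'AM_TAP_AWK=gawk', 'sh', 'config/tap-driver.sh', '--comments',
    '--test-name', 'regress0/arrays/incorrect10.smt',
    '--log-file', 'regress0/arrays/incorrect10.smt.log',
    '--trs-file', 'regress0/arrays/incorrect10.smt.trs',
    '--',                                # end of driver options
    'test/regress/run_regression.py',    # LOG_COMPILER
    'builddir/src/main/cvc4',            # AM_LOG_FLAGS: binary under test
    'regress0/arrays/incorrect10.smt',   # the test itself
]
subprocess.run(cmd, check=False)

The driver copies the test's TAP stream into the .log file, writes the harness-readable summary (the :test-result: lines) into the .trs file, and prints one decorated PASS/FAIL/SKIP line per TAP result on the console.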
diff --git a/test/regress/regress0/arrays/incorrect10.smt b/test/regress/regress0/arrays/incorrect10.smt
index 6fbdb7ee9..6aacc37c0 100644
--- a/test/regress/regress0/arrays/incorrect10.smt
+++ b/test/regress/regress0/arrays/incorrect10.smt
@@ -1,5 +1,5 @@
-% COMMAND-LINE: --no-check-proofs
-% EXPECT: unsat
+; COMMAND-LINE: --no-check-proofs
+; EXPECT: unsat
(benchmark fuzzsmt
:logic QF_AX
:status unsat
diff --git a/test/regress/regress0/expect/scrub.02.smt b/test/regress/regress0/expect/scrub.02.smt
index 145801619..65c61ecd0 100644
--- a/test/regress/regress0/expect/scrub.02.smt
+++ b/test/regress/regress0/expect/scrub.02.smt
@@ -1,8 +1,8 @@
-% SCRUBBER: sed -e 's/The fact in question: .*$/The fact in question: TERM/'
-% EXPECT: (error "A non-linear fact was asserted to arithmetic in a linear logic.
-% EXPECT: The fact in question: TERM
-% EXPECT: ")
-% EXIT: 1
+; SCRUBBER: sed -e 's/The fact in question: .*$/The fact in question: TERM/'
+; EXPECT: (error "A non-linear fact was asserted to arithmetic in a linear logic.
+; EXPECT: The fact in question: TERM
+; EXPECT: ")
+; EXIT: 1
(benchmark reject_nonlinear
:logic QF_LRA
:extrafuns ((n Real))
diff --git a/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt b/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt
index f62f057e4..e8c7949dc 100644
--- a/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt
+++ b/test/regress/regress0/fmf/Arrow_Order-smtlib.778341.smt
@@ -1,5 +1,5 @@
-% COMMAND-LINE: --finite-model-find --mbqi=gen-ev
-% EXPECT: unsat
+; COMMAND-LINE: --finite-model-find --mbqi=gen-ev
+; EXPECT: unsat
(benchmark Isabelle
:status sat
:logic AUFLIA
diff --git a/test/regress/regress0/fmf/QEpres-uf.855035.smt b/test/regress/regress0/fmf/QEpres-uf.855035.smt
index 2945c8f4d..4fe592638 100644
--- a/test/regress/regress0/fmf/QEpres-uf.855035.smt
+++ b/test/regress/regress0/fmf/QEpres-uf.855035.smt
@@ -1,5 +1,5 @@
-% COMMAND-LINE: --finite-model-find --mbqi=gen-ev
-% EXPECT: sat
+; COMMAND-LINE: --finite-model-find --mbqi=gen-ev
+; EXPECT: sat
(benchmark Isabelle
:status sat
:logic AUFLIA
diff --git a/test/regress/regress1/fmf/Hoare-z3.931718.smt b/test/regress/regress1/fmf/Hoare-z3.931718.smt
index 754e19e88..4ab90756c 100644
--- a/test/regress/regress1/fmf/Hoare-z3.931718.smt
+++ b/test/regress/regress1/fmf/Hoare-z3.931718.smt
@@ -1,5 +1,5 @@
-% COMMAND-LINE: --finite-model-find
-% EXPECT: sat
+; COMMAND-LINE: --finite-model-find
+; EXPECT: sat
(benchmark Isabelle
:status sat
:logic AUFLIA
diff --git a/test/regress/run_regression b/test/regress/run_regression
deleted file mode 100755
index 4ae013911..000000000
--- a/test/regress/run_regression
+++ /dev/null
@@ -1,424 +0,0 @@
-#!/bin/bash
-#
-# Morgan Deters <mdeters@cs.nyu.edu>
-# for the CVC4 project
-#
-# usage:
-#
-# run_regression [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
-#
-# Runs benchmark and checks for correct exit status and output.
-#
-# The TEST_GROUP and TEST_GROUPS environment variables can be used to split the
-# benchmarks into groups and only run one of the groups. The benchmarks are
-# associated with a group based on a hash of their name. TEST_GROUPS controls
-# how many groups are created while TEST_GROUP specifies the group to run.
-# Currently, the primary use case for this is to split the benchmarks across
-# multiple Travis jobs.
-
-# ulimit -t 1 # For detecting long running regressions
-ulimit -s 65000 # Needed for some (esp. portfolio and model-building)
-
-prog=`basename "$0"`
-
-if [ $# -lt 2 ]; then
- echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2
- exit 1
-fi
-
-proof=no
-dump=no
-if [ x"$1" = x--proof ]; then
- proof=yes
- shift
-elif [ x"$1" = x--dump ]; then
- dump=yes
- shift
-fi
-
-if [ $# -lt 2 ]; then
- echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2
- exit 1
-fi
-
-wrapper=
-while [ $# -gt 2 ]; do
- wrapper="$wrapper$1 "
- shift
-done
-
-[[ "$VALGRIND" = "1" ]] && {
- wrapper="libtool --mode=execute valgrind $wrapper"
-}
-
-cvc4=$1
-benchmark_orig=$2
-benchmark="$benchmark_orig"
-
-if [ -n "$TEST_GROUP" ]; then
- # Use the checksum of the benchmark name to determine the group of the
- # benchmark
- benchmark_group=$(( $(echo "$benchmark" | cksum | cut -d " " -f1) % $TEST_GROUPS ))
- if (( $benchmark_group != $TEST_GROUP )); then
- # Skip the benchmark if it is not in the expected test group
- exit 77
- fi
-fi
-
-function error {
- echo "$prog: error: $*"
- exit 1
-}
-
-if ! [ -x "$cvc4" ]; then
- error "\`$cvc4' doesn't exist or isn't executable" >&2
-fi
-if ! [ -r "$benchmark" ]; then
- error "\`$benchmark' doesn't exist or isn't readable" >&2
-fi
-
-# gettemp() and its associated tempfiles[] array are intended to never
-# allow a temporary file to leak---the trap ensures that when this script
-# exits, whether via a regular exit or an -INT or other signal, the
-# temp files are deleted.
-declare -a tempfiles
-trap -- 'test ${#tempfiles[@]} -gt 0 && rm -f "${tempfiles[@]}"' EXIT
-function gettemp {
- local temp="`mktemp -t "$2"`"
- tempfiles[${#tempfiles[@]}]="$temp"
- eval "$1"="$temp"
-}
-
-tmpbenchmark=
-if expr "$benchmark" : '.*\.smt$' &>/dev/null; then
- lang=smt1
- if test -e "$benchmark.expect"; then
- scrubber=`grep '^% SCRUBBER: ' "$benchmark.expect" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark.expect" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- elif grep '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\|SCRUBBER\|ERROR-SCRUBBER\): ' "$benchmark" "$benchmark" &>/dev/null; then
- scrubber=`grep '^% SCRUBBER: ' "$benchmark" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^% EXPECT: ' "$benchmark" | sed 's,^% EXPECT: ,,'`
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
- # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle
- # this frustrates our auto-language-detection
- gettemp tmpbenchmark cvc4_benchmark.smt.$$.XXXXXXXXXX
- grep -v '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\|SCRUBBER\|ERROR-SCRUBBER\): ' "$benchmark" >"$tmpbenchmark"
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- benchmark=$tmpbenchmark
- elif grep '^ *:status *sat' "$benchmark" &>/dev/null; then
- expected_output=sat
- expected_exit_status=0
- command_line=
- elif grep '^ *:status *unsat' "$benchmark" &>/dev/null; then
- expected_output=unsat
- expected_exit_status=0
- command_line=
- else
- error "cannot determine status of \`$benchmark'"
- fi
-elif expr "$benchmark" : '.*\.smt2$' &>/dev/null; then
- lang=smt2
-
- # If this test case requires unsat cores but CVC4 is not built with proof
- # support, skip it. Note: checking $CVC4_REGRESSION_ARGS instead of $proof
- # here because $proof is not set for the second run.
- requires_proof=`grep '(get-unsat-core)\|(get-unsat-assumptions)' "$benchmark"`
- if [[ ! "$CVC4_REGRESSION_ARGS" = *"--proof"* ]] && [ -n "$requires_proof" ]; then
- exit 77
- fi
-
- if test -e "$benchmark.expect"; then
- scrubber=`grep '^% SCRUBBER: ' "$benchmark.expect" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark.expect" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
- elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\|SCRUBBER\|ERROR-SCRUBBER\): ' "$benchmark" "$benchmark" &>/dev/null; then
- scrubber=`grep '^; SCRUBBER: ' "$benchmark" | sed 's,^; SCRUBBER: ,,'`
- errscrubber=`grep '^; ERROR-SCRUBBER: ' "$benchmark" | sed 's,^; ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'`
- expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'`
- command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'`
- fi
-
- # If expected output/error was not given, try to determine the status from
- # the commands.
- if [ -z "$expected_output" -a -z "$expected_error" ]; then
- if grep '^ *( *set-info *:status *sat' "$benchmark" &>/dev/null; then
- expected_output=sat
- elif grep '^ *( *set-info *:status *unsat' "$benchmark" &>/dev/null; then
- expected_output=unsat
- fi
- fi
-
- # A valid test case needs at least an expected output (explicit or through
- # SMT2 commands) or an expected exit status.
- if [ -z "$expected_output" -a -z "$expected_error" -a -z "$expected_exit_status" ]; then
- error "cannot determine status of \`$benchmark'"
- fi
-
- # If no explicit expected exit status was given, we assume that the solver is
- # supposed to succeed.
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
-elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then
- lang=cvc4
- scrubber=`grep '^% SCRUBBER: ' "$benchmark" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=$(grep '^% EXPECT: ' "$benchmark")
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
- if [ -z "$expected_output" -a -z "$expected_error" ]; then
- error "cannot determine expected output of \`$benchmark': " \
- "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
- fi
- expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
-elif expr "$benchmark" : '.*\.p$' &>/dev/null; then
- lang=tptp
- command_line=--finite-model-find
- scrubber=`grep '^% SCRUBBER: ' "$benchmark" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=$(grep '^% EXPECT: ' "$benchmark")
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
- if [ -z "$expected_output" -a -z "$expected_error" ]; then
- if grep -q '^% Status *: ' "$benchmark"; then
- expected_output="$(grep '^% *Status *: ' "$benchmark" | head -1 | awk '{print$NF}')"
- case "$expected_output" in
- Theorem|Unsatisfiable) expected_exit_status=0 ;;
- CounterSatisfiable|Satisfiable) expected_exit_status=0 ;;
- GaveUp) expected_exit_status=0 ;;
- esac
- expected_output="% SZS status $expected_output for $(basename "$benchmark" | sed 's,\.p$,,')"
- else
- error "cannot determine expected output of \`$benchmark': " \
- "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
- fi
- else
- expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- fi
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- if grep -q '^% COMMAND-LINE: ' "$benchmark"; then
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
- fi
-elif expr "$benchmark" : '.*\.sy$' &>/dev/null; then
- lang=sygus
- if test -e "$benchmark.expect"; then
- scrubber=`grep '^% SCRUBBER: ' "$benchmark.expect" | sed 's,^% SCRUBBER: ,,'`
- errscrubber=`grep '^% ERROR-SCRUBBER: ' "$benchmark.expect" | sed 's,^% ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
- expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
- command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\|SCRUBBER\|ERROR-SCRUBBER\): ' "$benchmark" "$benchmark" &>/dev/null; then
- scrubber=`grep '^; SCRUBBER: ' "$benchmark" | sed 's,^; SCRUBBER: ,,'`
- errscrubber=`grep '^; ERROR-SCRUBBER: ' "$benchmark" | sed 's,^; ERROR-SCRUBBER: ,,'`
- expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'`
- expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'`
- expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'`
- command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'`
- if [ -z "$expected_exit_status" ]; then
- expected_exit_status=0
- fi
- else
- error "cannot determine expected output of \`$benchmark'"
- fi
-else
- error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p or *.sy"
-fi
-
-command_line="--lang=$lang ${command_line:+$command_line }"
-
-gettemp expoutfile cvc4_expect_stdout.$$.XXXXXXXXXX
-gettemp experrfile cvc4_expect_stderr.$$.XXXXXXXXXX
-gettemp outfile cvc4_stdout.$$.XXXXXXXXXX
-gettemp errfile cvc4_stderr.$$.XXXXXXXXXX
-gettemp errfilefix cvc4_stderr.$$.XXXXXXXXXX
-gettemp exitstatusfile cvc4_exitstatus.$$.XXXXXXXXXX
-
-if [ -z "$expected_output" ]; then
- # in case expected stdout output is empty, make sure we don't differ
- # by a newline, which we would if we echo "" >"$expoutfile"
- touch "$expoutfile"
-else
- echo "$expected_output" >"$expoutfile"
-fi
-
-if [ "$lang" = sygus ]; then
- if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-synth-sol' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-synth-sol' &>/dev/null; then
-    # check the synthesized solution as part of this run
- command_line="$command_line --check-synth-sol"
- fi
-fi
-check_models=false
-if grep '^sat$' "$expoutfile" &>/dev/null || grep '^invalid$' "$expoutfile" &>/dev/null || grep '^unknown$' "$expoutfile" &>/dev/null; then
- if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-models' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-models' &>/dev/null; then
- # later on, we'll run another test with --check-models on
- check_models=true
- fi
-fi
-check_proofs=false
-check_unsat_cores=false
-if [ "$proof" = yes ]; then
- # proofs not currently supported in incremental mode, turn it off
- if grep '^unsat$' "$expoutfile" &>/dev/null || grep '^valid$' "$expoutfile" &>/dev/null &>/dev/null; then
- if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-proofs' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-proofs' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null &&
- ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null &&
- ! expr "$BINARY" : '.*pcvc4' &>/dev/null &&
- ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then
- # later on, we'll run another test with --check-proofs on
- check_proofs=true
- fi
- if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-unsat-cores' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-unsat-cores' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null &&
- ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null &&
- ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null &&
- ! expr "$BINARY" : '.*pcvc4' &>/dev/null &&
- ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then
- # later on, we'll run another test with --check-unsat-cores on
- check_unsat_cores=true
- fi
- fi
-fi
-
-if [ -z "$expected_error" ]; then
- # in case expected stderr output is empty, make sure we don't differ
- # by a newline, which we would if we echo "" >"$experrfile"
- touch "$experrfile"
-else
- echo "$expected_error" >"$experrfile"
-fi
-
-cvc4dir=`dirname "$cvc4"`
-cvc4dirfull=`cd "$cvc4dir" && pwd`
-if [ -z "$cvc4dirfull" ]; then
- error "getting directory of \`$cvc4 !?"
-fi
-cvc4base=`basename "$cvc4"`
-cvc4full="$cvc4dirfull/$cvc4base"
-if [ $dump = no ]; then
- echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"` [from working dir `dirname "$benchmark"`]
- time ( :; \
- ( cd `dirname "$benchmark"`;
- $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"`;
- echo $? >"$exitstatusfile"
- ) > "$outfile" 2> "$errfile" )
-else
- echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` \| $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --lang=smt2 - [from working dir `dirname "$benchmark"`]
- time ( :; \
- ( cd `dirname "$benchmark"`;
- $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` | $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 -;
- echo $? >"$exitstatusfile"
- ) > "$outfile" 2> "$errfile" )
-fi
-
-# we have to give the actual error file the same treatment as the other files;
-# differences in versions of echo/bash were causing failures on some platforms
-# and not on others
-# (also grep out valgrind output, if valgrind reported 0 errors)
-actual_error=$(cat $errfile)
-if [[ "$VALGRIND" = "1" ]]; then
- #valgrind_output=$(cat $errfile|grep -E "^==[0-9]+== "|)
- valgrind_num_errors=$(cat $errfile|grep -E "^==[0-9]+== "|tail -n1|awk '{print $4}')
- echo "valgrind errors (not suppressed): $valgrind_num_errors" 1>&2
-
- ((valgrind_num_errors == 0)) && actual_error=$(echo "$actual_error"|grep -vE "^==[0-9]+== ")
-fi
-if [ -z "$actual_error" ]; then
- # in case expected stderr output is empty, make sure we don't differ
- # by a newline, which we would if we echo "" >"$experrfile"
- touch "$errfilefix"
-else
- echo "$actual_error" >"$errfilefix"
-fi
-
-# Scrub the output file if a scrubber has been specified.
-if [ -n "$scrubber" ]; then
- echo "About to scrub $outfile using $scrubber"
- mv "$outfile" "$outfile.prescrub"
- cat "$outfile.prescrub" | eval $scrubber >"$outfile"
-else
- echo "not scrubbing"
-fi
-
-# Scrub the error file if a scrubber has been specified.
-if [ -n "$errscrubber" ]; then
- echo "About to scrub $errfilefix using $errscrubber"
- mv "$errfilefix" "$errfilefix.prescrub"
- cat "$errfilefix.prescrub" | eval $errscrubber >"$errfilefix"
-else
- echo "not scrubbing error file"
-fi
-
-diffs=`diff -u --strip-trailing-cr "$expoutfile" "$outfile"`
-diffexit=$?
-diffserr=`diff -u --strip-trailing-cr "$experrfile" "$errfilefix"`
-diffexiterr=$?
-exit_status=`cat "$exitstatusfile"`
-
-exitcode=0
-if [ $diffexit -ne 0 ]; then
- echo "$prog: error: differences between expected and actual output on stdout"
- echo "$diffs"
- exitcode=1
-fi
-if [ $diffexiterr -ne 0 ]; then
- echo "$prog: error: differences between expected and actual output on stderr"
- echo "$diffserr"
- exitcode=1
-fi
-
-if [ "$exit_status" != "$expected_exit_status" ]; then
- echo "$prog: error: expected exit status \`$expected_exit_status' but got \`$exit_status'"
- exitcode=1
-fi
-
-if $check_models || $check_proofs || $check_unsat_cores; then
- check_cmdline=
- if $check_models; then
- check_cmdline="$check_cmdline --check-models"
- fi
- if $check_proofs; then
- # taking: Make the extra flags part of --check-proofs.
- check_cmdline="$check_cmdline --check-proofs --no-bv-eq --no-bv-ineq --no-bv-algebraic"
- fi
- if $check_unsat_cores; then
- check_cmdline="$check_cmdline --check-unsat-cores"
- fi
- # run an extra model/proof/core-checking pass
- if ! CVC4_REGRESSION_ARGS="$CVC4_REGRESSION_ARGS$check_cmdline" "$0" $wrapper "$cvc4" "$benchmark_orig"; then
- exitcode=1
- fi
-fi
-
-
-exit $exitcode
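
One feature of the deleted script worth noting is the TEST_GROUP/TEST_GROUPS splitting described in its header comment, which the new Python script does not carry over: a benchmark was assigned to a group by hashing its name with cksum, and benchmarks outside the selected group exited with status 77 (Automake's "skipped"). A Python sketch of that grouping rule, with a made-up helper name and benchmark path for illustration:

import os
import subprocess

def benchmark_group(benchmark, num_groups):
    # Mirror of the old shell logic: `echo "$benchmark" | cksum`, then the
    # checksum taken modulo TEST_GROUPS (echo appends the trailing newline).
    out = subprocess.run(['cksum'], input=(benchmark + '\n').encode(),
                         stdout=subprocess.PIPE, check=True).stdout
    return int(out.split()[0]) % num_groups

if os.environ.get('TEST_GROUP'):
    groups = int(os.environ['TEST_GROUPS'])
    group = benchmark_group('regress0/arrays/incorrect10.smt', groups)
    if group != int(os.environ['TEST_GROUP']):
        raise SystemExit(77)  # skip: benchmark belongs to another group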
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py
new file mode 100755
index 000000000..c861b0d71
--- /dev/null
+++ b/test/regress/run_regression.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+"""
+Usage:
+
+ run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
+        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy ]
+
+Runs benchmark and checks for correct exit status and output.
+"""
+
+import argparse
+import difflib
+import os
+import re
+import shlex
+import subprocess
+import sys
+
+SCRUBBER = 'SCRUBBER: '
+ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
+EXPECT = 'EXPECT: '
+EXPECT_ERROR = 'EXPECT-ERROR: '
+EXIT = 'EXIT: '
+COMMAND_LINE = 'COMMAND-LINE: '
+
+
+def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+ command_line, benchmark_dir, benchmark_filename):
+ """Runs CVC4 on the file `benchmark_filename` in the directory
+ `benchmark_dir` using the binary `cvc4_binary` with the command line
+ options `command_line`. The output is scrubbed using `scrubber` and
+ `error_scrubber` for stdout and stderr, respectively. If dump is true, the
+ function first uses CVC4 to read in and dump the benchmark file and then
+ uses that as input."""
+
+ bin_args = wrapper[:]
+ bin_args.append(cvc4_binary)
+
+ output = None
+ error = None
+ exit_status = None
+ if dump:
+ dump_args = [
+ '--preprocess-only', '--dump', 'raw-benchmark',
+ '--output-lang=smt2', '-qq'
+ ]
+ dump_process = subprocess.Popen(
+ bin_args + command_line + dump_args + [benchmark_filename],
+ cwd=benchmark_dir,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ dump_output, _ = dump_process.communicate()
+ process = subprocess.Popen(
+ bin_args + command_line + ['--lang=smt2', '-'],
+ cwd=benchmark_dir,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, error = process.communicate(input=dump_output)
+ exit_status = process.returncode
+ else:
+ process = subprocess.Popen(
+ bin_args + command_line + [benchmark_filename],
+ cwd=benchmark_dir,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, error = process.communicate()
+ exit_status = process.returncode
+
+ # If a scrubber command has been specified then apply it to the output.
+ if scrubber:
+ scrubber_process = subprocess.Popen(
+ shlex.split(scrubber),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, _ = scrubber_process.communicate(input=output)
+ if error_scrubber:
+ error_scrubber_process = subprocess.Popen(
+ shlex.split(error_scrubber),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ error, _ = error_scrubber_process.communicate(input=error)
+
+ # Popen in Python 3 returns a bytes object instead of a string for
+ # stdout/stderr.
+ if isinstance(output, bytes):
+ output = output.decode()
+ if isinstance(error, bytes):
+ error = error.decode()
+ return (output.strip(), error.strip(), exit_status)
+
+
+def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
+ """Determines the expected output for a benchmark, runs CVC4 on it and then
+ checks whether the output corresponds to the expected output. Optionally
+ uses a wrapper `wrapper`, tests proof generation (if proof is true), or
+ dumps a benchmark and uses that as the input (if dump is true)."""
+
+ if not os.access(cvc4_binary, os.X_OK):
+ sys.exit(
+ '"{}" does not exist or is not executable'.format(cvc4_binary))
+ if not os.path.isfile(benchmark_path):
+ sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
+
+ basic_command_line_args = []
+
+ benchmark_basename = os.path.basename(benchmark_path)
+ benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
+ benchmark_dir = os.path.dirname(benchmark_path)
+ comment_char = '%'
+ status_regex = None
+ status_to_output = lambda s: s
+ if benchmark_ext == '.smt':
+ status_regex = r':status\s*(sat|unsat)'
+ comment_char = ';'
+ elif benchmark_ext == '.smt2':
+ status_regex = r'set-info\s*:status\s*(sat|unsat)'
+ comment_char = ';'
+ elif benchmark_ext == '.cvc':
+ pass
+ elif benchmark_ext == '.p':
+ basic_command_line_args.append('--finite-model-find')
+ status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
+ status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
+ elif benchmark_ext == '.sy':
+ comment_char = ';'
+ # Do not use proofs/unsat-cores with .sy files
+ proof = False
+ else:
+ sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
+ benchmark_basename))
+
+ # If there is an ".expect" file for the benchmark, read the metadata
+ # from there, otherwise from the benchmark file.
+ metadata_filename = benchmark_path + '.expect'
+ if os.path.isfile(metadata_filename):
+ comment_char = '%'
+ else:
+ metadata_filename = benchmark_path
+
+ metadata_lines = None
+ with open(metadata_filename, 'r') as metadata_file:
+ metadata_lines = metadata_file.readlines()
+ metadata_content = ''.join(metadata_lines)
+
+ # Extract the metadata for the benchmark.
+ scrubber = None
+ error_scrubber = None
+ expected_output = ''
+ expected_error = ''
+ expected_exit_status = None
+ command_line = ''
+ for line in metadata_lines:
+        # Skip lines that do not start with the comment character.
+ if line[0] != comment_char:
+ continue
+ line = line[2:]
+
+ if line.startswith(SCRUBBER):
+ scrubber = line[len(SCRUBBER):]
+ elif line.startswith(ERROR_SCRUBBER):
+ error_scrubber = line[len(ERROR_SCRUBBER):]
+ elif line.startswith(EXPECT):
+ expected_output += line[len(EXPECT):]
+ elif line.startswith(EXPECT_ERROR):
+ expected_error += line[len(EXPECT_ERROR):]
+ elif line.startswith(EXIT):
+ expected_exit_status = int(line[len(EXIT):])
+ elif line.startswith(COMMAND_LINE):
+ command_line += line[len(COMMAND_LINE):]
+ expected_output = expected_output.strip()
+ expected_error = expected_error.strip()
+
+ # Expected output/expected error has not been defined in the metadata for
+ # the benchmark. Try to extract the information from the benchmark itself.
+ if expected_output == '' and expected_error == '':
+ match = None
+ if status_regex:
+ match = re.search(status_regex, metadata_content)
+
+ if match:
+ expected_output = status_to_output(match.group(1))
+ elif expected_exit_status is None:
+ # If there is no expected output/error and the exit status has not
+ # been set explicitly, the benchmark is invalid.
+ sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
+
+ if not proof and ('(get-unsat-cores)' in metadata_content
+ or '(get-unsat-assumptions)' in metadata_content):
+ print(
+ '1..0 # Skipped: unsat cores not supported without proof support')
+ return
+
+ if expected_exit_status is None:
+ expected_exit_status = 0
+
+ if 'CVC4_REGRESSION_ARGS' in os.environ:
+ basic_command_line_args += shlex.split(
+ os.environ['CVC4_REGRESSION_ARGS'])
+ basic_command_line_args += shlex.split(command_line)
+ command_line_args_configs = [basic_command_line_args]
+
+ extra_command_line_args = []
+ if benchmark_ext == '.sy' and \
+ '--no-check-synth-sol' not in basic_command_line_args and \
+ '--check-synth-sol' not in basic_command_line_args:
+ extra_command_line_args = ['--check-synth-sol']
+ if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
+ '--no-check-models' not in basic_command_line_args:
+ extra_command_line_args = ['--check-models']
+    if proof and re.search(r'^(unsat|valid)$', expected_output):
+        if '--no-check-proofs' not in basic_command_line_args and \
+           '--incremental' not in basic_command_line_args and \
+           '--unconstrained-simp' not in basic_command_line_args and \
+           not cvc4_binary.endswith('pcvc4'):
+            extra_command_line_args += [
+                '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
+                '--no-bv-algebraic'
+            ]
+        if '--no-check-unsat-cores' not in basic_command_line_args and \
+           '--incremental' not in basic_command_line_args and \
+           '--unconstrained-simp' not in basic_command_line_args and \
+           not cvc4_binary.endswith('pcvc4'):
+            extra_command_line_args += ['--check-unsat-cores']
+ if extra_command_line_args:
+ command_line_args_configs.append(
+ basic_command_line_args + extra_command_line_args)
+
+ # Run CVC4 on the benchmark with the different option sets and check
+ # whether the exit status, stdout output, stderr output are as expected.
+ print('1..{}'.format(len(command_line_args_configs)))
+ print('# Starting')
+ for command_line_args in command_line_args_configs:
+ output, error, exit_status = run_benchmark(
+ dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+ command_line_args, benchmark_dir, benchmark_basename)
+ if output != expected_output:
+ print(
+ 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
+ format(command_line_args))
+ for line in difflib.context_diff(output.splitlines(),
+ expected_output.splitlines()):
+ print(line)
+ elif error != expected_error:
+ print(
+ 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
+ format(command_line_args))
+ for line in difflib.context_diff(error.splitlines(),
+ expected_error.splitlines()):
+ print(line)
+ elif expected_exit_status != exit_status:
+ print(
+ 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
+ format(expected_exit_status, exit_status, command_line_args))
+ else:
+ print('ok - Flags: {}'.format(command_line_args))
+
+
+def main():
+ """Parses the command line arguments and then calls the core of the
+ script."""
+
+ parser = argparse.ArgumentParser(
+ description=
+ 'Runs benchmark and checks for correct exit status and output.')
+ parser.add_argument('--proof', action='store_true')
+ parser.add_argument('--dump', action='store_true')
+ parser.add_argument('wrapper', nargs='*')
+ parser.add_argument('cvc4_binary')
+ parser.add_argument('benchmark')
+ args = parser.parse_args()
+ cvc4_binary = os.path.abspath(args.cvc4_binary)
+
+ wrapper = args.wrapper
+ if os.environ.get('VALGRIND') == '1' and not wrapper:
+ wrapper = ['libtool', '--mode=execute', 'valgrind']
+
+ run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)
+
+
+if __name__ == "__main__":
+ main()
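
Run by hand, the new script emits the short TAP stream that tap-driver.sh consumes. A sketch of a direct invocation (the binary path and benchmark name are placeholders, and the exact flag lists depend on the benchmark's metadata):

import subprocess

# Hypothetical direct call, mirroring LOG_COMPILER plus AM_LOG_FLAGS.
proc = subprocess.run(
    ['./run_regression.py', '/path/to/cvc4',
     'regress0/some_benchmark.smt2'],
    stdout=subprocess.PIPE)
print(proc.stdout.decode())
# For a passing benchmark that also earns a --check-models rerun,
# the stream looks like:
#   1..2
#   # Starting
#   ok - Flags: []
#   ok - Flags: ['--check-models']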