#!/bin/bash # # Morgan Deters # for the CVC4 project # # usage: # # run_regression cvc4-binary [ --proof | --dump ] [ benchmark.cvc | benchmark.smt | benchmark.smt2 ] # # Runs benchmark and checks for correct exit status and output. # # ulimit -t 1 # For detecting long running regressions ulimit -s 65000 # Needed for some (esp. portfolio and model-building) prog=`basename "$0"` if [ $# -lt 2 ]; then echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2 exit 1 fi proof=no dump=no if [ x"$1" = x--proof ]; then proof=yes shift elif [ x"$1" = x--dump ]; then dump=yes shift fi if [ $# -lt 2 ]; then echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2 exit 1 fi wrapper= while [ $# -gt 2 ]; do wrapper="$wrapper$1 " shift done [[ "$VALGRIND" = "1" ]] && { wrapper="libtool --mode=execute valgrind $wrapper" } cvc4=$1 benchmark_orig=$2 benchmark="$benchmark_orig" function error { echo "$prog: error: $*" exit 1 } if ! [ -x "$cvc4" ]; then error "\`$cvc4' doesn't exist or isn't executable" >&2 fi if ! [ -r "$benchmark" ]; then error "\`$benchmark' doesn't exist or isn't readable" >&2 fi # gettemp() and its associated tempfiles[] array are intended to never # allow a temporary file to leak---the trap ensures that when this script # exits, whether via a regular exit or an -INT or other signal, the # temp files are deleted. declare -a tempfiles trap -- 'test ${#tempfiles[@]} -gt 0 && rm -f "${tempfiles[@]}"' EXIT function gettemp { local temp="`mktemp -t "$2"`" tempfiles[${#tempfiles[@]}]="$temp" eval "$1"="$temp" } tmpbenchmark= if expr "$benchmark" : '.*\.smt$' &>/dev/null; then lang=smt1 if test -e "$benchmark.expect"; then expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then expected_output=`grep '^% EXPECT: ' "$benchmark" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle # this frustrates our auto-language-detection gettemp tmpbenchmark cvc4_benchmark.smt.$$.XXXXXXXXXX grep -v '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark" if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi benchmark=$tmpbenchmark elif grep '^ *:status *sat' "$benchmark" &>/dev/null; then expected_output=sat expected_exit_status=0 command_line= elif grep '^ *:status *unsat' "$benchmark" &>/dev/null; then expected_output=unsat expected_exit_status=0 command_line= else error "cannot determine status of \`$benchmark'" fi elif expr "$benchmark" : '.*\.smt2$' &>/dev/null; then lang=smt2 if test -e "$benchmark.expect"; then expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'` expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'` command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^ *( *set-info *:status *sat' "$benchmark" &>/dev/null; then expected_output=sat expected_exit_status=0 command_line= elif grep '^ *( *set-info *:status *unsat' "$benchmark" &>/dev/null; then expected_output=unsat expected_exit_status=0 command_line= else error "cannot determine status of \`$benchmark'" fi elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then lang=cvc4 expected_output=$(grep '^% EXPECT: ' "$benchmark") expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` if [ -z "$expected_output" -a -z "$expected_error" ]; then error "cannot determine expected output of \`$benchmark': " \ "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures" fi expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,') expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` elif expr "$benchmark" : '.*\.p$' &>/dev/null; then lang=tptp command_line=--finite-model-find expected_output=$(grep '^% EXPECT: ' "$benchmark") expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` if [ -z "$expected_output" -a -z "$expected_error" ]; then if grep -q '^% Status *: ' "$benchmark"; then expected_output="$(grep '^% *Status *: ' "$benchmark" | head -1 | awk '{print$NF}')" case "$expected_output" in Theorem|Unsatisfiable) expected_exit_status=0 ;; CounterSatisfiable|Satisfiable) expected_exit_status=0 ;; GaveUp) expected_exit_status=0 ;; esac expected_output="% SZS status $expected_output for $(basename "$benchmark" | sed 's,\.p$,,')" else error "cannot determine expected output of \`$benchmark': " \ "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures" fi else expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,') expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` fi if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi if grep -q '^% COMMAND-LINE: ' "$benchmark"; then command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` fi elif expr "$benchmark" : '.*\.sy$' &>/dev/null; then lang=sygus if test -e "$benchmark.expect"; then expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'` expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'` command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi else error "cannot determine expected output of \`$benchmark'" fi else error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p or *.sy" fi command_line="${command_line:+$command_line }--lang=$lang" gettemp expoutfile cvc4_expect_stdout.$$.XXXXXXXXXX gettemp experrfile cvc4_expect_stderr.$$.XXXXXXXXXX gettemp outfile cvc4_stdout.$$.XXXXXXXXXX gettemp errfile cvc4_stderr.$$.XXXXXXXXXX gettemp errfilefix cvc4_stderr.$$.XXXXXXXXXX gettemp exitstatusfile cvc4_exitstatus.$$.XXXXXXXXXX if [ -z "$expected_output" ]; then # in case expected stdout output is empty, make sure we don't differ # by a newline, which we would if we echo "" >"$expoutfile" touch "$expoutfile" else echo "$expected_output" >"$expoutfile" fi check_models=false if grep '^sat$' "$expoutfile" &>/dev/null || grep '^invalid$' "$expoutfile" &>/dev/null || grep '^unknown$' "$expoptfile" &>/dev/null; then if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-models' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-models' &>/dev/null; then # later on, we'll run another test with --check-models on check_models=true fi fi check_proofs=false check_unsat_cores=false if [ "$proof" = yes ]; then # proofs not currently supported in incremental mode, turn it off if grep '^unsat$' "$expoutfile" &>/dev/null || grep '^valid$' "$expoutfile" &>/dev/null &>/dev/null; then if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-proofs' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-proofs' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null && ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null && ! expr "$BINARY" : '.*pcvc4' &>/dev/null && ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then # later on, we'll run another test with --check-proofs on check_proofs=true fi if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-unsat-cores' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-unsat-cores' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null && ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null && ! expr "$BINARY" : '.*pcvc4' &>/dev/null && ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then # later on, we'll run another test with --check-unsat-cores on check_unsat_cores=true fi fi fi if [ -z "$expected_error" ]; then # in case expected stderr output is empty, make sure we don't differ # by a newline, which we would if we echo "" >"$experrfile" touch "$experrfile" else echo "$expected_error" >"$experrfile" fi cvc4dir=`dirname "$cvc4"` cvc4dirfull=`cd "$cvc4dir" && pwd` if [ -z "$cvc4dirfull" ]; then error "getting directory of \`$cvc4 !?" fi cvc4base=`basename "$cvc4"` cvc4full="$cvc4dirfull/$cvc4base" if [ $dump = no ]; then echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"` [from working dir `dirname "$benchmark"`] time ( :; \ ( cd `dirname "$benchmark"`; $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"`; echo $? >"$exitstatusfile" ) > "$outfile" 2> "$errfile" ) else echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` \| $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --lang=smt2 - [from working dir `dirname "$benchmark"`] time ( :; \ ( cd `dirname "$benchmark"`; $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` | $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 -; echo $? >"$exitstatusfile" ) > "$outfile" 2> "$errfile" ) fi # we have to actual error file same treatment as other files. differences in # versions of echo/bash were causing failure on some platforms and not on others # (also grep out valgrind output, if 0 errors reported by valgrind) actual_error=$(cat $errfile) if [[ "$VALGRIND" = "1" ]]; then #valgrind_output=$(cat $errfile|grep -E "^==[0-9]+== "|) valgrind_num_errors=$(cat $errfile|grep -E "^==[0-9]+== "|tail -n1|awk '{print $4}') echo "valgrind errors (not suppressed): $valgrind_num_errors" 1>&2 ((valgrind_num_errors == 0)) && actual_error=$(echo "$actual_error"|grep -vE "^==[0-9]+== ") fi if [ -z "$actual_error" ]; then # in case expected stderr output is empty, make sure we don't differ # by a newline, which we would if we echo "" >"$experrfile" touch "$errfilefix" else echo "$actual_error" >"$errfilefix" fi diffs=`diff -u --strip-trailing-cr "$expoutfile" "$outfile"` diffexit=$? diffserr=`diff -u --strip-trailing-cr "$experrfile" "$errfilefix"` diffexiterr=$? exit_status=`cat "$exitstatusfile"` exitcode=0 if [ $diffexit -ne 0 ]; then echo "$prog: error: differences between expected and actual output on stdout" echo "$diffs" exitcode=1 fi if [ $diffexiterr -ne 0 ]; then echo "$prog: error: differences between expected and actual output on stderr" echo "$diffserr" exitcode=1 fi if [ "$exit_status" != "$expected_exit_status" ]; then echo "$prog: error: expected exit status \`$expected_exit_status' but got \`$exit_status'" exitcode=1 fi if $check_models || $check_proofs || $check_unsat_cores; then check_cmdline= if $check_models; then check_cmdline="$check_cmdline --check-models" fi if $check_proofs; then check_cmdline="$check_cmdline --check-proofs" fi if $check_unsat_cores; then check_cmdline="$check_cmdline --check-unsat-cores" fi # run an extra model/proof/core-checking pass if ! CVC4_REGRESSION_ARGS="$CVC4_REGRESSION_ARGS$check_cmdline" "$0" $wrapper "$cvc4" "$benchmark_orig"; then exitcode=1 fi fi exit $exitcode