#!/bin/bash # # Morgan Deters # for the CVC4 project # # usage: # # run_regression cvc4-binary [ --proof | --dump ] [ benchmark.cvc | benchmark.smt | benchmark.smt2 ] # # Runs benchmark and checks for correct exit status and output. # # ulimit -t 1 # For detecting long running regressions prog=`basename "$0"` if [ $# -lt 2 ]; then echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2 exit 1 fi proof=no dump=no if [ x"$1" = x--proof ]; then proof=yes shift elif [ x"$1" = x--dump ]; then dump=yes shift fi if [ $# -lt 2 ]; then echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2 exit 1 fi wrapper= while [ $# -gt 2 ]; do wrapper="$wrapper$1 " shift done cvc4=$1 benchmark_orig=$2 benchmark="$benchmark_orig" function error { echo "$prog: error: $*" exit 1 } if ! [ -x "$cvc4" ]; then error "\`$cvc4' doesn't exist or isn't executable" >&2 fi if ! [ -r "$benchmark" ]; then error "\`$benchmark' doesn't exist or isn't readable" >&2 fi # gettemp() and its associated tempfiles[] array are intended to never # allow a temporary file to leak---the trap ensures that when this script # exits, whether via a regular exit or an -INT or other signal, the # temp files are deleted. declare -a tempfiles trap -- 'test ${#tempfiles[@]} -gt 0 && rm -f "${tempfiles[@]}"' EXIT function gettemp { local temp="`mktemp -t "$2"`" tempfiles[${#tempfiles[@]}]="$temp" eval "$1"="$temp" } tmpbenchmark= if expr "$benchmark" : '.*\.smt$' &>/dev/null; then proof_command=PROOFS-NOT-SUPPORTED-IN-SMTLIB-V1 lang=smt1 if test -e "$benchmark.expect"; then expected_proof=`grep '^% PROOF' "$benchmark.expect" &>/dev/null && echo yes` expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes` expected_output=`grep '^% EXPECT: ' "$benchmark" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle # this frustrates our auto-language-detection gettemp tmpbenchmark cvc4_benchmark.smt.$$.XXXXXXXXXX grep -v '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark" if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi benchmark=$tmpbenchmark elif grep '^ *:status *sat' "$benchmark" &>/dev/null; then expected_proof= expected_output=sat expected_exit_status=0 command_line= elif grep '^ *:status *unsat' "$benchmark" &>/dev/null; then expected_proof= expected_output=unsat expected_exit_status=0 command_line= else error "cannot determine status of \`$benchmark'" fi elif expr "$benchmark" : '.*\.smt2$' &>/dev/null; then proof_command='(get-proof)' lang=smt2 if test -e "$benchmark.expect"; then expected_proof=`grep '^[%;] PROOF' "$benchmark.expect" &>/dev/null && echo yes` expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'` expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'` command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi elif grep '^\(%\|;\) \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then expected_proof=`grep '^[%;] PROOF' "$benchmark" &>/dev/null && echo yes` expected_output=`grep '^[%;] EXPECT: ' "$benchmark" | sed 's,^[%;] EXPECT: ,,'` expected_error=`grep '^[%;] EXPECT-ERROR: ' "$benchmark" | sed 's,^[%;] EXPECT-ERROR: ,,'` expected_exit_status=`grep -m 1 '^[%;] EXIT: ' "$benchmark" | perl -pe 's,^[%;] EXIT: ,,;s,\r,,'` command_line=`grep '^[%;] COMMAND-LINE: ' "$benchmark" | sed 's,^[%;] COMMAND-LINE: ,,'` # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle # this frustrates our auto-language-detection gettemp tmpbenchmark cvc4_benchmark.smt2.$$.XXXXXXXXXX grep -v '^% \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark" if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi benchmark=$tmpbenchmark elif grep '^ *( *set-info *:status *sat' "$benchmark" &>/dev/null; then expected_proof= expected_output=sat expected_exit_status=0 command_line= elif grep '^ *( *set-info *:status *unsat' "$benchmark" &>/dev/null; then expected_proof=`grep '^; PROOF' "$benchmark" &>/dev/null && echo yes` expected_output=unsat expected_exit_status=0 command_line= else error "cannot determine status of \`$benchmark'" fi elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then proof_command='DUMP_PROOF;' lang=cvc4 expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes` expected_output=$(grep '^% EXPECT: ' "$benchmark") expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` if [ -z "$expected_output" -a -z "$expected_error" ]; then error "cannot determine expected output of \`$benchmark': " \ "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures" fi expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,') expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` elif expr "$benchmark" : '.*\.p$' &>/dev/null; then proof_command=PROOFS-NOT-SUPPORTED-IN-TPTP; lang=tptp command_line=--finite-model-find expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes` expected_output=$(grep '^% EXPECT: ' "$benchmark") expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` if [ -z "$expected_output" -a -z "$expected_error" ]; then if grep -q '^% Status *: ' "$benchmark"; then expected_output="$(grep '^% *Status *: ' "$benchmark" | head -1 | awk '{print$NF}')" case "$expected_output" in Theorem|Unsatisfiable) expected_exit_status=0 ;; CounterSatisfiable|Satisfiable) expected_exit_status=0 ;; GaveUp) expected_exit_status=0 ;; esac expected_output="% SZS status $expected_output for $(basename "$benchmark" | sed 's,\.p$,,')" else error "cannot determine expected output of \`$benchmark': " \ "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures" fi else expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,') expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` fi if [ -z "$expected_exit_status" ]; then expected_exit_status=0 fi if grep -q '^% COMMAND-LINE: ' "$benchmark"; then command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` fi else error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p" fi command_line="${command_line:+$command_line }--lang=$lang" gettemp expoutfile cvc4_expect_stdout.$$.XXXXXXXXXX gettemp experrfile cvc4_expect_stderr.$$.XXXXXXXXXX gettemp outfile cvc4_stdout.$$.XXXXXXXXXX gettemp errfile cvc4_stderr.$$.XXXXXXXXXX gettemp exitstatusfile cvc4_exitstatus.$$.XXXXXXXXXX if [ -z "$expected_output" ]; then # in case expected stdout output is empty, make sure we don't differ # by a newline, which we would if we echo "" >"$expoutfile" touch "$expoutfile" else echo "$expected_output" >"$expoutfile" fi check_models=false if grep '^sat$' "$expoutfile" &>/dev/null || grep '^invalid$' "$expoutfile" &>/dev/null || grep '^unknown$' "$expoptfile" &>/dev/null; then if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-models' &>/dev/null && ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-models' &>/dev/null; then # later on, we'll run another test with --check-models on check_models=true fi fi if [ -z "$expected_error" ]; then # in case expected stderr output is empty, make sure we don't differ # by a newline, which we would if we echo "" >"$experrfile" touch "$experrfile" else echo "$expected_error" >"$experrfile" fi cvc4dir=`dirname "$cvc4"` cvc4dirfull=`cd "$cvc4dir" && pwd` if [ -z "$cvc4dirfull" ]; then error "getting directory of \`$cvc4 !?" fi cvc4base=`basename "$cvc4"` cvc4full="$cvc4dirfull/$cvc4base" if [ $dump = no ]; then echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --segv-nospin `basename "$benchmark"` [from working dir `dirname "$benchmark"`] time ( :; \ ( cd `dirname "$benchmark"`; $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --segv-nospin `basename "$benchmark"`; echo $? >"$exitstatusfile" ) > "$outfile" 2> "$errfile" ) else echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q --segv-nospin `basename "$benchmark"` \| $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --lang=smt2 - [from working dir `dirname "$benchmark"`] time ( :; \ ( cd `dirname "$benchmark"`; $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q --segv-nospin `basename "$benchmark"` | $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 -; echo $? >"$exitstatusfile" ) > "$outfile" 2> "$errfile" ) fi diffs=`diff -u --strip-trailing-cr "$expoutfile" "$outfile"` diffexit=$? diffserr=`diff -u --strip-trailing-cr "$experrfile" "$errfile"` diffexiterr=$? exit_status=`cat "$exitstatusfile"` exitcode=0 if [ $diffexit -ne 0 ]; then echo "$prog: error: differences between expected and actual output on stdout" echo "$diffs" exitcode=1 fi if [ $diffexiterr -ne 0 ]; then echo "$prog: error: differences between expected and actual output on stderr" echo "$diffserr" exitcode=1 fi if [ "$exit_status" != "$expected_exit_status" ]; then echo "$prog: error: expected exit status \`$expected_exit_status' but got \`$exit_status'" exitcode=1 fi if [ "$proof" = yes -a "$expected_proof" = yes ]; then gettemp pfbenchmark cvc4_pfbenchmark.$$.XXXXXXXXXX # remove exit command to add proof command for smt2 benchmarks if expr "$benchmark" : '.*\.smt2$' &>/dev/null; then head -n -0 "$benchmark" > "$pfbenchmark"; echo "$proof_command" >>"$pfbenchmark"; echo "(exit)" >> "$pfbenchmark"; else cp $benchmark $pfbenchmark echo "$proof_command" >>"$pfbenchmark"; fi echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --proof --segv-nospin `basename "$pfbenchmark"` [from working dir `dirname "$pfbenchmark"`] time ( :; \ ( cd `dirname "$pfbenchmark"`; $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --proof --segv-nospin `basename "$pfbenchmark"`; echo $? >"$exitstatusfile" ) > "$outfile" 2> "$errfile" ) gettemp pfoutfile cvc4_proof.$$.XXXXXXXXXX diff --unchanged-group-format='' \ --old-group-format='' \ --new-group-format='%>' \ "$expoutfile" "$outfile" > "$pfoutfile" if [ ! -s "$pfoutfile" ]; then echo "$prog: error: proof generation failed with empty output (stderr follows)" cat "$errfile" exitcode=1 else echo running $LFSC "$pfoutfile" [from working dir `dirname "$pfbenchmark"`] if ! $LFSC "$pfoutfile" &> "$errfile"; then echo "$prog: error: proof checker failed (output follows)" cat "$errfile" exitcode=1 fi fi fi if $check_models; then # at least one sat/invalid response: run an extra model-checking pass if ! CVC4_REGRESSION_ARGS="$CVC4_REGRESSION_ARGS --check-models" "$0" $wrapper "$cvc4" "$benchmark_orig"; then exitcode=1 fi fi exit $exitcode