#!/bin/bash
#
# Morgan Deters <mdeters@cs.nyu.edu>
# for the CVC4 project
#
# usage:
#
#     run_regression [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]
#
# Runs benchmark and checks for correct exit status and output.
#

# ulimit -t 1    # For detecting long running regressions

prog=$(basename "$0")

# An optional leading mode flag may be given; at most one is consumed.
proof=no
dump=no
case "$1" in
  --proof)
    proof=yes
    shift
    ;;
  --dump)
    dump=yes
    shift
    ;;
esac

# With or without a flag, the solver binary and the benchmark must remain.
# An argument list that was too short before the optional shift is still too
# short afterwards, so this single check covers both situations.
if [ $# -lt 2 ]; then
  echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2
  exit 1
fi

# Everything ahead of the last two arguments forms the wrapper command,
# collected as one space-separated string (each word followed by a blank).
wrapper=
until [ $# -le 2 ]; do
  wrapper="$wrapper$1 "
  shift
done

cvc4=$1
benchmark_orig=$2
benchmark="$benchmark_orig"

# Report a fatal error and terminate the script.
# Globals:   prog (read) - script name used as the message prefix
# Arguments: $* - words of the message, joined with single blanks
# Outputs:   "<prog>: error: <message>" on stderr
# Exits:     always, with status 1
function error {
  # Diagnostics go to stderr so they are never mistaken for regular output,
  # even at call sites that do not add their own ">&2".
  echo "$prog: error: $*" >&2
  exit 1
}

# Refuse to go on unless the solver is executable and the benchmark readable.
[ -x "$cvc4" ] || error "\`$cvc4' doesn't exist or isn't executable" >&2
[ -r "$benchmark" ] || error "\`$benchmark' doesn't exist or isn't readable" >&2

# gettemp() and its associated tempfiles[] array are intended to never
# allow a temporary file to leak---the trap ensures that when this script
# exits, whether via a regular exit or an -INT or other signal, the
# temp files are deleted.
declare -a tempfiles
trap -- 'test ${#tempfiles[@]} -gt 0 && rm -f "${tempfiles[@]}"' EXIT

# Create a temporary file and store its path in a caller-named variable.
# Globals:   tempfiles (appended to), prog (read, for the error message)
# Arguments: $1 - name of the variable that receives the new file's path
#            $2 - template handed to `mktemp -t'
# Exits:     with status 1 if the temporary file cannot be created
function gettemp {
  local temp
  # Assign separately from `local' so a mktemp failure is not masked.
  if ! temp=$(mktemp -t "$2") || [ -z "$temp" ]; then
    echo "$prog: error: cannot create temporary file from template \`$2'" >&2
    exit 1
  fi
  tempfiles[${#tempfiles[@]}]="$temp"
  # printf -v stores the value verbatim; the path is never re-parsed by the
  # shell, so blanks or metacharacters in $TMPDIR cannot break the assignment.
  printf -v "$1" '%s' "$temp"
}

# Work out, from the benchmark's file extension, the input language, the
# command that requests a proof, and the expectations the run is checked
# against.  Variables assigned here:
#   proof_command, lang, expected_proof, expected_output, expected_error,
#   expected_exit_status, command_line
# Expectations come from "gesture" lines (PROOF, EXPECT, EXPECT-ERROR, EXIT,
# COMMAND-LINE).  When gesture lines have to be filtered out of the input,
# $benchmark is pointed at a filtered temporary copy (tmpbenchmark).
tmpbenchmark=
if expr "$benchmark" : '.*\.smt$' &>/dev/null; then
  # --- SMT-LIB v1 (*.smt) ---
  proof_command=PROOFS-NOT-SUPPORTED-IN-SMTLIB-V1
  lang=smt1
  if test -e "$benchmark.expect"; then
    # Gestures live in a sidecar "<benchmark>.expect" file; the benchmark
    # itself is used unmodified.
    expected_proof=`grep '^% PROOF' "$benchmark.expect" &>/dev/null && echo yes`
    expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
    # Only the first EXIT gesture counts; the perl filter also drops any
    # carriage return.
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  # Gestures embedded in the benchmark itself as "% " lines.
  # NOTE(review): "$benchmark" is passed to grep twice here; only the exit
  # status is used, so the second operand looks redundant.
  # NOTE(review): this test (and the filter below) requires "% PROOF: " with a
  # colon, while expected_proof is detected from "% PROOF" without one.
  elif grep '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then
    expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes`
    expected_output=`grep '^% EXPECT: ' "$benchmark" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
    # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle
    # this frustrates our auto-language-detection
    gettemp tmpbenchmark cvc4_benchmark.smt.$$.XXXXXXXXXX
    # Write a copy without the gesture lines and run the solver on that copy.
    grep -v '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark"
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
    benchmark=$tmpbenchmark
  # No gestures at all: fall back to the benchmark's own ":status" attribute.
  # (expected_error is not assigned in the two status branches.)
  elif grep '^ *:status  *sat' "$benchmark" &>/dev/null; then
    expected_proof=
    expected_output=sat
    expected_exit_status=0
    command_line=
  elif grep '^ *:status  *unsat' "$benchmark" &>/dev/null; then
    expected_proof=
    expected_output=unsat
    expected_exit_status=0
    command_line=
  else
    error "cannot determine status of \`$benchmark'"
  fi
elif expr "$benchmark" : '.*\.smt2$' &>/dev/null; then
  # --- SMT-LIB v2 (*.smt2) ---
  proof_command='(get-proof)'
  lang=smt2
  if test -e "$benchmark.expect"; then
    # Sidecar file: PROOF may be introduced by "%" or ";", the other gestures
    # only by "%".
    expected_proof=`grep '^[%;] PROOF' "$benchmark.expect" &>/dev/null && echo yes`
    expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  # Embedded gestures, introduced by either "%" or ";".
  # NOTE(review): "$benchmark" is again given to grep twice.
  elif grep '^\(%\|;\) \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then
    expected_proof=`grep '^[%;] PROOF' "$benchmark" &>/dev/null && echo yes`
    expected_output=`grep '^[%;] EXPECT: ' "$benchmark" | sed 's,^[%;] EXPECT: ,,'`
    expected_error=`grep '^[%;] EXPECT-ERROR: ' "$benchmark" | sed 's,^[%;] EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^[%;] EXIT: ' "$benchmark" | perl -pe 's,^[%;] EXIT: ,,;s,\r,,'`
    command_line=`grep '^[%;] COMMAND-LINE: ' "$benchmark" | sed 's,^[%;] COMMAND-LINE: ,,'`
    # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle
    # this frustrates our auto-language-detection
    gettemp tmpbenchmark cvc4_benchmark.smt2.$$.XXXXXXXXXX
    # Only the "%"-prefixed gesture lines are filtered out of the copy; the
    # ";"-prefixed ones are left in place.
    grep -v '^% \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark"
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
    benchmark=$tmpbenchmark
  # No gestures: fall back to "(set-info :status ...)".
  # (expected_error is not assigned in the two status branches.)
  elif grep '^ *( *set-info  *:status  *sat' "$benchmark" &>/dev/null; then
    expected_proof=
    expected_output=sat
    expected_exit_status=0
    command_line=
  elif grep '^ *( *set-info  *:status  *unsat' "$benchmark" &>/dev/null; then
    # An unsat benchmark may still ask for proof checking via "; PROOF".
    expected_proof=`grep '^; PROOF' "$benchmark" &>/dev/null && echo yes`
    expected_output=unsat
    expected_exit_status=0
    command_line=
  else
    error "cannot determine status of \`$benchmark'"
  fi
elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then
  # --- CVC native language (*.cvc) ---
  # Gestures are read straight from the benchmark, which is run as-is (no
  # filtered copy is made in this branch).
  proof_command='DUMP_PROOF;'
  lang=cvc4
  expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes`
  # Keep the raw "% EXPECT: " lines for now so that "no EXPECT gesture at all"
  # can be detected; the prefix is stripped further down.
  expected_output=$(grep '^% EXPECT: ' "$benchmark")
  expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
  if [ -z "$expected_output" -a -z "$expected_error" ]; then
    error "cannot determine expected output of \`$benchmark': " \
          "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
  fi
  expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
  expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
  if [ -z "$expected_exit_status" ]; then
    expected_exit_status=0
  fi
  command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
elif expr "$benchmark" : '.*\.p$' &>/dev/null; then
  # --- TPTP (*.p) ---
  proof_command=PROOFS-NOT-SUPPORTED-IN-TPTP;
  lang=tptp
  # Default options for TPTP; replaced below if a COMMAND-LINE gesture exists.
  command_line=--finite-model-find
  expected_proof=`grep '^% PROOF' "$benchmark" &>/dev/null && echo yes`
  expected_output=$(grep '^% EXPECT: ' "$benchmark")
  expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
  if [ -z "$expected_output" -a -z "$expected_error" ]; then
    # No gestures: derive the expectation from the "% Status :" header line.
    if grep -q '^% Status *: ' "$benchmark"; then
      # Last field of the first Status line, e.g. "Theorem".
      expected_output="$(grep '^% *Status *: ' "$benchmark" | head -1 | awk '{print$NF}')"
      # Every recognized status maps to exit status 0; any other status leaves
      # expected_exit_status untouched here and it is defaulted to 0 below.
      case "$expected_output" in
        Theorem|Unsatisfiable) expected_exit_status=0 ;;
        CounterSatisfiable|Satisfiable) expected_exit_status=0 ;;
        GaveUp) expected_exit_status=0 ;;
      esac
      # Expected stdout is the SZS status line for the problem, named after
      # the benchmark file without its ".p" suffix.
      expected_output="% SZS status $expected_output for $(basename "$benchmark" | sed 's,\.p$,,')"
    else
      error "cannot determine expected output of \`$benchmark': " \
            "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
    fi
  else
    expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
  fi
  if [ -z "$expected_exit_status" ]; then
    expected_exit_status=0
  fi
  if grep -q '^% COMMAND-LINE: ' "$benchmark"; then
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
  fi
else
  error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p"
fi

# Append the explicit --lang option to whatever options the benchmark asked
# for (a single blank separates them when there are any).
if [ -n "$command_line" ]; then
  command_line="$command_line --lang=$lang"
else
  command_line="--lang=$lang"
fi

# Allocate the scratch files, in this order: expected stdout, expected
# stderr, actual stdout, actual stderr, and the solver's exit status.
for tmpspec in \
    expoutfile:cvc4_expect_stdout \
    experrfile:cvc4_expect_stderr \
    outfile:cvc4_stdout \
    errfile:cvc4_stderr \
    exitstatusfile:cvc4_exitstatus; do
  gettemp "${tmpspec%%:*}" "${tmpspec#*:}.$$.XXXXXXXXXX"
done

# Write out the expected stdout.  An empty expectation must yield an empty
# file: echo "" would add a newline the real output does not have.
if [ -n "$expected_output" ]; then
  echo "$expected_output" >"$expoutfile"
else
  touch "$expoutfile"
fi
# Decide whether a benchmark warrants an additional --check-models run.
# Arguments: $1 - file holding the expected stdout
#            $2 - the solver options in effect (one string)
# Returns:   0 when the expected output contains a line that is exactly
#            "sat", "invalid" or "unknown" AND the options mention neither
#            --check-models nor --no-check-models; non-zero otherwise
function needs_model_check {
  grep -q -e '^sat$' -e '^invalid$' -e '^unknown$' "$1" 2>/dev/null || return 1
  case "$2" in
    # model checking was already requested, or explicitly disabled
    *--check-models* | *--no-check-models*) return 1 ;;
  esac
  return 0
}

check_models=false
# All three answers are looked up in the expected-stdout file.  (The
# "unknown" lookup used to name a misspelled, never-set variable, so it could
# not match.)
if needs_model_check "$expoutfile" "$CVC4_REGRESSION_ARGS $command_line"; then
  # later on, we'll run another test with --check-models on
  check_models=true
fi
# Write out the expected stderr.  As for stdout, an empty expectation must
# give an empty file: echo "" would add a newline and make the diff fail.
if [ -n "$expected_error" ]; then
  echo "$expected_error" >"$experrfile"
else
  touch "$experrfile"
fi

# Resolve the solver to an absolute path: it is started from the benchmark's
# directory below, so a relative path would no longer be valid there.
cvc4dir=$(dirname "$cvc4")
cvc4dirfull=$(cd "$cvc4dir" && pwd)
if [ -z "$cvc4dirfull" ]; then
  error "getting directory of \`$cvc4' !?"
fi
cvc4base=$(basename "$cvc4")
cvc4full="$cvc4dirfull/$cvc4base"

# Split the benchmark path once.  Both parts are always used quoted so that
# directories or file names containing whitespace survive intact.
benchmark_dir=$(dirname "$benchmark")
benchmark_base=$(basename "$benchmark")

# Run the solver with stdout, stderr and the exit status each captured in
# their own scratch file.  $wrapper, $CVC4_REGRESSION_ARGS and $command_line
# are deliberately left unquoted: each holds a blank-separated word list.
if [ "$dump" = no ]; then
  # Regular mode: a single solver run on the benchmark.
  echo running $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --segv-nospin "$benchmark_base" "[from working dir $benchmark_dir]"
  time ( :; \
  ( cd "$benchmark_dir";
    $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --segv-nospin "$benchmark_base";
    echo $? >"$exitstatusfile"
  ) > "$outfile" 2> "$errfile" )
else
  # Dump mode: the first solver only preprocesses and dumps the clauses as
  # SMT-LIB v2; a second solver reads that dump from stdin.  The recorded
  # exit status is the pipeline's, i.e. that of the second solver.
  echo running $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q --segv-nospin "$benchmark_base" \| $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 - "[from working dir $benchmark_dir]"
  time ( :; \
  ( cd "$benchmark_dir";
    $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q --segv-nospin "$benchmark_base" | $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 -;
    echo $? >"$exitstatusfile"
  ) > "$outfile" 2> "$errfile" )
fi

# Compare the captured stdout, stderr and exit status with the expectations.
# Every mismatch is reported and sets exitcode to 1; nothing aborts early, so
# all three kinds of mismatch can show up in the same run.
exitcode=0

diffs=$(diff -u --strip-trailing-cr "$expoutfile" "$outfile")
diffexit=$?
diffserr=$(diff -u --strip-trailing-cr "$experrfile" "$errfile")
diffexiterr=$?
exit_status=$(cat "$exitstatusfile")

[ $diffexit -eq 0 ] || {
  echo "$prog: error: differences between expected and actual output on stdout"
  echo "$diffs"
  exitcode=1
}

[ $diffexiterr -eq 0 ] || {
  echo "$prog: error: differences between expected and actual output on stderr"
  echo "$diffserr"
  exitcode=1
}

[ "$exit_status" = "$expected_exit_status" ] || {
  echo "$prog: error: expected exit status \`$expected_exit_status' but got \`$exit_status'"
  exitcode=1
}

# Optional proof pass: only when --proof was given AND the benchmark carries
# a PROOF gesture.  The benchmark is copied with the language's proof command
# appended, the solver is re-run with --proof, and whatever it prints beyond
# the expected output is handed to the $LFSC proof checker.
if [ "$proof" = yes ] && [ "$expected_proof" = yes ]; then
  gettemp pfbenchmark cvc4_pfbenchmark.$$.XXXXXXXXXX
  # Split the path once; both parts are always used quoted so that temp
  # directories containing whitespace work.
  pfdir=$(dirname "$pfbenchmark")
  pfbase=$(basename "$pfbenchmark")
  # remove exit command to add proof command for smt2 benchmarks
  # NOTE(review): with GNU head, `-n -0' copies every line, so no exit
  # command is actually removed here -- confirm whether `-n -1' was intended.
  # NOTE(review): this tests the current $benchmark path, which no longer ends
  # in .smt2 when a filtered temporary copy is in use.
  if expr "$benchmark" : '.*\.smt2$' &>/dev/null; then
      head -n -0 "$benchmark" > "$pfbenchmark";
      echo "$proof_command" >>"$pfbenchmark";
      echo "(exit)" >> "$pfbenchmark";
  else
      cp -- "$benchmark" "$pfbenchmark"
      echo "$proof_command" >>"$pfbenchmark";
  fi
  echo running $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --proof --segv-nospin "$pfbase" "[from working dir $pfdir]"
  time ( :; \
  ( cd "$pfdir";
    $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --proof --segv-nospin "$pfbase";
    echo $? >"$exitstatusfile"
  ) > "$outfile" 2> "$errfile" )

  gettemp pfoutfile cvc4_proof.$$.XXXXXXXXXX

  # Keep only the lines the proof run printed in addition to the expected
  # output: unchanged and expected-only groups are dropped, new groups kept.
  diff --unchanged-group-format='' \
       --old-group-format='' \
       --new-group-format='%>' \
       "$expoutfile" "$outfile" > "$pfoutfile"
  if [ ! -s "$pfoutfile" ]; then
    echo "$prog: error: proof generation failed with empty output (stderr follows)"
    cat "$errfile"
    exitcode=1
  elif [ -z "$LFSC" ]; then
    # Without a checker command the line below would try to execute the
    # proof file itself; fail with an explicit message instead.
    echo "$prog: error: LFSC is not set; cannot check the generated proof"
    exitcode=1
  else
    # $LFSC is left unquoted on purpose so that it may carry arguments.
    echo running $LFSC "$pfoutfile" "[from working dir $pfdir]"
    if ! $LFSC "$pfoutfile" &> "$errfile"; then
      echo "$prog: error: proof checker failed (output follows)"
      cat "$errfile"
      exitcode=1
    fi
  fi
fi

# Optional second pass: when check_models holds "true", re-invoke this script
# on the original benchmark with --check-models appended to
# CVC4_REGRESSION_ARGS (for the child process only).  A failure of that run
# fails this run as well.
if $check_models; then
  CVC4_REGRESSION_ARGS="$CVC4_REGRESSION_ARGS --check-models" \
    "$0" $wrapper "$cvc4" "$benchmark_orig" || exitcode=1
fi

# The overall verdict: 0 only if every check above passed.
exit $exitcode