#!/bin/bash
#
# Morgan Deters <mdeters@cs.nyu.edu>
# for the CVC4 project
#
# usage:
#
#     run_regression cvc4-binary [ --proof | --dump ] [ benchmark.cvc | benchmark.smt | benchmark.smt2 ]
#
# Runs benchmark and checks for correct exit status and output.
#

# ulimit -t 1    # For detecting long running regressions
ulimit -s 65000  # Needed for some (esp. portfolio and model-building)

prog=`basename "$0"`

if [ $# -lt 2 ]; then
  echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2
  exit 1
fi

proof=no
dump=no
if [ x"$1" = x--proof ]; then
  proof=yes
  shift
elif [ x"$1" = x--dump ]; then
  dump=yes
  shift
fi

if [ $# -lt 2 ]; then
  echo "usage: $prog [ --proof | --dump ] [ wrapper ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2
  exit 1
fi

wrapper=
while [ $# -gt 2 ]; do
  wrapper="$wrapper$1 "
  shift
done

[[ "$VALGRIND" = "1" ]] && {
  wrapper="libtool --mode=execute valgrind $wrapper"
}

cvc4=$1
benchmark_orig=$2
benchmark="$benchmark_orig"

function error {
  echo "$prog: error: $*"
  exit 1
}

if ! [ -x "$cvc4" ]; then
  error "\`$cvc4' doesn't exist or isn't executable" >&2
fi
if ! [ -r "$benchmark" ]; then
  error "\`$benchmark' doesn't exist or isn't readable" >&2
fi

# gettemp() and its associated tempfiles[] array are intended to never
# allow a temporary file to leak---the trap ensures that when this script
# exits, whether via a regular exit or an -INT or other signal, the
# temp files are deleted.
declare -a tempfiles
trap -- 'test ${#tempfiles[@]} -gt 0 && rm -f "${tempfiles[@]}"' EXIT
function gettemp {
  local temp="`mktemp -t "$2"`"
  tempfiles[${#tempfiles[@]}]="$temp"
  eval "$1"="$temp"
}

tmpbenchmark=
if expr "$benchmark" : '.*\.smt$' &>/dev/null; then
  lang=smt1
  if test -e "$benchmark.expect"; then
    expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  elif grep '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then
    expected_output=`grep '^% EXPECT: ' "$benchmark" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
    # old mktemp from coreutils 7.x is broken, can't do XXXX in the middle
    # this frustrates our auto-language-detection
    gettemp tmpbenchmark cvc4_benchmark.smt.$$.XXXXXXXXXX
    grep -v '^% \(PROOF\|EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" >"$tmpbenchmark"
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
    benchmark=$tmpbenchmark
  elif grep '^ *:status  *sat' "$benchmark" &>/dev/null; then
    expected_output=sat
    expected_exit_status=0
    command_line=
  elif grep '^ *:status  *unsat' "$benchmark" &>/dev/null; then
    expected_output=unsat
    expected_exit_status=0
    command_line=
  else
    error "cannot determine status of \`$benchmark'"
  fi
elif expr "$benchmark" : '.*\.smt2$' &>/dev/null; then
  lang=smt2
  if test -e "$benchmark.expect"; then
    expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then
    expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'`
    expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'`
    command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  elif grep '^ *( *set-info  *:status  *sat' "$benchmark" &>/dev/null; then
    expected_output=sat
    expected_exit_status=0
    command_line=
  elif grep '^ *( *set-info  *:status  *unsat' "$benchmark" &>/dev/null; then
    expected_output=unsat
    expected_exit_status=0
    command_line=
  else
    error "cannot determine status of \`$benchmark'"
  fi
elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then
  lang=cvc4
  expected_output=$(grep '^% EXPECT: ' "$benchmark")
  expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
  if [ -z "$expected_output" -a -z "$expected_error" ]; then
    error "cannot determine expected output of \`$benchmark': " \
          "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
  fi
  expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
  expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
  if [ -z "$expected_exit_status" ]; then
    expected_exit_status=0
  fi
  command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
elif expr "$benchmark" : '.*\.p$' &>/dev/null; then
  lang=tptp
  command_line=--finite-model-find
  expected_output=$(grep '^% EXPECT: ' "$benchmark")
  expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'`
  if [ -z "$expected_output" -a -z "$expected_error" ]; then
    if grep -q '^% Status *: ' "$benchmark"; then
      expected_output="$(grep '^% *Status *: ' "$benchmark" | head -1 | awk '{print$NF}')"
      case "$expected_output" in
        Theorem|Unsatisfiable) expected_exit_status=0 ;;
        CounterSatisfiable|Satisfiable) expected_exit_status=0 ;;
        GaveUp) expected_exit_status=0 ;;
      esac
      expected_output="% SZS status $expected_output for $(basename "$benchmark" | sed 's,\.p$,,')"
    else
      error "cannot determine expected output of \`$benchmark': " \
            "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures"
    fi
  else
    expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,')
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
  fi
  if [ -z "$expected_exit_status" ]; then
    expected_exit_status=0
  fi
  if grep -q '^% COMMAND-LINE: ' "$benchmark"; then
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'`
  fi
elif expr "$benchmark" : '.*\.sy$' &>/dev/null; then
  lang=sygus
  if test -e "$benchmark.expect"; then
    expected_output=`grep '^% EXPECT: ' "$benchmark.expect" | sed 's,^% EXPECT: ,,'`
    expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark.expect" | sed 's,^% EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark.expect" | perl -pe 's,^% EXIT: ,,;s,\r,,'`
    command_line=`grep '^% COMMAND-LINE: ' "$benchmark.expect" | sed 's,^% COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  elif grep '^; \(EXPECT\|EXPECT-ERROR\|EXIT\|COMMAND-LINE\): ' "$benchmark" "$benchmark" &>/dev/null; then
    expected_output=`grep '^; EXPECT: ' "$benchmark" | sed 's,^; EXPECT: ,,'`
    expected_error=`grep '^; EXPECT-ERROR: ' "$benchmark" | sed 's,^; EXPECT-ERROR: ,,'`
    expected_exit_status=`grep -m 1 '^; EXIT: ' "$benchmark" | perl -pe 's,^; EXIT: ,,;s,\r,,'`
    command_line=`grep '^; COMMAND-LINE: ' "$benchmark" | sed 's,^; COMMAND-LINE: ,,'`
    if [ -z "$expected_exit_status" ]; then
      expected_exit_status=0
    fi
  else
    error "cannot determine expected output of \`$benchmark'"
  fi
else
  error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p or *.sy"
fi

command_line="${command_line:+$command_line }--lang=$lang"

gettemp expoutfile cvc4_expect_stdout.$$.XXXXXXXXXX
gettemp experrfile cvc4_expect_stderr.$$.XXXXXXXXXX
gettemp outfile cvc4_stdout.$$.XXXXXXXXXX
gettemp errfile cvc4_stderr.$$.XXXXXXXXXX
gettemp errfilefix cvc4_stderr.$$.XXXXXXXXXX
gettemp exitstatusfile cvc4_exitstatus.$$.XXXXXXXXXX

if [ -z "$expected_output" ]; then
  # in case expected stdout output is empty, make sure we don't differ
  # by a newline, which we would if we echo "" >"$expoutfile"
  touch "$expoutfile"
else
  echo "$expected_output" >"$expoutfile"
fi

check_models=false
if grep '^sat$' "$expoutfile" &>/dev/null || grep '^invalid$' "$expoutfile" &>/dev/null || grep '^unknown$' "$expoptfile" &>/dev/null; then
  if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-models' &>/dev/null &&
     ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-models' &>/dev/null; then
    # later on, we'll run another test with --check-models on
    check_models=true
  fi
fi
check_proofs=false
check_unsat_cores=false
if [ "$proof" = yes ]; then
  # proofs not currently supported in incremental mode, turn it off
  if grep '^unsat$' "$expoutfile" &>/dev/null || grep '^valid$' "$expoutfile" &>/dev/null &>/dev/null; then
    if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-proofs' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-proofs' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null &&
       ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null &&
       ! expr "$BINARY" : '.*pcvc4' &>/dev/null &&
       ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then
      # later on, we'll run another test with --check-proofs on
      check_proofs=true
    fi
    if ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--check-unsat-cores' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--no-check-unsat-cores' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--incremental' &>/dev/null &&
       ! expr "$CVC4_REGRESSION_ARGS $command_line" : '.*--unconstrained-simp' &>/dev/null &&
       ! expr " $CVC4_REGRESSION_ARGS $command_line" : '.* -[a-zA-Z]*i' &>/dev/null &&
       ! expr "$BINARY" : '.*pcvc4' &>/dev/null &&
       ! expr "$benchmark" : '.*\.sy$' &>/dev/null; then
      # later on, we'll run another test with --check-unsat-cores on
      check_unsat_cores=true
    fi
  fi
fi
if [ -z "$expected_error" ]; then
  # in case expected stderr output is empty, make sure we don't differ
  # by a newline, which we would if we echo "" >"$experrfile"
  touch "$experrfile"
else
  echo "$expected_error" >"$experrfile"
fi

cvc4dir=`dirname "$cvc4"`
cvc4dirfull=`cd "$cvc4dir" && pwd`
if [ -z "$cvc4dirfull" ]; then
  error "getting directory of \`$cvc4 !?"
fi
cvc4base=`basename "$cvc4"`
cvc4full="$cvc4dirfull/$cvc4base"
if [ $dump = no ]; then
  echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"` [from working dir `dirname "$benchmark"`]
  time ( :; \
  ( cd `dirname "$benchmark"`;
    $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line `basename "$benchmark"`;
    echo $? >"$exitstatusfile"
  ) > "$outfile" 2> "$errfile" )
else
  echo running $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` \| $wrapper $cvc4full $CVC4_REGRESSION_ARGS $command_line --lang=smt2 - [from working dir `dirname "$benchmark"`]
  time ( :; \
  ( cd `dirname "$benchmark"`;
    $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --preprocess-only --dump=clauses --output-lang=smt2 -q `basename "$benchmark"` | $wrapper "$cvc4full" $CVC4_REGRESSION_ARGS $command_line --lang=smt2 -;
    echo $? >"$exitstatusfile"
  ) > "$outfile" 2> "$errfile" )
fi

# we have to actual error file same treatment as other files. differences in
# versions of echo/bash were causing failure on some platforms and not on others
# (also grep out valgrind output, if 0 errors reported by valgrind)
actual_error=$(cat $errfile)
if [[ "$VALGRIND" = "1" ]]; then
  #valgrind_output=$(cat $errfile|grep -E "^==[0-9]+== "|)
  valgrind_num_errors=$(cat $errfile|grep -E "^==[0-9]+== "|tail -n1|awk '{print $4}')
  echo "valgrind errors (not suppressed): $valgrind_num_errors" 1>&2

  ((valgrind_num_errors == 0)) && actual_error=$(echo "$actual_error"|grep -vE "^==[0-9]+== ")
fi
if [ -z "$actual_error" ]; then
  # in case expected stderr output is empty, make sure we don't differ
  # by a newline, which we would if we echo "" >"$experrfile"
  touch "$errfilefix"
else
  echo "$actual_error" >"$errfilefix"
fi

diffs=`diff -u --strip-trailing-cr "$expoutfile" "$outfile"`
diffexit=$?
diffserr=`diff -u --strip-trailing-cr "$experrfile" "$errfilefix"`
diffexiterr=$?
exit_status=`cat "$exitstatusfile"`

exitcode=0
if [ $diffexit -ne 0 ]; then
  echo "$prog: error: differences between expected and actual output on stdout"
  echo "$diffs"
  exitcode=1
fi
if [ $diffexiterr -ne 0 ]; then
  echo "$prog: error: differences between expected and actual output on stderr"
  echo "$diffserr"
  exitcode=1
fi

if [ "$exit_status" != "$expected_exit_status" ]; then
  echo "$prog: error: expected exit status \`$expected_exit_status' but got \`$exit_status'"
  exitcode=1
fi

if $check_models || $check_proofs || $check_unsat_cores; then
  check_cmdline=
  if $check_models; then
    check_cmdline="$check_cmdline --check-models"
  fi
  if $check_proofs; then
    check_cmdline="$check_cmdline --check-proofs"
  fi
  if $check_unsat_cores; then
    check_cmdline="$check_cmdline --check-unsat-cores"
  fi
  # run an extra model/proof/core-checking pass
  if ! CVC4_REGRESSION_ARGS="$CVC4_REGRESSION_ARGS$check_cmdline" "$0" $wrapper "$cvc4" "$benchmark_orig"; then
    exitcode=1
  fi
fi

exit $exitcode