Diffstat (limited to 'test/regress/run_regression.py')
-rwxr-xr-x  test/regress/run_regression.py  287
1 file changed, 287 insertions(+), 0 deletions(-)
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py
new file mode 100755
index 000000000..c861b0d71
--- /dev/null
+++ b/test/regress/run_regression.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+"""
+Usage:
+
+    run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary
+        [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy ]
+
+Runs a benchmark and checks for correct exit status and output.
+"""
+
+import argparse
+import difflib
+import os
+import re
+import shlex
+import subprocess
+import sys
+
+SCRUBBER = 'SCRUBBER: '
+ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
+EXPECT = 'EXPECT: '
+EXPECT_ERROR = 'EXPECT-ERROR: '
+EXIT = 'EXIT: '
+COMMAND_LINE = 'COMMAND-LINE: '
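+
+# These markers are read from comment lines in a benchmark (or its ".expect"
+# file). An illustrative header for an .smt2 benchmark, whose comment
+# character is ';' (the flag and status shown here are made up):
+#
+#   ; COMMAND-LINE: --incremental
+#   ; EXPECT: sat
+#   ; EXIT: 0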
+
+
+def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+ command_line, benchmark_dir, benchmark_filename):
+ """Runs CVC4 on the file `benchmark_filename` in the directory
+ `benchmark_dir` using the binary `cvc4_binary` with the command line
+ options `command_line`. The output is scrubbed using `scrubber` and
+ `error_scrubber` for stdout and stderr, respectively. If dump is true, the
+ function first uses CVC4 to read in and dump the benchmark file and then
+ uses that as input."""
+
+ bin_args = wrapper[:]
+ bin_args.append(cvc4_binary)
+
+ output = None
+ error = None
+ exit_status = None
+ if dump:
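+        # First pass: parse the benchmark and dump it back out in SMT-LIB 2
+        # format without solving it ('-qq' suppresses everything else). The
+        # dumped benchmark is then fed to a second CVC4 run on stdin.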
+ dump_args = [
+ '--preprocess-only', '--dump', 'raw-benchmark',
+ '--output-lang=smt2', '-qq'
+ ]
+ dump_process = subprocess.Popen(
+ bin_args + command_line + dump_args + [benchmark_filename],
+ cwd=benchmark_dir,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ dump_output, _ = dump_process.communicate()
+ process = subprocess.Popen(
+ bin_args + command_line + ['--lang=smt2', '-'],
+ cwd=benchmark_dir,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, error = process.communicate(input=dump_output)
+ exit_status = process.returncode
+ else:
+ process = subprocess.Popen(
+ bin_args + command_line + [benchmark_filename],
+ cwd=benchmark_dir,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, error = process.communicate()
+ exit_status = process.returncode
+
+ # If a scrubber command has been specified then apply it to the output.
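+    # A scrubber is an arbitrary shell command from the benchmark's metadata,
+    # e.g. a sed invocation that replaces volatile output (timings, memory
+    # addresses) with fixed placeholders before the comparison.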
+ if scrubber:
+ scrubber_process = subprocess.Popen(
+ shlex.split(scrubber),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ output, _ = scrubber_process.communicate(input=output)
+ if error_scrubber:
+ error_scrubber_process = subprocess.Popen(
+ shlex.split(error_scrubber),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ error, _ = error_scrubber_process.communicate(input=error)
+
+ # Popen in Python 3 returns a bytes object instead of a string for
+ # stdout/stderr.
+ if isinstance(output, bytes):
+ output = output.decode()
+ if isinstance(error, bytes):
+ error = error.decode()
+ return (output.strip(), error.strip(), exit_status)
+
+
+def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
+ """Determines the expected output for a benchmark, runs CVC4 on it and then
+ checks whether the output corresponds to the expected output. Optionally
+ uses a wrapper `wrapper`, tests proof generation (if proof is true), or
+ dumps a benchmark and uses that as the input (if dump is true)."""
+
+ if not os.access(cvc4_binary, os.X_OK):
+ sys.exit(
+ '"{}" does not exist or is not executable'.format(cvc4_binary))
+ if not os.path.isfile(benchmark_path):
+ sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))
+
+ basic_command_line_args = []
+
+ benchmark_basename = os.path.basename(benchmark_path)
+ benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
+ benchmark_dir = os.path.dirname(benchmark_path)
+ comment_char = '%'
+ status_regex = None
+ status_to_output = lambda s: s
+ if benchmark_ext == '.smt':
+ status_regex = r':status\s*(sat|unsat)'
+ comment_char = ';'
+ elif benchmark_ext == '.smt2':
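+        # The status annotation looks like "(set-info :status unsat)".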
+ status_regex = r'set-info\s*:status\s*(sat|unsat)'
+ comment_char = ';'
+ elif benchmark_ext == '.cvc':
+ pass
+ elif benchmark_ext == '.p':
+ basic_command_line_args.append('--finite-model-find')
+ status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
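+        # CVC4 reports TPTP results as SZS status lines, e.g.
+        # "% SZS status Theorem for <benchmark name>".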
+ status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename)
+ elif benchmark_ext == '.sy':
+ comment_char = ';'
+ # Do not use proofs/unsat-cores with .sy files
+ proof = False
+ else:
+ sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
+ benchmark_basename))
+
+ # If there is an ".expect" file for the benchmark, read the metadata
+ # from there, otherwise from the benchmark file.
+ metadata_filename = benchmark_path + '.expect'
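+    # ".expect" files always use '%' for their metadata lines, regardless of
+    # the benchmark's own comment character.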
+ if os.path.isfile(metadata_filename):
+ comment_char = '%'
+ else:
+ metadata_filename = benchmark_path
+
+ metadata_lines = None
+ with open(metadata_filename, 'r') as metadata_file:
+ metadata_lines = metadata_file.readlines()
+ metadata_content = ''.join(metadata_lines)
+
+ # Extract the metadata for the benchmark.
+ scrubber = None
+ error_scrubber = None
+ expected_output = ''
+ expected_error = ''
+ expected_exit_status = None
+ command_line = ''
+ for line in metadata_lines:
+        # Skip lines that do not start with the comment character
+        if line[0] != comment_char:
+            continue
+        # Strip the comment character and the space that follows it
+        line = line[2:]
+
+ if line.startswith(SCRUBBER):
+ scrubber = line[len(SCRUBBER):]
+ elif line.startswith(ERROR_SCRUBBER):
+ error_scrubber = line[len(ERROR_SCRUBBER):]
+ elif line.startswith(EXPECT):
+ expected_output += line[len(EXPECT):]
+ elif line.startswith(EXPECT_ERROR):
+ expected_error += line[len(EXPECT_ERROR):]
+ elif line.startswith(EXIT):
+ expected_exit_status = int(line[len(EXIT):])
+ elif line.startswith(COMMAND_LINE):
+ command_line += line[len(COMMAND_LINE):]
+ expected_output = expected_output.strip()
+ expected_error = expected_error.strip()
+
+    # The expected output and error have not been defined in the metadata
+    # for the benchmark. Try to extract the information from the benchmark
+    # itself.
+ if expected_output == '' and expected_error == '':
+ match = None
+ if status_regex:
+ match = re.search(status_regex, metadata_content)
+
+ if match:
+ expected_output = status_to_output(match.group(1))
+ elif expected_exit_status is None:
+ # If there is no expected output/error and the exit status has not
+ # been set explicitly, the benchmark is invalid.
+ sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
+
+ if not proof and ('(get-unsat-cores)' in metadata_content
+ or '(get-unsat-assumptions)' in metadata_content):
+ print(
+ '1..0 # Skipped: unsat cores not supported without proof support')
+ return
+
+ if expected_exit_status is None:
+ expected_exit_status = 0
+
+ if 'CVC4_REGRESSION_ARGS' in os.environ:
+ basic_command_line_args += shlex.split(
+ os.environ['CVC4_REGRESSION_ARGS'])
+ basic_command_line_args += shlex.split(command_line)
+ command_line_args_configs = [basic_command_line_args]
+
+ extra_command_line_args = []
+ if benchmark_ext == '.sy' and \
+ '--no-check-synth-sol' not in basic_command_line_args and \
+ '--check-synth-sol' not in basic_command_line_args:
+ extra_command_line_args = ['--check-synth-sol']
+ if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
+ '--no-check-models' not in basic_command_line_args:
+ extra_command_line_args = ['--check-models']
+    if proof and re.search(r'^(unsat|valid)$', expected_output):
+ if '--no-check-proofs' not in basic_command_line_args and \
+ '--incremental' not in basic_command_line_args and \
+ '--unconstrained-simp' not in basic_command_line_args and \
+ not cvc4_binary.endswith('pcvc4'):
+ extra_command_line_args = [
+ '--check-proofs', '--no-bv-eq', '--no-bv-ineq',
+ '--no-bv-algebraic'
+ ]
+ if '--no-check-unsat-cores' not in basic_command_line_args and \
+ '--incremental' not in basic_command_line_args and \
+ '--unconstrained-simp' not in basic_command_line_args and \
+ not cvc4_binary.endswith('pcvc4'):
+            extra_command_line_args += ['--check-unsat-cores']
+ if extra_command_line_args:
+ command_line_args_configs.append(
+ basic_command_line_args + extra_command_line_args)
+
+    # Run CVC4 on the benchmark with the different option sets and check
+    # whether the exit status and the output on stdout/stderr are as expected.
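+    # The results are reported in TAP (Test Anything Protocol) format: a plan
+    # line "1..<n>" followed by one "ok"/"not ok" line per configuration.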
+ print('1..{}'.format(len(command_line_args_configs)))
+ print('# Starting')
+ for command_line_args in command_line_args_configs:
+ output, error, exit_status = run_benchmark(
+ dump, wrapper, scrubber, error_scrubber, cvc4_binary,
+ command_line_args, benchmark_dir, benchmark_basename)
+ if output != expected_output:
+ print(
+ 'not ok - Differences between expected and actual output on stdout - Flags: {}'.
+ format(command_line_args))
+ for line in difflib.context_diff(output.splitlines(),
+ expected_output.splitlines()):
+ print(line)
+ elif error != expected_error:
+ print(
+ 'not ok - Differences between expected and actual output on stderr - Flags: {}'.
+ format(command_line_args))
+ for line in difflib.context_diff(error.splitlines(),
+ expected_error.splitlines()):
+ print(line)
+ elif expected_exit_status != exit_status:
+ print(
+ 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
+ format(expected_exit_status, exit_status, command_line_args))
+ else:
+ print('ok - Flags: {}'.format(command_line_args))
+
+
+def main():
+ """Parses the command line arguments and then calls the core of the
+ script."""
+
+ parser = argparse.ArgumentParser(
+ description=
+        'Runs a benchmark and checks for correct exit status and output.')
+ parser.add_argument('--proof', action='store_true')
+ parser.add_argument('--dump', action='store_true')
+ parser.add_argument('wrapper', nargs='*')
+ parser.add_argument('cvc4_binary')
+ parser.add_argument('benchmark')
+ args = parser.parse_args()
+ cvc4_binary = os.path.abspath(args.cvc4_binary)
+
+ wrapper = args.wrapper
+ if os.environ.get('VALGRIND') == '1' and not wrapper:
+ wrapper = ['libtool', '--mode=execute', 'valgrind']
+
+ run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)
+
+
+if __name__ == "__main__":
+ main()