diff options
Diffstat (limited to 'test/regress/run_regression.py')
-rwxr-xr-x | test/regress/run_regression.py | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py new file mode 100755 index 000000000..c861b0d71 --- /dev/null +++ b/test/regress/run_regression.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python3 +""" +Usage: + + run_regression.py [ --proof | --dump ] [ wrapper ] cvc4-binary + [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ] + +Runs benchmark and checks for correct exit status and output. +""" + +import argparse +import difflib +import os +import re +import shlex +import subprocess +import sys + +SCRUBBER = 'SCRUBBER: ' +ERROR_SCRUBBER = 'ERROR-SCRUBBER: ' +EXPECT = 'EXPECT: ' +EXPECT_ERROR = 'EXPECT-ERROR: ' +EXIT = 'EXIT: ' +COMMAND_LINE = 'COMMAND-LINE: ' + + +def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, + command_line, benchmark_dir, benchmark_filename): + """Runs CVC4 on the file `benchmark_filename` in the directory + `benchmark_dir` using the binary `cvc4_binary` with the command line + options `command_line`. The output is scrubbed using `scrubber` and + `error_scrubber` for stdout and stderr, respectively. If dump is true, the + function first uses CVC4 to read in and dump the benchmark file and then + uses that as input.""" + + bin_args = wrapper[:] + bin_args.append(cvc4_binary) + + output = None + error = None + exit_status = None + if dump: + dump_args = [ + '--preprocess-only', '--dump', 'raw-benchmark', + '--output-lang=smt2', '-qq' + ] + dump_process = subprocess.Popen( + bin_args + command_line + dump_args + [benchmark_filename], + cwd=benchmark_dir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + dump_output, _ = dump_process.communicate() + process = subprocess.Popen( + bin_args + command_line + ['--lang=smt2', '-'], + cwd=benchmark_dir, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, error = process.communicate(input=dump_output) + exit_status = process.returncode + else: + process = subprocess.Popen( + bin_args + command_line + [benchmark_filename], + cwd=benchmark_dir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, error = process.communicate() + exit_status = process.returncode + + # If a scrubber command has been specified then apply it to the output. + if scrubber: + scrubber_process = subprocess.Popen( + shlex.split(scrubber), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, _ = scrubber_process.communicate(input=output) + if error_scrubber: + error_scrubber_process = subprocess.Popen( + shlex.split(error_scrubber), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + error, _ = error_scrubber_process.communicate(input=error) + + # Popen in Python 3 returns a bytes object instead of a string for + # stdout/stderr. + if isinstance(output, bytes): + output = output.decode() + if isinstance(error, bytes): + error = error.decode() + return (output.strip(), error.strip(), exit_status) + + +def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path): + """Determines the expected output for a benchmark, runs CVC4 on it and then + checks whether the output corresponds to the expected output. Optionally + uses a wrapper `wrapper`, tests proof generation (if proof is true), or + dumps a benchmark and uses that as the input (if dump is true).""" + + if not os.access(cvc4_binary, os.X_OK): + sys.exit( + '"{}" does not exist or is not executable'.format(cvc4_binary)) + if not os.path.isfile(benchmark_path): + sys.exit('"{}" does not exist or is not a file'.format(benchmark_path)) + + basic_command_line_args = [] + + benchmark_basename = os.path.basename(benchmark_path) + benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename) + benchmark_dir = os.path.dirname(benchmark_path) + comment_char = '%' + status_regex = None + status_to_output = lambda s: s + if benchmark_ext == '.smt': + status_regex = r':status\s*(sat|unsat)' + comment_char = ';' + elif benchmark_ext == '.smt2': + status_regex = r'set-info\s*:status\s*(sat|unsat)' + comment_char = ';' + elif benchmark_ext == '.cvc': + pass + elif benchmark_ext == '.p': + basic_command_line_args.append('--finite-model-find') + status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)' + status_to_output = lambda s: '% SZS status {} for {}'.format(s, benchmark_filename) + elif benchmark_ext == '.sy': + comment_char = ';' + # Do not use proofs/unsat-cores with .sy files + proof = False + else: + sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format( + benchmark_basename)) + + # If there is an ".expect" file for the benchmark, read the metadata + # from there, otherwise from the benchmark file. + metadata_filename = benchmark_path + '.expect' + if os.path.isfile(metadata_filename): + comment_char = '%' + else: + metadata_filename = benchmark_path + + metadata_lines = None + with open(metadata_filename, 'r') as metadata_file: + metadata_lines = metadata_file.readlines() + metadata_content = ''.join(metadata_lines) + + # Extract the metadata for the benchmark. + scrubber = None + error_scrubber = None + expected_output = '' + expected_error = '' + expected_exit_status = None + command_line = '' + for line in metadata_lines: + # Skip lines that do not start with "%" + if line[0] != comment_char: + continue + line = line[2:] + + if line.startswith(SCRUBBER): + scrubber = line[len(SCRUBBER):] + elif line.startswith(ERROR_SCRUBBER): + error_scrubber = line[len(ERROR_SCRUBBER):] + elif line.startswith(EXPECT): + expected_output += line[len(EXPECT):] + elif line.startswith(EXPECT_ERROR): + expected_error += line[len(EXPECT_ERROR):] + elif line.startswith(EXIT): + expected_exit_status = int(line[len(EXIT):]) + elif line.startswith(COMMAND_LINE): + command_line += line[len(COMMAND_LINE):] + expected_output = expected_output.strip() + expected_error = expected_error.strip() + + # Expected output/expected error has not been defined in the metadata for + # the benchmark. Try to extract the information from the benchmark itself. + if expected_output == '' and expected_error == '': + match = None + if status_regex: + match = re.search(status_regex, metadata_content) + + if match: + expected_output = status_to_output(match.group(1)) + elif expected_exit_status is None: + # If there is no expected output/error and the exit status has not + # been set explicitly, the benchmark is invalid. + sys.exit('Cannot determine status of "{}"'.format(benchmark_path)) + + if not proof and ('(get-unsat-cores)' in metadata_content + or '(get-unsat-assumptions)' in metadata_content): + print( + '1..0 # Skipped: unsat cores not supported without proof support') + return + + if expected_exit_status is None: + expected_exit_status = 0 + + if 'CVC4_REGRESSION_ARGS' in os.environ: + basic_command_line_args += shlex.split( + os.environ['CVC4_REGRESSION_ARGS']) + basic_command_line_args += shlex.split(command_line) + command_line_args_configs = [basic_command_line_args] + + extra_command_line_args = [] + if benchmark_ext == '.sy' and \ + '--no-check-synth-sol' not in basic_command_line_args and \ + '--check-synth-sol' not in basic_command_line_args: + extra_command_line_args = ['--check-synth-sol'] + if re.search(r'^(sat|invalid|unknown)$', expected_output) and \ + '--no-check-models' not in basic_command_line_args: + extra_command_line_args = ['--check-models'] + if proof and re.search(r'^(sat|valid)$', expected_output): + if '--no-check-proofs' not in basic_command_line_args and \ + '--incremental' not in basic_command_line_args and \ + '--unconstrained-simp' not in basic_command_line_args and \ + not cvc4_binary.endswith('pcvc4'): + extra_command_line_args = [ + '--check-proofs', '--no-bv-eq', '--no-bv-ineq', + '--no-bv-algebraic' + ] + if '--no-check-unsat-cores' not in basic_command_line_args and \ + '--incremental' not in basic_command_line_args and \ + '--unconstrained-simp' not in basic_command_line_args and \ + not cvc4_binary.endswith('pcvc4'): + extra_command_line_args = [ + '--check-proofs', '--no-bv-eq', '--no-bv-ineq', + '--no-bv-algebraic' + ] + if extra_command_line_args: + command_line_args_configs.append( + basic_command_line_args + extra_command_line_args) + + # Run CVC4 on the benchmark with the different option sets and check + # whether the exit status, stdout output, stderr output are as expected. + print('1..{}'.format(len(command_line_args_configs))) + print('# Starting') + for command_line_args in command_line_args_configs: + output, error, exit_status = run_benchmark( + dump, wrapper, scrubber, error_scrubber, cvc4_binary, + command_line_args, benchmark_dir, benchmark_basename) + if output != expected_output: + print( + 'not ok - Differences between expected and actual output on stdout - Flags: {}'. + format(command_line_args)) + for line in difflib.context_diff(output.splitlines(), + expected_output.splitlines()): + print(line) + elif error != expected_error: + print( + 'not ok - Differences between expected and actual output on stderr - Flags: {}'. + format(command_line_args)) + for line in difflib.context_diff(error.splitlines(), + expected_error.splitlines()): + print(line) + elif expected_exit_status != exit_status: + print( + 'not ok - Expected exit status "{}" but got "{}" - Flags: {}'. + format(expected_exit_status, exit_status, command_line_args)) + else: + print('ok - Flags: {}'.format(command_line_args)) + + +def main(): + """Parses the command line arguments and then calls the core of the + script.""" + + parser = argparse.ArgumentParser( + description= + 'Runs benchmark and checks for correct exit status and output.') + parser.add_argument('--proof', action='store_true') + parser.add_argument('--dump', action='store_true') + parser.add_argument('wrapper', nargs='*') + parser.add_argument('cvc4_binary') + parser.add_argument('benchmark') + args = parser.parse_args() + cvc4_binary = os.path.abspath(args.cvc4_binary) + + wrapper = args.wrapper + if os.environ.get('VALGRIND') == '1' and not wrapper: + wrapper = ['libtool', '--mode=execute', 'valgrind'] + + run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark) + + +if __name__ == "__main__": + main() |