#!/usr/bin/env python3
"""
Usage:

    run_regression.py [--enable-proof] [--with-lfsc] [--dump]
        [--use-skip-return-code] [wrapper] cvc4-binary
        [benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p | benchmark.sy]

Runs benchmark and checks for correct exit status and output.
"""

import argparse
import difflib
import os
import re
import shlex
import subprocess
import sys
import threading


class Color:
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'


SCRUBBER = 'SCRUBBER:'
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
EXPECT = 'EXPECT:'
EXPECT_ERROR = 'EXPECT-ERROR:'
EXIT = 'EXIT:'
COMMAND_LINE = 'COMMAND-LINE:'
REQUIRES = 'REQUIRES:'

EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
STATUS_TIMEOUT = 124


def print_colored(color, text):
    """Prints `text` in color `color`."""

    for line in text.splitlines():
        print(color + line + Color.ENDC)


def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected`."""

    for line in difflib.unified_diff(expected.splitlines(),
                                     actual.splitlines(),
                                     'expected',
                                     'actual',
                                     lineterm=''):
        if line.startswith('+'):
            print_colored(Color.GREEN, line)
        elif line.startswith('-'):
            print_colored(Color.RED, line)
        else:
            print(line)


def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    proc = subprocess.Popen(args,
                            cwd=cwd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = STATUS_TIMEOUT
    try:
        if timeout:
            # Kill the process if it does not finish within `timeout` seconds.
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        if timeout:
            # The timer killed the process and is not active anymore.
            if exit_status == -9 and not timer.is_alive():
                exit_status = STATUS_TIMEOUT
            timer.cancel()

    return out, err, exit_status


def get_cvc4_features(cvc4_binary):
    """Returns a list of features supported by the CVC4 binary
    `cvc4_binary`."""

    output, _, _ = run_process([cvc4_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)

    return features
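

# A sketch of the `--show-config` output that get_cvc4_features() parses.
# The exact keys and spacing vary between CVC4 versions and builds, so the
# keys below are illustrative only:
#
#   proof        : yes
#   statistics   : yes
#   tracing      : no
#
# For the output above, the function would return ['proof', 'statistics']:
# only `key : value` lines whose value is exactly 'yes' become features.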


def logic_supported_with_proofs(logic):
    assert logic is None or isinstance(logic, str)
    return logic in [
        # single theories
        "QF_BV",
        "QF_UF",
        "QF_A",
        "QF_LRA",
        # two theories
        "QF_UFBV",
        "QF_UFLRA",
        "QF_AUF",
        "QF_ALRA",
        "QF_ABV",
        "QF_BVLRA",
        # three theories
        "QF_AUFBV",
        "QF_ABVLRA",
        "QF_UFBVLRA",
        "QF_AUFLRA",
        # four theories
        "QF_AUFBVLRA",
    ]


def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs CVC4 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc4_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses CVC4 to read in and dump the benchmark file and then
    uses that as input."""

    bin_args = wrapper[:]
    bin_args.append(cvc4_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        dump_args = [
            '--preprocess-only', '--dump', 'raw-benchmark',
            '--output-lang=smt2', '-qq'
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
                                   timeout, output)
    if error_scrubber:
        error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
                                  timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    return (output.strip(), error.strip(), exit_status)
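

# The dump mode in run_benchmark() corresponds roughly to the following
# shell pipeline (the file name is illustrative; the flags are exactly the
# ones passed above): the benchmark is first parsed and re-printed as
# SMT-LIB 2, and the re-printed form is then solved via stdin.
#
#   cvc4 --preprocess-only --dump raw-benchmark --output-lang=smt2 -qq bench.smt2 \
#       | cvc4 --lang=smt2 -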


def run_regression(unsat_cores, proofs, dump, use_skip_return_code, wrapper,
                   cvc4_binary, benchmark_path, timeout):
    """Determines the expected output for a benchmark, runs CVC4 on it and
    then checks whether the output corresponds to the expected output.
    Optionally uses a wrapper `wrapper`, tests unsat cores (if unsat_cores is
    true), checks proofs (if proofs is true), or dumps a benchmark and uses
    that as the input (if dump is true). `use_skip_return_code` enables/
    disables returning 77 when a test is skipped."""

    if not os.access(cvc4_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc4_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc4_features = get_cvc4_features(cvc4_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    logic_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        logic_regex = r'\(\s*set-logic\s*(.*)\)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not use proofs/unsat-cores with .sy files
        unsat_cores = False
        proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    logic = None
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()
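
    # For reference, a hypothetical .smt2 benchmark header using the
    # directives parsed above might look as follows (';' is the comment
    # character and is stripped before the prefixes are matched):
    #
    #   ; COMMAND-LINE: --incremental
    #   ; EXPECT: sat
    #   ; EXPECT: unsat
    #   ; EXIT: 0
    #
    # Each EXPECT/EXPECT-ERROR line contributes one line of expected output.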

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0
    if logic_regex:
        logic_match = re.findall(logic_regex, benchmark_content)
        if logic_match and len(logic_match) == 1:
            logic = logic_match[0]

    if 'CVC4_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC4_REGRESSION_ARGS'])

    if not unsat_cores and ('(get-unsat-core)' in benchmark_content
                            or '(get-unsat-assumptions)' in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    for req_feature in requires:
        if req_feature.startswith("no-"):
            inv_feature = req_feature[len("no-"):]
            if inv_feature in cvc4_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    inv_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc4_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    if not command_lines:
        command_lines.append('')

    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not proofs and ('--check-proofs' in all_args
                           or '--dump-proofs' in all_args):
            print(
                '# Skipped command line options ({}): checking proofs not supported without LFSC support'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            extra_command_line_args = ['--check-synth-sol']
        if re.search(r'^(sat|invalid|unknown)$', expected_output) and \
           '--no-check-models' not in all_args and \
           '--check-models' not in all_args:
            extra_command_line_args = ['--check-models']
        if proofs and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               logic_supported_with_proofs(logic) and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args = ['--check-proofs']
        if unsat_cores and re.search(r'^(unsat|valid)$', expected_output):
            if '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               '--incremental' not in all_args and \
               '--unconstrained-simp' not in all_args and \
               not cvc4_binary.endswith('pcvc4'):
                extra_command_line_args += ['--check-unsat-cores']
        if '--no-check-abducts' not in all_args and \
            '--check-abducts' not in all_args and \
            not cvc4_binary.endswith('pcvc4'):
            extra_command_line_args += ['--check-abducts']
        if extra_command_line_args:
            command_line_args_configs.append(all_args +
                                             extra_command_line_args)

    # Run CVC4 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    print('# Starting')
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
                                                   error_scrubber, cvc4_binary,
                                                   command_line_args,
                                                   benchmark_dir,
                                                   benchmark_basename, timeout)
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if exit_status == STATUS_TIMEOUT:
            exit_code = EXIT_SKIP if use_skip_return_code else EXIT_FAILURE
            print('Timeout - Flags: {}'.format(command_line_args))
        elif output != expected_output:
            exit_code = EXIT_FAILURE
            print('not ok - Flags: {}'.format(command_line_args))
            print()
            print('Standard output difference')
            print('=' * 80)
            print_diff(output, expected_output)
            print('=' * 80)
            print()
            print()
            if error:
                print('Error output')
                print('=' * 80)
                print_colored(Color.YELLOW, error)
                print('=' * 80)
                print()
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            print()
            print('Error output difference')
            print('=' * 80)
            print_diff(error, expected_error)
            print('=' * 80)
            print()
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print('=' * 80)
            print_colored(Color.BLUE, output)
            print('=' * 80)
            print()
            print()
            print('Error output:')
            print('=' * 80)
            print_colored(Color.YELLOW, error)
            print('=' * 80)
            print()
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code
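

# The loop above emits TAP-style output, one (not) ok line per option set.
# A sketch of a passing run with two configurations (flags illustrative):
#
#   1..2
#   # Starting
#   ok - Flags: ['--check-models']
#   ok - Flags: ['--check-models', '--check-abducts']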


def main():
    """Parses the command line arguments and then calls the core of the
    script."""

    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--enable-proof', action='store_true')
    parser.add_argument('--with-lfsc', action='store_true')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--use-skip-return-code', action='store_true')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc4_binary')
    parser.add_argument('benchmark')
    args = parser.parse_args()
    cvc4_binary = os.path.abspath(args.cvc4_binary)

    wrapper = args.wrapper
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', 1200.0))

    return run_regression(args.enable_proof, args.with_lfsc, args.dump,
                          args.use_skip_return_code, wrapper, cvc4_binary,
                          args.benchmark, timeout)


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
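
# Example invocation (paths are illustrative only):
#
#   TEST_TIMEOUT=600 ./run_regression.py --use-skip-return-code \
#       build/bin/cvc4 regress0/simple.smt2
#
# Environment variables read by this script: TEST_TIMEOUT (per-process
# timeout in seconds, default 1200), VALGRIND=1 (wraps the binary in
# `libtool --mode=execute valgrind` when no wrapper is given) and
# CVC4_REGRESSION_ARGS (extra options prepended to every configuration).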