#!/usr/bin/env python3
###############################################################################
# Top contributors (to current version):
#   Andres Noetzli, Mathias Preiner, Yoni Zohar
#
# This file is part of the cvc5 project.
#
# Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
# in the top-level source directory and their institutional affiliations.
# All rights reserved. See the file COPYING in the top-level source
# directory for licensing information.
# #############################################################################
##
"""
Runs benchmark and checks for correct exit status and output.
"""

import argparse
import difflib
import os
import re
import shlex
import subprocess
import sys
import threading


class Color:
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'


SCRUBBER = 'SCRUBBER:'
ERROR_SCRUBBER = 'ERROR-SCRUBBER:'
EXPECT = 'EXPECT:'
EXPECT_ERROR = 'EXPECT-ERROR:'
EXIT = 'EXIT:'
COMMAND_LINE = 'COMMAND-LINE:'
REQUIRES = 'REQUIRES:'

EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
STATUS_TIMEOUT = 124
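
# The SCRUBBER/EXPECT/... constants above are the metadata directives that
# run_regression() recognizes in comment lines at the top of a benchmark; the
# EXIT_* constants are this script's own exit codes and STATUS_TIMEOUT marks a
# timed-out process. A hypothetical .smt2 regression header (illustrative
# only, not taken from a real test) could look like:
#
#   ; COMMAND-LINE: --incremental
#   ; EXPECT: sat
#   ; EXIT: 0
#
# REQUIRES takes a feature name as reported by `cvc5 --show-config`, prefixed
# with "no-" to require its absence.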


def print_colored(color, text):
    """Prints `text` in color `color`."""
    for line in text.splitlines():
        print(color + line + Color.ENDC)


def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected`."""
    for line in difflib.unified_diff(expected.splitlines(),
                                     actual.splitlines(),
                                     'expected',
                                     'actual',
                                     lineterm=''):
        if line.startswith('+'):
            print_colored(Color.GREEN, line)
        elif line.startswith('-'):
            print_colored(Color.RED, line)
        else:
            print(line)


def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""
    proc = subprocess.Popen(args,
                            cwd=cwd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)

    out = ''
    err = ''
    exit_status = STATUS_TIMEOUT
    try:
        if timeout:
            timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
            timer.start()
        out, err = proc.communicate(input=s_input)
        exit_status = proc.returncode
    finally:
        if timeout:
            # The timer killed the process and is not active anymore.
            if exit_status == -9 and not timer.is_alive():
                exit_status = STATUS_TIMEOUT
            timer.cancel()

    return out, err, exit_status


def get_cvc5_features(cvc5_binary):
    """Returns the lists of enabled and disabled features of the cvc5 binary
    `cvc5_binary`, as reported by `--show-config`."""
    output, _, _ = run_process([cvc5_binary, '--show-config'], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    disabled_features = []
    for line in output.split('\n'):
        tokens = [t.strip() for t in line.split(':')]
        if len(tokens) == 2:
            key, value = tokens
            if value == 'yes':
                features.append(key)
            elif value == 'no':
                disabled_features.append(key)

    return features, disabled_features


def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc5_binary,
                  command_line, benchmark_dir, benchmark_filename, timeout):
    """Runs cvc5 on the file `benchmark_filename` in the directory
    `benchmark_dir` using the binary `cvc5_binary` with the command line
    options `command_line`. The output is scrubbed using `scrubber` and
    `error_scrubber` for stdout and stderr, respectively. If dump is true, the
    function first uses cvc5 to read in and dump the benchmark file and then
    uses that as input."""
    bin_args = wrapper[:]
    bin_args.append(cvc5_binary)

    output = None
    error = None
    exit_status = None
    if dump:
        dump_args = [
            '--parse-only',
            '-o',
            'raw-benchmark',
            '--output-lang=smt2',
        ]
        dump_output, _, _ = run_process(
            bin_args + command_line + dump_args + [benchmark_filename],
            benchmark_dir, timeout)
        output, error, exit_status = run_process(
            bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
            timeout, dump_output)
    else:
        output, error, exit_status = run_process(
            bin_args + command_line + [benchmark_filename], benchmark_dir,
            timeout)

    # If a scrubber command has been specified then apply it to the output.
    if scrubber:
        output, _, _ = run_process(shlex.split(scrubber), benchmark_dir,
                                   timeout, output)
    if error_scrubber:
        error, _, _ = run_process(shlex.split(error_scrubber), benchmark_dir,
                                  timeout, error)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()

    return (output.strip(), error.strip(), exit_status)
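
# A minimal sketch of driving run_benchmark() directly (hypothetical paths and
# flags; within this script it is only called from run_regression() below):
#
#   out, err, status = run_benchmark(
#       dump=False, wrapper=[], scrubber=None, error_scrubber=None,
#       cvc5_binary='/path/to/cvc5', command_line=['--incremental'],
#       benchmark_dir='regress0', benchmark_filename='example.smt2',
#       timeout=600)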


def run_regression(check_unsat_cores, check_proofs, dump, use_skip_return_code,
                   skip_timeout, wrapper, cvc5_binary, benchmark_path,
                   timeout):
    """Determines the expected output for a benchmark, runs cvc5 on it and
    then checks whether the output corresponds to the expected output.
    Optionally uses a wrapper `wrapper`, tests unsat cores (if
    check_unsat_cores is true), checks proofs (if check_proofs is true), or
    dumps a benchmark and uses that as the input (if dump is true).
    `use_skip_return_code` enables/disables returning 77 when a test is
    skipped."""
    if not os.access(cvc5_binary, os.X_OK):
        sys.exit(
            '"{}" does not exist or is not executable'.format(cvc5_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc5_features, cvc5_disabled_features = get_cvc5_features(cvc5_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = '%'
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == '.smt':
        status_regex = r':status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.smt2':
        status_regex = r'set-info\s*:status\s*(sat|unsat)'
        comment_char = ';'
    elif benchmark_ext == '.cvc':
        pass
    elif benchmark_ext == '.p':
        status_regex = r'% Status\s*:\s*(Theorem|Unsatisfiable|CounterSatisfiable|Satisfiable)'
        status_to_output = lambda s: '% SZS status {} for {}'.format(
            s, benchmark_filename)
    elif benchmark_ext == '.sy':
        comment_char = ';'
        # Do not check proofs/unsat-cores with .sy files
        check_unsat_cores = False
        check_proofs = False
    else:
        sys.exit('"{}" must be *.cvc or *.smt or *.smt2 or *.p or *.sy'.format(
            benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, 'r') as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = ''.join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ''
    expected_error = ''
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER):].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER):].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT):].strip() + '\n'
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR):].strip() + '\n'
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT):].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE):].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES):].strip())
    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == '' and expected_error == '':
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output('\n'.join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            sys.exit('Cannot determine status of "{}"'.format(benchmark_path))
    if expected_exit_status is None:
        expected_exit_status = 0

    if 'CVC5_REGRESSION_ARGS' in os.environ:
        basic_command_line_args += shlex.split(
            os.environ['CVC5_REGRESSION_ARGS'])

    if not check_unsat_cores and ('(get-unsat-core)' in benchmark_content
                                  or '(get-unsat-assumptions)'
                                  in benchmark_content):
        print(
            '1..0 # Skipped regression: unsat cores not supported without proof support'
        )
        return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    for req_feature in requires:
        is_negative = False
        if req_feature.startswith("no-"):
            req_feature = req_feature[len("no-"):]
            is_negative = True
        if req_feature not in (cvc5_features + cvc5_disabled_features):
            print(
                'Illegal requirement in regression: {}\nAllowed requirements: {}'
                .format(req_feature,
                        ' '.join(cvc5_features + cvc5_disabled_features)))
            return EXIT_FAILURE
        if is_negative:
            if req_feature in cvc5_features:
                print('1..0 # Skipped regression: not valid with {}'.format(
                    req_feature))
                return (EXIT_SKIP if use_skip_return_code else EXIT_OK)
        elif req_feature not in cvc5_features:
            print('1..0 # Skipped regression: {} not supported'.format(
                req_feature))
            return (EXIT_SKIP if use_skip_return_code else EXIT_OK)

    if not command_lines:
        command_lines.append('')
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        if not check_unsat_cores and ('--check-unsat-cores' in all_args):
            print(
                '# Skipped command line options ({}): unsat cores not supported without proof support'
                .format(all_args))
            continue
        if not check_proofs and '--dump-proofs' in all_args:
            print(
                '# Skipped command line options ({}): proof production not supported'
                .format(all_args))
            continue

        command_line_args_configs.append(all_args)

        expected_output_lines = expected_output.split()
        extra_command_line_args = []
        if benchmark_ext == '.sy' and \
            '--no-check-synth-sol' not in all_args and \
            '--sygus-rr' not in all_args and \
            '--check-synth-sol' not in all_args:
            all_args += ['--check-synth-sol']
        if ('sat' in expected_output_lines or \
            'not_entailed' in expected_output_lines or \
            'unknown' in expected_output_lines) and \
           '--no-debug-check-models' not in all_args and \
           '--no-check-models' not in all_args and \
           '--debug-check-models' not in all_args:
            extra_command_line_args += ['--debug-check-models']
        if 'unsat' in expected_output_lines or 'entailed' in expected_output_lines:
            if check_unsat_cores and \
               '--no-produce-unsat-cores' not in all_args and \
               '--no-check-unsat-cores' not in all_args and \
               '--check-unsat-cores' not in all_args and \
               'sygus-inference' not in benchmark_content and \
               '--unconstrained-simp' not in all_args:
                extra_command_line_args += ['--check-unsat-cores']
            if check_proofs and \
               '--no-produce-proofs' not in all_args and \
               '--no-check-proofs' not in all_args and \
               '--check-proofs' not in all_args:
                extra_command_line_args += ['--check-proofs']
        if '--no-check-abducts' not in all_args and \
            '--check-abducts' not in all_args and \
            'get-abduct' in benchmark_content:
            all_args += ['--check-abducts']

        # Create a test case for each extra argument
        for extra_arg in extra_command_line_args:
            command_line_args_configs.append(all_args + [extra_arg])
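        # For illustration (hypothetical case): an 'unsat' benchmark with
        # check_unsat_cores and check_proofs enabled is scheduled once with
        # all_args as appended above, plus one additional run per entry in
        # extra_command_line_args, e.g. one run with '--check-unsat-cores'
        # and one with '--check-proofs'.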

    # Run cvc5 on the benchmark with the different option sets and check
    # whether the exit status, stdout output, stderr output are as expected.
    print('1..{}'.format(len(command_line_args_configs)))
    print('# Starting')
    exit_code = EXIT_OK
    for command_line_args in command_line_args_configs:
        output, error, exit_status = run_benchmark(dump, wrapper, scrubber,
                                                   error_scrubber, cvc5_binary,
                                                   command_line_args,
                                                   benchmark_dir,
                                                   benchmark_basename, timeout)
        output = re.sub(r'^[ \t]*', '', output, flags=re.MULTILINE)
        error = re.sub(r'^[ \t]*', '', error, flags=re.MULTILINE)
        if exit_status == STATUS_TIMEOUT:
            exit_code = EXIT_SKIP if skip_timeout else EXIT_FAILURE
            print('Timeout - Flags: {}'.format(command_line_args))
        elif output != expected_output:
            exit_code = EXIT_FAILURE
            print('not ok - Flags: {}'.format(command_line_args))
            print()
            print('Standard output difference')
            print('=' * 80)
            print_diff(output, expected_output)
            print('=' * 80)
            print()
            print()
            if error:
                print('Error output')
                print('=' * 80)
                print_colored(Color.YELLOW, error)
                print('=' * 80)
                print()
        elif error != expected_error:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Differences between expected and actual output on stderr - Flags: {}'
                .format(command_line_args))
            print()
            print('Error output difference')
            print('=' * 80)
            print_diff(error, expected_error)
            print('=' * 80)
            print()
        elif expected_exit_status != exit_status:
            exit_code = EXIT_FAILURE
            print(
                'not ok - Expected exit status "{}" but got "{}" - Flags: {}'.
                format(expected_exit_status, exit_status, command_line_args))
            print()
            print('Output:')
            print('=' * 80)
            print_colored(Color.BLUE, output)
            print('=' * 80)
            print()
            print()
            print('Error output:')
            print('=' * 80)
            print_colored(Color.YELLOW, error)
            print('=' * 80)
            print()
        else:
            print('ok - Flags: {}'.format(command_line_args))

    return exit_code


def main():
    """Parses the command line arguments and then calls the core of the
    script."""
    parser = argparse.ArgumentParser(
        description=
        'Runs benchmark and checks for correct exit status and output.')
    parser.add_argument('--dump', action='store_true')
    parser.add_argument('--use-skip-return-code', action='store_true')
    parser.add_argument('--skip-timeout', action='store_true')
    parser.add_argument('--check-unsat-cores',
                        action='store_true',
                        default=True)
    parser.add_argument('--no-check-unsat-cores',
                        dest='check_unsat_cores',
                        action='store_false')
    parser.add_argument('--check-proofs', action='store_true', default=True)
    parser.add_argument('--no-check-proofs',
                        dest='check_proofs',
                        action='store_false')
    parser.add_argument('wrapper', nargs='*')
    parser.add_argument('cvc5_binary')
    parser.add_argument('benchmark')

    argv = sys.argv[1:]
    # Append options passed via RUN_REGRESSION_ARGS to argv
    if os.environ.get('RUN_REGRESSION_ARGS'):
        argv.extend(shlex.split(os.getenv('RUN_REGRESSION_ARGS')))

    args = parser.parse_args(argv)

    cvc5_binary = os.path.abspath(args.cvc5_binary)

    wrapper = args.wrapper
    if os.environ.get('VALGRIND') == '1' and not wrapper:
        wrapper = ['libtool', '--mode=execute', 'valgrind']

    timeout = float(os.getenv('TEST_TIMEOUT', '600'))

    return run_regression(args.check_unsat_cores, args.check_proofs, args.dump,
                          args.use_skip_return_code, args.skip_timeout,
                          wrapper, cvc5_binary, args.benchmark, timeout)


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
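
# Example manual invocation (hypothetical paths; the script is normally driven
# by the project's test harness):
#
#   ./run_regression.py --use-skip-return-code /path/to/cvc5 \
#       /path/to/regress0/example.smt2
#
# Environment variables honored above: CVC5_REGRESSION_ARGS and
# RUN_REGRESSION_ARGS (extra options), TEST_TIMEOUT (seconds, default 600),
# and VALGRIND=1 (runs cvc5 under a libtool/valgrind wrapper when no wrapper
# is given on the command line).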