summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorAndres Noetzli <andres.noetzli@gmail.com>2018-04-16 21:08:38 -0700
committerGitHub <noreply@github.com>2018-04-16 21:08:38 -0700
commit8b9baa66ad6ffc9e323b83bc967992439a7d9bfa (patch)
treebdb58c5a3249dc5f573284c306d529e4e3bd8067 /test
parent8a079f9b982a502995da8e535a4b4487489af0d2 (diff)
Add timeout (option) to regression script (#1786)
This commit adds the option to run regressions with a timeout using the `TEST_TIMEOUT` environment variable. The default timeout is set to 10 minutes. This should make it less likely that our nightly builds hang and makes it easier to sort out slow tests. Default timeout tested with regression level 2 on a debug build with proofs.
Diffstat (limited to 'test')
-rw-r--r--test/regress/Makefile.am4
-rw-r--r--test/regress/README.md11
-rwxr-xr-xtest/regress/run_regression.py84
3 files changed, 60 insertions, 39 deletions
diff --git a/test/regress/Makefile.am b/test/regress/Makefile.am
index 19cbbae4a..ba1442c69 100644
--- a/test/regress/Makefile.am
+++ b/test/regress/Makefile.am
@@ -57,5 +57,5 @@ EXTRA_DIST = \
regress0/tptp/Axioms/SYN000_0.ax \
Makefile.levels \
Makefile.tests \
- run_regression.py \
- README.md \ No newline at end of file
+ run_regression.py \
+ README.md
diff --git a/test/regress/README.md b/test/regress/README.md
index 2c347bc48..34cf7efde 100644
--- a/test/regress/README.md
+++ b/test/regress/README.md
@@ -16,7 +16,16 @@ For example:
TEST_REGEX=quantifiers make regress0
```
-Runs regression tests from level 0 that have "quantifiers" in their name.
+This runs regression tests from level 0 that have "quantifiers" in their name.
+
+By default, each invocation of CVC4 is done with a 10 minute timeout. To use a
+different timeout, set the `TEST_TIMEOUT` environment variable:
+
+```
+TEST_TIMEOUT=0.5 make regress0
+```
+
+This runs regression tests from level 0 with a 0.5 second timeout.
## Adding New Regressions
diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py
index a45489d6a..260ab3284 100755
--- a/test/regress/run_regression.py
+++ b/test/regress/run_regression.py
@@ -15,6 +15,7 @@ import re
import shlex
import subprocess
import sys
+import threading
SCRUBBER = 'SCRUBBER: '
ERROR_SCRUBBER = 'ERROR-SCRUBBER: '
@@ -24,8 +25,36 @@ EXIT = 'EXIT: '
COMMAND_LINE = 'COMMAND-LINE: '
+def run_process(args, cwd, timeout, s_input=None):
+ """Runs a process with a timeout `timeout` in seconds. `args` are the
+ arguments to execute, `cwd` is the working directory and `s_input` is the
+ input to be sent to the process over stdin. Returns the output, the error
+ output and the exit code of the process. If the process times out, the
+ output and the error output are empty and the exit code is 124."""
+
+ proc = subprocess.Popen(
+ args,
+ cwd=cwd,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ out = ''
+ err = ''
+ exit_status = 124
+ timer = threading.Timer(timeout, lambda p: p.kill(), [proc])
+ try:
+ timer.start()
+ out, err = proc.communicate(input=s_input)
+ exit_status = proc.returncode
+ finally:
+ timer.cancel()
+
+ return out, err, exit_status
+
+
def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
- command_line, benchmark_dir, benchmark_filename):
+ command_line, benchmark_dir, benchmark_filename, timeout):
"""Runs CVC4 on the file `benchmark_filename` in the directory
`benchmark_dir` using the binary `cvc4_binary` with the command line
options `command_line`. The output is scrubbed using `scrubber` and
@@ -44,44 +73,24 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
'--preprocess-only', '--dump', 'raw-benchmark',
'--output-lang=smt2', '-qq'
]
- dump_process = subprocess.Popen(
+ dump_output, _, _ = run_process(
bin_args + command_line + dump_args + [benchmark_filename],
- cwd=benchmark_dir,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- dump_output, _ = dump_process.communicate()
- process = subprocess.Popen(
- bin_args + command_line + ['--lang=smt2', '-'],
- cwd=benchmark_dir,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- output, error = process.communicate(input=dump_output)
- exit_status = process.returncode
+ benchmark_dir, timeout)
+ output, error, exit_status = run_process(
+ bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir,
+ timeout, dump_output)
else:
- process = subprocess.Popen(
- bin_args + command_line + [benchmark_filename],
- cwd=benchmark_dir,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- output, error = process.communicate()
- exit_status = process.returncode
+ output, error, exit_status = run_process(
+ bin_args + command_line + [benchmark_filename], benchmark_dir,
+ timeout)
# If a scrubber command has been specified then apply it to the output.
if scrubber:
- scrubber_process = subprocess.Popen(
- shlex.split(scrubber),
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- output, _ = scrubber_process.communicate(input=output)
+ output, _, _ = run_process(
+ shlex.split(scrubber), benchmark_dir, timeout, output)
if error_scrubber:
- error_scrubber_process = subprocess.Popen(
- shlex.split(error_scrubber),
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- error, _ = error_scrubber_process.communicate(input=error)
+ error, _, _ = run_process(
+ shlex.split(error_scrubber), benchmark_dir, timeout, error)
# Popen in Python 3 returns a bytes object instead of a string for
# stdout/stderr.
@@ -92,7 +101,7 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary,
return (output.strip(), error.strip(), exit_status)
-def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
+def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout):
"""Determines the expected output for a benchmark, runs CVC4 on it and then
checks whether the output corresponds to the expected output. Optionally
uses a wrapper `wrapper`, tests proof generation (if proof is true), or
@@ -241,7 +250,7 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path):
for command_line_args in command_line_args_configs:
output, error, exit_status = run_benchmark(
dump, wrapper, scrubber, error_scrubber, cvc4_binary,
- command_line_args, benchmark_dir, benchmark_basename)
+ command_line_args, benchmark_dir, benchmark_basename, timeout)
if output != expected_output:
print(
'not ok - Differences between expected and actual output on stdout - Flags: {}'.
@@ -283,7 +292,10 @@ def main():
if os.environ.get('VALGRIND') == '1' and not wrapper:
wrapper = ['libtool', '--mode=execute', 'valgrind']
- run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark)
+ timeout = float(os.getenv('TEST_TIMEOUT', 600.0))
+
+ run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark,
+ timeout)
if __name__ == "__main__":
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback