Unverified commit a1cd3c94, authored by Yifan Xiong, committed by GitHub
Browse files

Runner - Add signal handler in runner (#530)

Add signal handler in runner to gracefully exit when receiving SIGINT
(<kbd>Ctrl</kbd>+<kbd>C</kbd>) or SIGTERM during benchmark execution.
parent 4c0d96e5
......@@ -198,6 +198,7 @@ def run(self):
'types-pkg_resources',
'types-pyyaml',
'typing-extensions>=3.10',
'urllib3<2.0',
'vcrpy>=4.1.1',
'yapf==0.31.0',
],
......
......@@ -59,11 +59,12 @@ def __init__(self, config):
self._config['cmdline'] += ' --ask-pass --ask-become-pass'
logger.info(self._config)
def run(self, ansible_config, sudo=False): # pragma: no cover
def run(self, ansible_config, cancel_callback=None, sudo=False): # pragma: no cover
"""Run Ansible runner.
Args:
ansible_config (dict): Ansible config dict.
cancel_callback (Callable): Ansible runner cancel callback.
sudo (bool): Run as sudo or not. Defaults to False.
Returns:
......@@ -73,7 +74,7 @@ def run(self, ansible_config, sudo=False): # pragma: no cover
logger.info('Run as sudo ...')
ansible_config['cmdline'] += ' --become'
with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config)
r = ansible_runner.run(private_data_dir=tmpdir, cancel_callback=cancel_callback, **ansible_config)
logger.debug(r.stats)
if r.rc == 0:
logger.info('Run succeed, return code {}.'.format(r.rc))
......
......@@ -4,8 +4,10 @@
"""SuperBench Runner."""
import os
import sys
import json
import random
import signal
from pathlib import Path
from pprint import pformat
from collections import defaultdict
......@@ -233,6 +235,18 @@ def fetch_results(self): # pragma: no cover
)
)
def __signal_handler(self, signum, frame):
    """Handle a termination signal delivered to the runner.

    On SIGINT or SIGTERM, logs the signal name, runs the runner's cleanup,
    and exits the process. Any other signal number is ignored.

    Args:
        signum (int): Number of the received signal.
        frame (FrameType): Stack frame passed in by the signal machinery (unused here).
    """
    # Guard clause: only SIGINT and SIGTERM trigger a shutdown.
    if signum not in (signal.SIGINT, signal.SIGTERM):
        return

    signal_name = signal.Signals(signum).name
    logger.info('Killed by %s, exiting ...', signal_name)
    self.cleanup()
    # Exit status is 128 plus the signal number.
    sys.exit(128 + signum)
def __create_results_summary(self): # pragma: no cover
"""Create the result summary file of all nodes."""
all_results = list()
......@@ -438,12 +452,17 @@ def _run_proc(self, benchmark_name, mode, vars):
# we do not expect timeout in ansible unless subprocess hangs
ansible_runner_config['timeout'] = timeout + 60
rc = self._ansible_client.run(ansible_runner_config, sudo=(not self._docker_config.skip))
# overwrite ansible runner's default signal handler with main process's
rc = self._ansible_client.run(
ansible_runner_config, cancel_callback=lambda: None, sudo=(not self._docker_config.skip)
)
return rc
def run(self):
"""Run the SuperBench benchmarks distributedly."""
self.check_env()
signal.signal(signal.SIGINT, self.__signal_handler)
signal.signal(signal.SIGTERM, self.__signal_handler)
for benchmark_name in self._sb_benchmarks:
if benchmark_name not in self._sb_enabled_benchmarks:
continue
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment