Unverified Commit a1cd3c94 authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Runner - Add signal handler in runner (#530)

Add signal handler in runner to gracefully exit when receiving SIGINT
(<kbd>Ctrl</kbd>+<kbd>C</kbd>) or SIGTERM during benchmark execution.
parent 4c0d96e5
...@@ -198,6 +198,7 @@ def run(self): ...@@ -198,6 +198,7 @@ def run(self):
'types-pkg_resources', 'types-pkg_resources',
'types-pyyaml', 'types-pyyaml',
'typing-extensions>=3.10', 'typing-extensions>=3.10',
'urllib3<2.0',
'vcrpy>=4.1.1', 'vcrpy>=4.1.1',
'yapf==0.31.0', 'yapf==0.31.0',
], ],
......
...@@ -59,11 +59,12 @@ def __init__(self, config): ...@@ -59,11 +59,12 @@ def __init__(self, config):
self._config['cmdline'] += ' --ask-pass --ask-become-pass' self._config['cmdline'] += ' --ask-pass --ask-become-pass'
logger.info(self._config) logger.info(self._config)
def run(self, ansible_config, sudo=False): # pragma: no cover def run(self, ansible_config, cancel_callback=None, sudo=False): # pragma: no cover
"""Run Ansible runner. """Run Ansible runner.
Args: Args:
ansible_config (dict): Ansible config dict. ansible_config (dict): Ansible config dict.
cancel_callback (Callable): Ansible runner cancel callback.
sudo (bool): Run as sudo or not. Defaults to False. sudo (bool): Run as sudo or not. Defaults to False.
Returns: Returns:
...@@ -73,7 +74,7 @@ def run(self, ansible_config, sudo=False): # pragma: no cover ...@@ -73,7 +74,7 @@ def run(self, ansible_config, sudo=False): # pragma: no cover
logger.info('Run as sudo ...') logger.info('Run as sudo ...')
ansible_config['cmdline'] += ' --become' ansible_config['cmdline'] += ' --become'
with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir: with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config) r = ansible_runner.run(private_data_dir=tmpdir, cancel_callback=cancel_callback, **ansible_config)
logger.debug(r.stats) logger.debug(r.stats)
if r.rc == 0: if r.rc == 0:
logger.info('Run succeed, return code {}.'.format(r.rc)) logger.info('Run succeed, return code {}.'.format(r.rc))
......
...@@ -4,8 +4,10 @@ ...@@ -4,8 +4,10 @@
"""SuperBench Runner.""" """SuperBench Runner."""
import os import os
import sys
import json import json
import random import random
import signal
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
from collections import defaultdict from collections import defaultdict
...@@ -233,6 +235,18 @@ def fetch_results(self): # pragma: no cover ...@@ -233,6 +235,18 @@ def fetch_results(self): # pragma: no cover
) )
) )
def __signal_handler(self, signum, frame):
    """Handle termination signals delivered to the runner.

    On SIGINT or SIGTERM, log the signal name, call ``self.cleanup()`` and
    exit the process with status ``128 + signum``. Any other signal is ignored.

    Args:
        signum (int): Number of the signal that was received.
        frame (FrameType): Stack frame passed in by the signal machinery; unused here.
    """
    # Guard clause: only the two termination signals are acted upon.
    if signum not in (signal.SIGINT, signal.SIGTERM):
        return

    signal_name = signal.Signals(signum).name
    logger.info('Killed by %s, exiting ...', signal_name)
    self.cleanup()
    # Exit with 128 + signal number.
    exit_code = 128 + signum
    sys.exit(exit_code)
def __create_results_summary(self): # pragma: no cover def __create_results_summary(self): # pragma: no cover
"""Create the result summary file of all nodes.""" """Create the result summary file of all nodes."""
all_results = list() all_results = list()
...@@ -438,12 +452,17 @@ def _run_proc(self, benchmark_name, mode, vars): ...@@ -438,12 +452,17 @@ def _run_proc(self, benchmark_name, mode, vars):
# we do not expect timeout in ansible unless subprocess hangs # we do not expect timeout in ansible unless subprocess hangs
ansible_runner_config['timeout'] = timeout + 60 ansible_runner_config['timeout'] = timeout + 60
rc = self._ansible_client.run(ansible_runner_config, sudo=(not self._docker_config.skip)) # overwrite ansible runner's default signal handler with main process's
rc = self._ansible_client.run(
ansible_runner_config, cancel_callback=lambda: None, sudo=(not self._docker_config.skip)
)
return rc return rc
def run(self): def run(self):
"""Run the SuperBench benchmarks distributedly.""" """Run the SuperBench benchmarks distributedly."""
self.check_env() self.check_env()
signal.signal(signal.SIGINT, self.__signal_handler)
signal.signal(signal.SIGTERM, self.__signal_handler)
for benchmark_name in self._sb_benchmarks: for benchmark_name in self._sb_benchmarks:
if benchmark_name not in self._sb_enabled_benchmarks: if benchmark_name not in self._sb_enabled_benchmarks:
continue continue
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment