Unverified Commit 67141400 authored by J-shang's avatar J-shang Committed by GitHub
Browse files

Retry logic for getting experiment metadata failed (#4569)

parent b0bcde86
......@@ -222,9 +222,22 @@ def stop_experiment(args):
if experiment_id_list:
for experiment_id in experiment_id_list:
print_normal('Stopping experiment %s' % experiment_id)
experiments_config = Experiments()
experiments_dict = experiments_config.get_all_experiments()
rest_pid = experiments_dict.get(experiment_id).get('pid')
# FIXME: Retry should be placed to `Experiments`, need review both python and ts code.
# retry up to 10 times to get the experiment metadata
for i in range(1, 11):
experiments_dict = Experiments().get_all_experiments()
experiment_info = experiments_dict.get(experiment_id)
if experiment_info is None:
print_warning('Get experiment {} metadata failed, {} time retry...'.format(experiment_id, i))
time.sleep(0.5)
else:
break
if experiment_info is None:
print_error('Experiment {} metadata getting failed.'.format(experiment_id))
print_error('The experiments metadata in `.experiment` is:')
print_error(json.dumps(Experiments().get_all_experiments(), indent=4))
exit(1)
rest_pid = experiment_info.get('pid')
if rest_pid:
kill_command(rest_pid)
print_normal('Stop experiment success.')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment