Unverified commit ef176d29, authored by SparkSnail and committed by GitHub
Browse files

Merge pull request #116 from Microsoft/master

merge master
parents 97866505 4553de75
......@@ -27,6 +27,16 @@ import yaml
# Text searched for in the nnimanager log to detect a finished experiment.
# NOTE: the double quotes are part of the value itself.
EXPERIMENT_DONE_SIGNAL = '"Experiment done"'
# ANSI escape sequences used to colour terminal output; CLEAR resets it.
GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'
# Base URL of the REST API; the endpoint URLs below are derived from it.
REST_ENDPOINT = 'http://localhost:8080/api/v1/nni'
EXPERIMENT_URL = REST_ENDPOINT + '/experiment'
STATUS_URL = REST_ENDPOINT + '/check-status'
TRIAL_JOBS_URL = REST_ENDPOINT + '/trial-jobs'
METRICS_URL = REST_ENDPOINT + '/metric-data'
def read_last_line(file_name):
'''read last line of a file and return None if file not found'''
try:
......@@ -73,10 +83,38 @@ def fetch_nni_log_path(experiment_url):
return nnimanager_log_path
def is_experiment_done(nnimanager_log_path):
    '''Check if the experiment is done successfully.

    Searches the nnimanager log for EXPERIMENT_DONE_SIGNAL with ``grep``;
    matching lines are echoed to stdout by grep itself.

    Args:
        nnimanager_log_path: path of the nnimanager log file.

    Returns:
        True when grep exits with status 0 (the signal was found),
        False otherwise.

    Raises:
        AssertionError: if the log file does not exist.
    '''
    assert os.path.exists(nnimanager_log_path), 'Experiment starts failed'
    # The constant carries literal double quotes that only served as shell
    # quoting; without a shell they must be removed from the pattern.
    pattern = EXPERIMENT_DONE_SIGNAL.strip('"')
    # Use an argument list instead of a joined shell string so that paths
    # containing spaces or shell metacharacters are handled correctly.
    completed_process = subprocess.run(['grep', '--', pattern, nnimanager_log_path])
    return completed_process.returncode == 0
def get_experiment_status(status_url):
    '''Fetch status_url and return the 'status' field of its JSON body.'''
    response = requests.get(status_url)
    return response.json()['status']
def get_succeeded_trial_num(trial_jobs_url):
    '''Count the trial jobs whose status is SUCCEEDED or EARLY_STOPPED.

    The job list fetched from trial_jobs_url and the resulting count are
    both printed before the count is returned.
    '''
    finished_states = ('SUCCEEDED', 'EARLY_STOPPED')
    jobs = requests.get(trial_jobs_url).json()
    print(jobs)
    num_succeed = sum(1 for job in jobs if job['status'] in finished_states)
    print('num_succeed:', num_succeed)
    return num_succeed
def print_stderr(trial_jobs_url):
    '''Dump the stderr file of every FAILED trial job with ``cat``.'''
    for job in requests.get(trial_jobs_url).json():
        if job['status'] != 'FAILED':
            continue
        # Only the text after the last ':' of stderrPath is used as the path.
        local_path = job['stderrPath'].rsplit(':', 1)[-1]
        subprocess.run(['cat', local_path])
def parse_max_duration_time(max_exec_duration):
    '''Convert a duration string such as '10m' into a number of seconds.

    The last character selects the unit (s, m, h or d); everything before
    it is parsed as an integer amount.
    '''
    seconds_per_unit = {'s':1, 'm':60, 'h':3600, 'd':86400}
    amount, suffix = max_exec_duration[:-1], max_exec_duration[-1]
    return int(amount) * seconds_per_unit[suffix]
......@@ -216,12 +216,14 @@ frameworkcontroller_trial_schema = {
frameworkcontroller_config_schema = {
'frameworkcontrollerConfig':Or({
Optional('storage'): Or('nfs', 'azureStorage'),
Optional('serviceAccountName'): str,
'nfs': {
'server': str,
'path': str
}
},{
Optional('storage'): Or('nfs', 'azureStorage'),
Optional('serviceAccountName'): str,
'keyVault': {
'vaultName': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),
'name': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}')
......
......@@ -35,18 +35,18 @@ def parse_relative_path(root_path, experiment_config, key):
print_normal('expand %s: %s to %s ' % (key, experiment_config[key], absolute_path))
experiment_config[key] = absolute_path
def parse_time(time):
    '''Change the time to seconds.

    Args:
        time: duration string made of an integer followed by a one-letter
            unit, one of 's', 'm', 'h' or 'd' (e.g. '30m').

    Returns:
        The duration in seconds as an int.

    On an unknown unit or a non-numeric amount an error is printed with
    print_error and the process exits with status 1.
    '''
    # Slice instead of indexing so an empty string yields '' and is
    # reported as a bad unit rather than raising IndexError.
    unit = time[-1:]
    if unit not in ['s', 'm', 'h', 'd']:
        print_error('the unit of time could only from {s, m, h, d}')
        exit(1)
    digits = time[:-1]
    if not digits.isdigit():
        print_error('time format error!')
        exit(1)
    parse_dict = {'s':1, 'm':60, 'h':3600, 'd':86400}
    return int(digits) * parse_dict[unit]
def parse_path(experiment_config, config_path):
'''Parse path in config file'''
......@@ -216,7 +216,7 @@ def validate_all_content(experiment_config, config_path):
'''Validate whether experiment_config is valid'''
parse_path(experiment_config, config_path)
validate_common_content(experiment_config)
parse_time(experiment_config)
experiment_config['maxExecDuration'] = parse_time(experiment_config['maxExecDuration'])
if experiment_config.get('advisor'):
parse_advisor_content(experiment_config)
validate_annotation_content(experiment_config, 'advisor', 'builtinAdvisorName')
......
......@@ -73,7 +73,7 @@ def parse_args():
parser_updater_concurrency.set_defaults(func=update_concurrency)
parser_updater_duration = parser_updater_subparsers.add_parser('duration', help='update duration')
parser_updater_duration.add_argument('id', nargs='?', help='the id of experiment')
parser_updater_duration.add_argument('--value', '-v', required=True)
parser_updater_duration.add_argument('--value', '-v', required=True, help='the unit of time should in {\'s\', \'m\', \'h\', \'d\'}')
parser_updater_duration.set_defaults(func=update_duration)
parser_updater_trialnum = parser_updater_subparsers.add_parser('trialnum', help='update maxtrialnum')
parser_updater_trialnum.add_argument('--id', '-i', dest='id', help='the id of experiment')
......
......@@ -26,6 +26,7 @@ from .url_utils import experiment_url
from .config_utils import Config
from .common_utils import get_json_content
from .nnictl_utils import check_experiment_id, get_experiment_port, get_config_filename
from .launcher_utils import parse_time
def validate_digit(value, start, end):
'''validate if a digit is valid'''
......@@ -92,7 +93,8 @@ def update_concurrency(args):
print('ERROR: update %s failed!' % 'concurrency')
def update_duration(args):
validate_digit(args.value, 1, 999999999)
#parse time, change time unit to seconds
args.value = parse_time(args.value)
args.port = get_experiment_port(args)
if args.port is not None:
if update_experiment_profile(args, 'maxExecDuration', int(args.value)):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment