"src/vscode:/vscode.git/clone" did not exist on "63bf16093cc1bfb1abfacb7dc10a3c73a3dd0530"
Unverified commit 2921e143, authored by fishyds, committed by GitHub
Browse files

Merge pull request #154 from Microsoft/v0.2

Merge V0.2 branch back to master
parents 2a28a578 35900e2a
...@@ -98,10 +98,10 @@ class Sessionpro extends React.Component<{}, SessionState> { ...@@ -98,10 +98,10 @@ class Sessionpro extends React.Component<{}, SessionState> {
let sessionData = res.data; let sessionData = res.data;
let tunerAsstemp = []; let tunerAsstemp = [];
let trialPro = []; let trialPro = [];
const startExper = new Date(sessionData.startTime).toLocaleString(); const startExper = new Date(sessionData.startTime).toLocaleString('en-US');
let experEndStr: string; let experEndStr: string;
if (sessionData.endTime !== undefined) { if (sessionData.endTime !== undefined) {
experEndStr = new Date(sessionData.endTime).toLocaleString(); experEndStr = new Date(sessionData.endTime).toLocaleString('en-US');
} else { } else {
experEndStr = 'not over'; experEndStr = 'not over';
} }
...@@ -156,8 +156,8 @@ class Sessionpro extends React.Component<{}, SessionState> { ...@@ -156,8 +156,8 @@ class Sessionpro extends React.Component<{}, SessionState> {
const desJobDetail: Parameters = { const desJobDetail: Parameters = {
parameters: {} parameters: {}
}; };
const startTime = new Date(tableData[item].startTime).toLocaleString(); const startTime = new Date(tableData[item].startTime).toLocaleString('en-US');
const endTime = new Date(tableData[item].endTime).toLocaleString(); const endTime = new Date(tableData[item].endTime).toLocaleString('en-US');
const duration = (tableData[item].endTime - tableData[item].startTime) / 1000; const duration = (tableData[item].endTime - tableData[item].startTime) / 1000;
let acc; let acc;
if (tableData[item].finalMetricData) { if (tableData[item].finalMetricData) {
......
...@@ -230,10 +230,10 @@ class TrialStatus extends React.Component<{}, TabState> { ...@@ -230,10 +230,10 @@ class TrialStatus extends React.Component<{}, TabState> {
? trialJobs[item].status ? trialJobs[item].status
: ''; : '';
const startTime = trialJobs[item].startTime !== undefined const startTime = trialJobs[item].startTime !== undefined
? new Date(trialJobs[item].startTime).toLocaleString() ? new Date(trialJobs[item].startTime).toLocaleString('en-US')
: ''; : '';
const endTime = trialJobs[item].endTime !== undefined const endTime = trialJobs[item].endTime !== undefined
? new Date(trialJobs[item].endTime).toLocaleString() ? new Date(trialJobs[item].endTime).toLocaleString('en-US')
: ''; : '';
if (trialJobs[item].hyperParameters !== undefined) { if (trialJobs[item].hyperParameters !== undefined) {
desc.parameters = JSON.parse(trialJobs[item].hyperParameters).parameters; desc.parameters = JSON.parse(trialJobs[item].hyperParameters).parameters;
...@@ -394,7 +394,7 @@ class TrialStatus extends React.Component<{}, TabState> { ...@@ -394,7 +394,7 @@ class TrialStatus extends React.Component<{}, TabState> {
dataIndex: 'start', dataIndex: 'start',
key: 'start', key: 'start',
width: '15%', width: '15%',
sorter: (a: TableObj, b: TableObj): number => a.start.localeCompare(b.start) sorter: (a: TableObj, b: TableObj): number => (Date.parse(a.start) - Date.parse(b.start))
}, { }, {
title: 'End', title: 'End',
dataIndex: 'end', dataIndex: 'end',
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
import json import json
import yaml import yaml
import psutil import psutil
from .constants import ERROR_INFO, NORMAL_INFO from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT
def get_yml_content(file_path): def get_yml_content(file_path):
'''Load yaml file content''' '''Load yaml file content'''
...@@ -43,12 +43,16 @@ def get_json_content(file_path): ...@@ -43,12 +43,16 @@ def get_json_content(file_path):
def print_error(content): def print_error(content):
'''Print error information to screen''' '''Print error information to screen'''
print(ERROR_INFO % content) print(COLOR_RED_FORMAT % (ERROR_INFO % content))
def print_normal(content): def print_normal(content):
'''Print error information to screen''' '''Print error information to screen'''
print(NORMAL_INFO % content) print(NORMAL_INFO % content)
def print_warning(content):
'''Print warning information to screen'''
print(COLOR_YELLOW_FORMAT % (WARNING_INFO % content))
def detect_process(pid): def detect_process(pid):
'''Detect if a process is alive''' '''Detect if a process is alive'''
try: try:
......
...@@ -34,22 +34,37 @@ STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout') ...@@ -34,22 +34,37 @@ STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout')
STDERR_FULL_PATH = os.path.join(LOG_DIR, 'stderr') STDERR_FULL_PATH = os.path.join(LOG_DIR, 'stderr')
ERROR_INFO = 'Error: %s' ERROR_INFO = 'ERROR: %s'
NORMAL_INFO = 'Info: %s' NORMAL_INFO = 'INFO: %s'
WARNING_INFO = 'Waining: %s' WARNING_INFO = 'WARNING: %s'
EXPERIMENT_SUCCESS_INFO = 'Start experiment success! The experiment id is %s, and the restful server post is %s.\n' \ EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \
'You can use these commands to get more information about this experiment:\n' \ '-----------------------------------------------------------------------\n' \
'The experiment id is %s\n'\
'The restful server post is %s\n' \
'The Web UI urls are: %s\n' \
'-----------------------------------------------------------------------\n\n' \
'You can use these commands to get more information about the experiment\n' \
'-----------------------------------------------------------------------\n' \
' commands description\n' \ ' commands description\n' \
'1. nnictl experiment show show the information of experiments\n' \ '1. nnictl experiment show show the information of experiments\n' \
'2. nnictl trial ls list all of trial jobs\n' \ '2. nnictl trial ls list all of trial jobs\n' \
'3. nnictl stop stop a experiment\n' \ '3. nnictl log stderr show stderr log content\n' \
'4. nnictl trial kill kill a trial job by id\n' \ '4. nnictl log stdout show stdout log content\n' \
'5. nnictl --help get help information about nnictl\n' \ '5. nnictl stop stop a experiment\n' \
'6. nnictl webui url get the url of web ui' '6. nnictl trial kill kill a trial job by id\n' \
'7. nnictl webui url get the url of web ui\n' \
'8. nnictl --help get help information about nnictl\n' \
'-----------------------------------------------------------------------\n' \
PACKAGE_REQUIREMENTS = { PACKAGE_REQUIREMENTS = {
'SMAC': 'smac_tuner' 'SMAC': 'smac_tuner'
} }
COLOR_RED_FORMAT = '\033[1;31;31m%s\033[0m'
COLOR_GREEN_FORMAT = '\033[1;32;32m%s\033[0m'
COLOR_YELLOW_FORMAT = '\033[1;33;33m%s\033[0m'
\ No newline at end of file
...@@ -30,13 +30,13 @@ from .launcher_utils import validate_all_content ...@@ -30,13 +30,13 @@ from .launcher_utils import validate_all_content
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response
from .url_utils import cluster_metadata_url, experiment_url from .url_utils import cluster_metadata_url, experiment_url
from .config_utils import Config from .config_utils import Config
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, detect_process from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO from .constants import *
from .webui_utils import start_web_ui, check_web_ui from .webui_utils import start_web_ui, check_web_ui
def start_rest_server(port, platform, mode, experiment_id=None): def start_rest_server(port, platform, mode, experiment_id=None):
'''Run nni manager process''' '''Run nni manager process'''
print_normal('Checking experiment...') print_normal('Checking environment...')
nni_config = Config() nni_config = Config()
rest_port = nni_config.get_config('restServerPort') rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port) running, _ = check_rest_server_quick(rest_port)
...@@ -191,6 +191,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -191,6 +191,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
# Deal with annotation # Deal with annotation
if experiment_config.get('useAnnotation'): if experiment_config.get('useAnnotation'):
path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation') path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation')
if not os.path.isdir(path):
os.makedirs(path)
path = tempfile.mkdtemp(dir=path) path = tempfile.mkdtemp(dir=path)
code_dir = expand_annotations(experiment_config['trial']['codeDir'], path) code_dir = expand_annotations(experiment_config['trial']['codeDir'], path)
experiment_config['trial']['codeDir'] = code_dir experiment_config['trial']['codeDir'] = code_dir
...@@ -204,10 +206,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -204,10 +206,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
experiment_config['searchSpace'] = json.dumps('') experiment_config['searchSpace'] = json.dumps('')
# check rest server # check rest server
print_normal('Checking restful server...')
running, _ = check_rest_server(REST_PORT) running, _ = check_rest_server(REST_PORT)
if running: if running:
print_normal('Restful server start success!') print_normal('Successfully started Restful server!')
else: else:
print_error('Restful server start failed!') print_error('Restful server start failed!')
try: try:
...@@ -236,7 +237,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -236,7 +237,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
if experiment_config['trainingServicePlatform'] == 'local': if experiment_config['trainingServicePlatform'] == 'local':
print_normal('Setting local config...') print_normal('Setting local config...')
if set_local_config(experiment_config, REST_PORT): if set_local_config(experiment_config, REST_PORT):
print_normal('Success!') print_normal('Successfully set local config!')
else: else:
print_error('Failed!') print_error('Failed!')
try: try:
...@@ -251,7 +252,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -251,7 +252,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
print_normal('Setting pai config...') print_normal('Setting pai config...')
config_result, err_msg = set_pai_config(experiment_config, REST_PORT) config_result, err_msg = set_pai_config(experiment_config, REST_PORT)
if config_result: if config_result:
print_normal('Success!') print_normal('Successfully set pai config!')
else: else:
if err_msg: if err_msg:
print_error('Failed! Error is: {}'.format(err_msg)) print_error('Failed! Error is: {}'.format(err_msg))
...@@ -259,8 +260,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -259,8 +260,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
cmds = ['pkill', '-P', str(rest_process.pid)] cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds) call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0) exit(0)
#start webui
if check_web_ui():
print_warning('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!'))
print_normal('You can use \'nnictl webui stop\' to stop old Web UI process...')
else:
print_normal('Starting Web UI...')
webui_process = start_web_ui(webuiport)
if webui_process:
nni_config.set_config('webuiPid', webui_process.pid)
print_normal('Successfully started Web UI!')
# start a new experiment # start a new experiment
print_normal('Starting experiment...') print_normal('Starting experiment...')
...@@ -274,25 +286,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No ...@@ -274,25 +286,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
try: try:
cmds = ['pkill', '-P', str(rest_process.pid)] cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds) call(cmds)
cmds = ['pkill', '-P', str(webui_process.pid)]
call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0) exit(0)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT, ' '.join(nni_config.get_config('webuiUrl'))))
#start webui
print_normal('Checking web ui...')
if check_web_ui():
print_error('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!'))
print_normal('You can use \'nnictl webui stop\' to stop old web ui process...')
else:
print_normal('Starting web ui...')
webui_process = start_web_ui(webuiport)
if webui_process:
nni_config.set_config('webuiPid', webui_process.pid)
print_normal('Starting web ui success!')
print_normal('{0} {1}'.format('Web UI url:', ' '.join(nni_config.get_config('webuiUrl'))))
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT))
def resume_experiment(args): def resume_experiment(args):
'''resume an experiment''' '''resume an experiment'''
......
...@@ -64,17 +64,20 @@ def stop_experiment(args): ...@@ -64,17 +64,20 @@ def stop_experiment(args):
stop_web_ui() stop_web_ui()
return return
running, _ = check_rest_server_quick(rest_port) running, _ = check_rest_server_quick(rest_port)
stop_rest_result = True
if running: if running:
response = rest_delete(experiment_url(rest_port), 20) response = rest_delete(experiment_url(rest_port), 20)
if not response or not check_response(response): if not response or not check_response(response):
print_error('Stop experiment failed!') print_error('Stop experiment failed!')
stop_rest_result = False
#sleep to wait rest handler done #sleep to wait rest handler done
time.sleep(3) time.sleep(3)
rest_pid = nni_config.get_config('restServerPid') rest_pid = nni_config.get_config('restServerPid')
cmds = ['pkill', '-P', str(rest_pid)] cmds = ['pkill', '-P', str(rest_pid)]
call(cmds) call(cmds)
stop_web_ui() stop_web_ui()
print_normal('Stop experiment success!') if stop_rest_result:
print_normal('Stop experiment success!')
def trial_ls(args): def trial_ls(args):
'''List trial''' '''List trial'''
......
...@@ -45,7 +45,6 @@ def main_loop(args): ...@@ -45,7 +45,6 @@ def main_loop(args):
# Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior # Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior
process = Popen(args.trial_command, shell = True, stdout = stdout_file, stderr = stderr_file) process = Popen(args.trial_command, shell = True, stdout = stdout_file, stderr = stderr_file)
print('Subprocess pid is {}'.format(process.pid)) print('Subprocess pid is {}'.format(process.pid))
print('Current cwd is {}'.format(os.getcwd()))
while True: while True:
retCode = process.poll() retCode = process.poll()
## Read experiment metrics, to avoid missing metrics ## Read experiment metrics, to avoid missing metrics
...@@ -55,15 +54,15 @@ def main_loop(args): ...@@ -55,15 +54,15 @@ def main_loop(args):
print('subprocess terminated. Exit code is {}. Quit'.format(retCode)) print('subprocess terminated. Exit code is {}. Quit'.format(retCode))
#copy local directory to hdfs #copy local directory to hdfs
nni_local_output_dir = os.environ['NNI_OUTPUT_DIR'] nni_local_output_dir = os.environ['NNI_OUTPUT_DIR']
hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name) hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name, timeout=5)
print(nni_local_output_dir, args.pai_hdfs_output_dir)
try: try:
if copyDirectoryToHdfs(nni_local_output_dir, args.pai_hdfs_output_dir, hdfs_client): if copyDirectoryToHdfs(nni_local_output_dir, args.pai_hdfs_output_dir, hdfs_client):
print('copy directory success!') print('copy directory from {0} to {1} success!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
else: else:
print('copy directory failed!') print('copy directory from {0} to {1} failed!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
except Exception as exception: except Exception as exception:
print(exception) print('HDFS copy directory got exception')
raise exception
## Exit as the retCode of subprocess(trial) ## Exit as the retCode of subprocess(trial)
exit(retCode) exit(retCode)
...@@ -91,7 +90,10 @@ if __name__ == '__main__': ...@@ -91,7 +90,10 @@ if __name__ == '__main__':
try: try:
main_loop(args) main_loop(args)
except: except SystemExit as se:
print('Exiting by user request') print('NNI trial keeper exit with code {}'.format(se.code))
sys.exit(se.code)
except Exception as e:
print('Exit trial keeper with code 1 because Exception: {} is catched'.format(str(e)))
sys.exit(1) sys.exit(1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment