Unverified Commit 2921e143 authored by fishyds's avatar fishyds Committed by GitHub
Browse files

Merge pull request #154 from Microsoft/v0.2

Merge V0.2 branch back to master
parents 2a28a578 35900e2a
......@@ -98,10 +98,10 @@ class Sessionpro extends React.Component<{}, SessionState> {
let sessionData = res.data;
let tunerAsstemp = [];
let trialPro = [];
const startExper = new Date(sessionData.startTime).toLocaleString();
const startExper = new Date(sessionData.startTime).toLocaleString('en-US');
let experEndStr: string;
if (sessionData.endTime !== undefined) {
experEndStr = new Date(sessionData.endTime).toLocaleString();
experEndStr = new Date(sessionData.endTime).toLocaleString('en-US');
} else {
experEndStr = 'not over';
}
......@@ -156,8 +156,8 @@ class Sessionpro extends React.Component<{}, SessionState> {
const desJobDetail: Parameters = {
parameters: {}
};
const startTime = new Date(tableData[item].startTime).toLocaleString();
const endTime = new Date(tableData[item].endTime).toLocaleString();
const startTime = new Date(tableData[item].startTime).toLocaleString('en-US');
const endTime = new Date(tableData[item].endTime).toLocaleString('en-US');
const duration = (tableData[item].endTime - tableData[item].startTime) / 1000;
let acc;
if (tableData[item].finalMetricData) {
......
......@@ -230,10 +230,10 @@ class TrialStatus extends React.Component<{}, TabState> {
? trialJobs[item].status
: '';
const startTime = trialJobs[item].startTime !== undefined
? new Date(trialJobs[item].startTime).toLocaleString()
? new Date(trialJobs[item].startTime).toLocaleString('en-US')
: '';
const endTime = trialJobs[item].endTime !== undefined
? new Date(trialJobs[item].endTime).toLocaleString()
? new Date(trialJobs[item].endTime).toLocaleString('en-US')
: '';
if (trialJobs[item].hyperParameters !== undefined) {
desc.parameters = JSON.parse(trialJobs[item].hyperParameters).parameters;
......@@ -394,7 +394,7 @@ class TrialStatus extends React.Component<{}, TabState> {
dataIndex: 'start',
key: 'start',
width: '15%',
sorter: (a: TableObj, b: TableObj): number => a.start.localeCompare(b.start)
sorter: (a: TableObj, b: TableObj): number => (Date.parse(a.start) - Date.parse(b.start))
}, {
title: 'End',
dataIndex: 'end',
......
......@@ -21,7 +21,7 @@
import json
import yaml
import psutil
from .constants import ERROR_INFO, NORMAL_INFO
from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT
def get_yml_content(file_path):
'''Load yaml file content'''
......@@ -43,12 +43,16 @@ def get_json_content(file_path):
def print_error(content):
'''Print error information to screen'''
print(ERROR_INFO % content)
print(COLOR_RED_FORMAT % (ERROR_INFO % content))
def print_normal(content):
'''Print normal (informational) message to screen, formatted with NORMAL_INFO'''
print(NORMAL_INFO % content)
def print_warning(content):
'''Print warning information to screen'''
print(COLOR_YELLOW_FORMAT % (WARNING_INFO % content))
def detect_process(pid):
'''Detect if a process is alive'''
try:
......
......@@ -34,22 +34,37 @@ STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout')
STDERR_FULL_PATH = os.path.join(LOG_DIR, 'stderr')
ERROR_INFO = 'Error: %s'
ERROR_INFO = 'ERROR: %s'
NORMAL_INFO = 'Info: %s'
NORMAL_INFO = 'INFO: %s'
WARNING_INFO = 'Waining: %s'
WARNING_INFO = 'WARNING: %s'
EXPERIMENT_SUCCESS_INFO = 'Start experiment success! The experiment id is %s, and the restful server post is %s.\n' \
'You can use these commands to get more information about this experiment:\n' \
EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \
'-----------------------------------------------------------------------\n' \
'The experiment id is %s\n'\
'The restful server post is %s\n' \
'The Web UI urls are: %s\n' \
'-----------------------------------------------------------------------\n\n' \
'You can use these commands to get more information about the experiment\n' \
'-----------------------------------------------------------------------\n' \
' commands description\n' \
'1. nnictl experiment show show the information of experiments\n' \
'2. nnictl trial ls list all of trial jobs\n' \
'3. nnictl stop stop a experiment\n' \
'4. nnictl trial kill kill a trial job by id\n' \
'5. nnictl --help get help information about nnictl\n' \
'6. nnictl webui url get the url of web ui'
'3. nnictl log stderr show stderr log content\n' \
'4. nnictl log stdout show stdout log content\n' \
'5. nnictl stop stop a experiment\n' \
'6. nnictl trial kill kill a trial job by id\n' \
'7. nnictl webui url get the url of web ui\n' \
'8. nnictl --help get help information about nnictl\n' \
'-----------------------------------------------------------------------\n' \
PACKAGE_REQUIREMENTS = {
'SMAC': 'smac_tuner'
}
COLOR_RED_FORMAT = '\033[1;31;31m%s\033[0m'
COLOR_GREEN_FORMAT = '\033[1;32;32m%s\033[0m'
COLOR_YELLOW_FORMAT = '\033[1;33;33m%s\033[0m'
\ No newline at end of file
......@@ -30,13 +30,13 @@ from .launcher_utils import validate_all_content
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response
from .url_utils import cluster_metadata_url, experiment_url
from .config_utils import Config
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, detect_process
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process
from .constants import *
from .webui_utils import start_web_ui, check_web_ui
def start_rest_server(port, platform, mode, experiment_id=None):
'''Run nni manager process'''
print_normal('Checking experiment...')
print_normal('Checking environment...')
nni_config = Config()
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
......@@ -191,6 +191,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
# Deal with annotation
if experiment_config.get('useAnnotation'):
path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation')
if not os.path.isdir(path):
os.makedirs(path)
path = tempfile.mkdtemp(dir=path)
code_dir = expand_annotations(experiment_config['trial']['codeDir'], path)
experiment_config['trial']['codeDir'] = code_dir
......@@ -204,10 +206,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
experiment_config['searchSpace'] = json.dumps('')
# check rest server
print_normal('Checking restful server...')
running, _ = check_rest_server(REST_PORT)
if running:
print_normal('Restful server start success!')
print_normal('Successfully started Restful server!')
else:
print_error('Restful server start failed!')
try:
......@@ -236,7 +237,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
if experiment_config['trainingServicePlatform'] == 'local':
print_normal('Setting local config...')
if set_local_config(experiment_config, REST_PORT):
print_normal('Success!')
print_normal('Successfully set local config!')
else:
print_error('Failed!')
try:
......@@ -251,7 +252,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
print_normal('Setting pai config...')
config_result, err_msg = set_pai_config(experiment_config, REST_PORT)
if config_result:
print_normal('Success!')
print_normal('Successfully set pai config!')
else:
if err_msg:
print_error('Failed! Error is: {}'.format(err_msg))
......@@ -259,8 +260,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0)
#start webui
if check_web_ui():
print_warning('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!'))
print_normal('You can use \'nnictl webui stop\' to stop old Web UI process...')
else:
print_normal('Starting Web UI...')
webui_process = start_web_ui(webuiport)
if webui_process:
nni_config.set_config('webuiPid', webui_process.pid)
print_normal('Successfully started Web UI!')
# start a new experiment
print_normal('Starting experiment...')
......@@ -274,25 +286,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
try:
cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
cmds = ['pkill', '-P', str(webui_process.pid)]
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0)
#start webui
print_normal('Checking web ui...')
if check_web_ui():
print_error('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!'))
print_normal('You can use \'nnictl webui stop\' to stop old web ui process...')
else:
print_normal('Starting web ui...')
webui_process = start_web_ui(webuiport)
if webui_process:
nni_config.set_config('webuiPid', webui_process.pid)
print_normal('Starting web ui success!')
print_normal('{0} {1}'.format('Web UI url:', ' '.join(nni_config.get_config('webuiUrl'))))
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT))
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT, ' '.join(nni_config.get_config('webuiUrl'))))
def resume_experiment(args):
'''resume an experiment'''
......
......@@ -64,17 +64,20 @@ def stop_experiment(args):
stop_web_ui()
return
running, _ = check_rest_server_quick(rest_port)
stop_rest_result = True
if running:
response = rest_delete(experiment_url(rest_port), 20)
if not response or not check_response(response):
print_error('Stop experiment failed!')
stop_rest_result = False
# sleep to give the rest handler time to finish
time.sleep(3)
rest_pid = nni_config.get_config('restServerPid')
cmds = ['pkill', '-P', str(rest_pid)]
call(cmds)
stop_web_ui()
print_normal('Stop experiment success!')
if stop_rest_result:
print_normal('Stop experiment success!')
def trial_ls(args):
'''List trial'''
......
......@@ -45,7 +45,6 @@ def main_loop(args):
# Notice: We don't specify env, which means the subprocess will inherit the current environment, and that is the expected behavior
process = Popen(args.trial_command, shell = True, stdout = stdout_file, stderr = stderr_file)
print('Subprocess pid is {}'.format(process.pid))
print('Current cwd is {}'.format(os.getcwd()))
while True:
retCode = process.poll()
## Read experiment metrics, to avoid missing metrics
......@@ -55,15 +54,15 @@ def main_loop(args):
print('subprocess terminated. Exit code is {}. Quit'.format(retCode))
#copy local directory to hdfs
nni_local_output_dir = os.environ['NNI_OUTPUT_DIR']
hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name)
print(nni_local_output_dir, args.pai_hdfs_output_dir)
hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name, timeout=5)
try:
if copyDirectoryToHdfs(nni_local_output_dir, args.pai_hdfs_output_dir, hdfs_client):
print('copy directory success!')
print('copy directory from {0} to {1} success!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
else:
print('copy directory failed!')
print('copy directory from {0} to {1} failed!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
except Exception as exception:
print(exception)
print('HDFS copy directory got exception')
raise exception
## Exit with the retCode of the subprocess (trial)
exit(retCode)
......@@ -91,7 +90,10 @@ if __name__ == '__main__':
try:
main_loop(args)
except:
print('Exiting by user request')
except SystemExit as se:
print('NNI trial keeper exit with code {}'.format(se.code))
sys.exit(se.code)
except Exception as e:
print('Exit trial keeper with code 1 because Exception: {} is catched'.format(str(e)))
sys.exit(1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment