Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
2921e143
Unverified
Commit
2921e143
authored
Sep 30, 2018
by
fishyds
Committed by
GitHub
Sep 30, 2018
Browse files
Merge pull request #154 from Microsoft/v0.2
Merge V0.2 branch back to master
parents
2a28a578
35900e2a
Changes
27
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
75 additions
and
52 deletions
+75
-52
src/webui/src/components/Sessionpro.tsx
src/webui/src/components/Sessionpro.tsx
+4
-4
src/webui/src/components/TrialStatus.tsx
src/webui/src/components/TrialStatus.tsx
+3
-3
tools/nnicmd/common_utils.py
tools/nnicmd/common_utils.py
+6
-2
tools/nnicmd/constants.py
tools/nnicmd/constants.py
+24
-9
tools/nnicmd/launcher.py
tools/nnicmd/launcher.py
+24
-25
tools/nnicmd/nnictl_utils.py
tools/nnicmd/nnictl_utils.py
+4
-1
tools/trial_tool/trial_keeper.py
tools/trial_tool/trial_keeper.py
+10
-8
No files found.
src/webui/src/components/Sessionpro.tsx
View file @
2921e143
...
...
@@ -98,10 +98,10 @@ class Sessionpro extends React.Component<{}, SessionState> {
let
sessionData
=
res
.
data
;
let
tunerAsstemp
=
[];
let
trialPro
=
[];
const
startExper
=
new
Date
(
sessionData
.
startTime
).
toLocaleString
();
const
startExper
=
new
Date
(
sessionData
.
startTime
).
toLocaleString
(
'
en-US
'
);
let
experEndStr
:
string
;
if
(
sessionData
.
endTime
!==
undefined
)
{
experEndStr
=
new
Date
(
sessionData
.
endTime
).
toLocaleString
();
experEndStr
=
new
Date
(
sessionData
.
endTime
).
toLocaleString
(
'
en-US
'
);
}
else
{
experEndStr
=
'
not over
'
;
}
...
...
@@ -156,8 +156,8 @@ class Sessionpro extends React.Component<{}, SessionState> {
const
desJobDetail
:
Parameters
=
{
parameters
:
{}
};
const
startTime
=
new
Date
(
tableData
[
item
].
startTime
).
toLocaleString
();
const
endTime
=
new
Date
(
tableData
[
item
].
endTime
).
toLocaleString
();
const
startTime
=
new
Date
(
tableData
[
item
].
startTime
).
toLocaleString
(
'
en-US
'
);
const
endTime
=
new
Date
(
tableData
[
item
].
endTime
).
toLocaleString
(
'
en-US
'
);
const
duration
=
(
tableData
[
item
].
endTime
-
tableData
[
item
].
startTime
)
/
1000
;
let
acc
;
if
(
tableData
[
item
].
finalMetricData
)
{
...
...
src/webui/src/components/TrialStatus.tsx
View file @
2921e143
...
...
@@ -230,10 +230,10 @@ class TrialStatus extends React.Component<{}, TabState> {
?
trialJobs
[
item
].
status
:
''
;
const
startTime
=
trialJobs
[
item
].
startTime
!==
undefined
?
new
Date
(
trialJobs
[
item
].
startTime
).
toLocaleString
()
?
new
Date
(
trialJobs
[
item
].
startTime
).
toLocaleString
(
'
en-US
'
)
:
''
;
const
endTime
=
trialJobs
[
item
].
endTime
!==
undefined
?
new
Date
(
trialJobs
[
item
].
endTime
).
toLocaleString
()
?
new
Date
(
trialJobs
[
item
].
endTime
).
toLocaleString
(
'
en-US
'
)
:
''
;
if
(
trialJobs
[
item
].
hyperParameters
!==
undefined
)
{
desc
.
parameters
=
JSON
.
parse
(
trialJobs
[
item
].
hyperParameters
).
parameters
;
...
...
@@ -394,7 +394,7 @@ class TrialStatus extends React.Component<{}, TabState> {
dataIndex
:
'
start
'
,
key
:
'
start
'
,
width
:
'
15%
'
,
sorter
:
(
a
:
TableObj
,
b
:
TableObj
):
number
=>
a
.
start
.
localeCom
pare
(
b
.
start
)
sorter
:
(
a
:
TableObj
,
b
:
TableObj
):
number
=>
(
Date
.
parse
(
a
.
start
)
-
Date
.
par
s
e
(
b
.
start
)
)
},
{
title
:
'
End
'
,
dataIndex
:
'
end
'
,
...
...
tools/nnicmd/common_utils.py
View file @
2921e143
...
...
@@ -21,7 +21,7 @@
import
json
import
yaml
import
psutil
from
.constants
import
ERROR_INFO
,
NORMAL_INFO
from
.constants
import
ERROR_INFO
,
NORMAL_INFO
,
WARNING_INFO
,
COLOR_RED_FORMAT
,
COLOR_YELLOW_FORMAT
def
get_yml_content
(
file_path
):
'''Load yaml file content'''
...
...
@@ -43,12 +43,16 @@ def get_json_content(file_path):
def
print_error
(
content
):
'''Print error information to screen'''
print
(
ERROR_INFO
%
content
)
print
(
COLOR_RED_FORMAT
%
(
ERROR_INFO
%
content
)
)
def
print_normal
(
content
):
'''Print normal information to screen'''
print
(
NORMAL_INFO
%
content
)
def
print_warning
(
content
):
'''Print warning information to screen'''
print
(
COLOR_YELLOW_FORMAT
%
(
WARNING_INFO
%
content
))
def
detect_process
(
pid
):
'''Detect if a process is alive'''
try
:
...
...
tools/nnicmd/constants.py
View file @
2921e143
...
...
@@ -34,22 +34,37 @@ STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout')
STDERR_FULL_PATH
=
os
.
path
.
join
(
LOG_DIR
,
'stderr'
)
ERROR_INFO
=
'E
rror
: %s'
ERROR_INFO
=
'E
RROR
: %s'
NORMAL_INFO
=
'I
nfo
: %s'
NORMAL_INFO
=
'I
NFO
: %s'
WARNING_INFO
=
'W
aining
: %s'
WARNING_INFO
=
'W
ARNING
: %s'
EXPERIMENT_SUCCESS_INFO
=
'Start experiment success! The experiment id is %s, and the restful server post is %s.
\n
'
\
'You can use these commands to get more information about this experiment:
\n
'
\
EXPERIMENT_SUCCESS_INFO
=
'
\033
[1;32;32mSuccessfully started experiment!
\n\033
[0m'
\
'-----------------------------------------------------------------------
\n
'
\
'The experiment id is %s
\n
'
\
'The restful server post is %s
\n
'
\
'The Web UI urls are: %s
\n
'
\
'-----------------------------------------------------------------------
\n\n
'
\
'You can use these commands to get more information about the experiment
\n
'
\
'-----------------------------------------------------------------------
\n
'
\
' commands description
\n
'
\
'1. nnictl experiment show show the information of experiments
\n
'
\
'2. nnictl trial ls list all of trial jobs
\n
'
\
'3. nnictl stop stop a experiment
\n
'
\
'4. nnictl trial kill kill a trial job by id
\n
'
\
'5. nnictl --help get help information about nnictl
\n
'
\
'6. nnictl webui url get the url of web ui'
'3. nnictl log stderr show stderr log content
\n
'
\
'4. nnictl log stdout show stdout log content
\n
'
\
'5. nnictl stop stop a experiment
\n
'
\
'6. nnictl trial kill kill a trial job by id
\n
'
\
'7. nnictl webui url get the url of web ui
\n
'
\
'8. nnictl --help get help information about nnictl
\n
'
\
'-----------------------------------------------------------------------
\n
'
\
PACKAGE_REQUIREMENTS
=
{
'SMAC'
:
'smac_tuner'
}
COLOR_RED_FORMAT
=
'
\033
[1;31;31m%s
\033
[0m'
COLOR_GREEN_FORMAT
=
'
\033
[1;32;32m%s
\033
[0m'
COLOR_YELLOW_FORMAT
=
'
\033
[1;33;33m%s
\033
[0m'
\ No newline at end of file
tools/nnicmd/launcher.py
View file @
2921e143
...
...
@@ -30,13 +30,13 @@ from .launcher_utils import validate_all_content
from
.rest_utils
import
rest_put
,
rest_post
,
check_rest_server
,
check_rest_server_quick
,
check_response
from
.url_utils
import
cluster_metadata_url
,
experiment_url
from
.config_utils
import
Config
from
.common_utils
import
get_yml_content
,
get_json_content
,
print_error
,
print_normal
,
detect_process
from
.constants
import
EXPERIMENT_SUCCESS_INFO
,
STDOUT_FULL_PATH
,
STDERR_FULL_PATH
,
LOG_DIR
,
REST_PORT
,
ERROR_INFO
,
NORMAL_INFO
from
.common_utils
import
get_yml_content
,
get_json_content
,
print_error
,
print_normal
,
print_warning
,
detect_process
from
.constants
import
*
from
.webui_utils
import
start_web_ui
,
check_web_ui
def
start_rest_server
(
port
,
platform
,
mode
,
experiment_id
=
None
):
'''Run nni manager process'''
print_normal
(
'Checking e
xperi
ment...'
)
print_normal
(
'Checking e
nviron
ment...'
)
nni_config
=
Config
()
rest_port
=
nni_config
.
get_config
(
'restServerPort'
)
running
,
_
=
check_rest_server_quick
(
rest_port
)
...
...
@@ -191,6 +191,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
# Deal with annotation
if
experiment_config
.
get
(
'useAnnotation'
):
path
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'nni'
,
'annotation'
)
if
not
os
.
path
.
isdir
(
path
):
os
.
makedirs
(
path
)
path
=
tempfile
.
mkdtemp
(
dir
=
path
)
code_dir
=
expand_annotations
(
experiment_config
[
'trial'
][
'codeDir'
],
path
)
experiment_config
[
'trial'
][
'codeDir'
]
=
code_dir
...
...
@@ -204,10 +206,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
experiment_config
[
'searchSpace'
]
=
json
.
dumps
(
''
)
# check rest server
print_normal
(
'Checking restful server...'
)
running
,
_
=
check_rest_server
(
REST_PORT
)
if
running
:
print_normal
(
'
R
es
t
ful
server start success
!'
)
print_normal
(
'
Succ
es
s
ful
ly started Restful server
!'
)
else
:
print_error
(
'Restful server start failed!'
)
try
:
...
...
@@ -236,7 +237,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
if
experiment_config
[
'trainingServicePlatform'
]
==
'local'
:
print_normal
(
'Setting local config...'
)
if
set_local_config
(
experiment_config
,
REST_PORT
):
print_normal
(
'Success!'
)
print_normal
(
'Success
fully set local config
!'
)
else
:
print_error
(
'Failed!'
)
try
:
...
...
@@ -251,7 +252,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
print_normal
(
'Setting pai config...'
)
config_result
,
err_msg
=
set_pai_config
(
experiment_config
,
REST_PORT
)
if
config_result
:
print_normal
(
'Success!'
)
print_normal
(
'Success
fully set pai config
!'
)
else
:
if
err_msg
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
...
...
@@ -259,8 +260,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_process
.
pid
)]
call
(
cmds
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
raise
Exception
(
ERROR_INFO
%
'Rest
ful
server stopped!'
)
exit
(
0
)
#start webui
if
check_web_ui
():
print_warning
(
'{0} {1}'
.
format
(
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
)),
'is being used, please stop it first!'
))
print_normal
(
'You can use
\'
nnictl webui stop
\'
to stop old Web UI process...'
)
else
:
print_normal
(
'Starting Web UI...'
)
webui_process
=
start_web_ui
(
webuiport
)
if
webui_process
:
nni_config
.
set_config
(
'webuiPid'
,
webui_process
.
pid
)
print_normal
(
'Successfully started Web UI!'
)
# start a new experiment
print_normal
(
'Starting experiment...'
)
...
...
@@ -274,25 +286,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
try
:
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_process
.
pid
)]
call
(
cmds
)
cmds
=
[
'pkill'
,
'-P'
,
str
(
webui_process
.
pid
)]
call
(
cmds
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
raise
Exception
(
ERROR_INFO
%
'Rest
ful
server stopped!'
)
exit
(
0
)
#start webui
print_normal
(
'Checking web ui...'
)
if
check_web_ui
():
print_error
(
'{0} {1}'
.
format
(
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
)),
'is being used, please stop it first!'
))
print_normal
(
'You can use
\'
nnictl webui stop
\'
to stop old web ui process...'
)
else
:
print_normal
(
'Starting web ui...'
)
webui_process
=
start_web_ui
(
webuiport
)
if
webui_process
:
nni_config
.
set_config
(
'webuiPid'
,
webui_process
.
pid
)
print_normal
(
'Starting web ui success!'
)
print_normal
(
'{0} {1}'
.
format
(
'Web UI url:'
,
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
))))
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
REST_PORT
))
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
REST_PORT
,
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
))))
def
resume_experiment
(
args
):
'''resume an experiment'''
...
...
tools/nnicmd/nnictl_utils.py
View file @
2921e143
...
...
@@ -64,17 +64,20 @@ def stop_experiment(args):
stop_web_ui
()
return
running
,
_
=
check_rest_server_quick
(
rest_port
)
stop_rest_result
=
True
if
running
:
response
=
rest_delete
(
experiment_url
(
rest_port
),
20
)
if
not
response
or
not
check_response
(
response
):
print_error
(
'Stop experiment failed!'
)
stop_rest_result
=
False
#sleep to wait for the rest handler to finish
time
.
sleep
(
3
)
rest_pid
=
nni_config
.
get_config
(
'restServerPid'
)
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_pid
)]
call
(
cmds
)
stop_web_ui
()
print_normal
(
'Stop experiment success!'
)
if
stop_rest_result
:
print_normal
(
'Stop experiment success!'
)
def
trial_ls
(
args
):
'''List trial'''
...
...
tools/trial_tool/trial_keeper.py
View file @
2921e143
...
...
@@ -45,7 +45,6 @@ def main_loop(args):
# Notice: We don't specify env, which means the subprocess will inherit the current environment; that is the expected behavior
process
=
Popen
(
args
.
trial_command
,
shell
=
True
,
stdout
=
stdout_file
,
stderr
=
stderr_file
)
print
(
'Subprocess pid is {}'
.
format
(
process
.
pid
))
print
(
'Current cwd is {}'
.
format
(
os
.
getcwd
()))
while
True
:
retCode
=
process
.
poll
()
## Read experiment metrics, to avoid missing metrics
...
...
@@ -55,15 +54,15 @@ def main_loop(args):
print
(
'subprocess terminated. Exit code is {}. Quit'
.
format
(
retCode
))
#copy local directory to hdfs
nni_local_output_dir
=
os
.
environ
[
'NNI_OUTPUT_DIR'
]
hdfs_client
=
HdfsClient
(
hosts
=
'{0}:{1}'
.
format
(
args
.
pai_hdfs_host
,
'50070'
),
user_name
=
args
.
pai_user_name
)
print
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
hdfs_client
=
HdfsClient
(
hosts
=
'{0}:{1}'
.
format
(
args
.
pai_hdfs_host
,
'50070'
),
user_name
=
args
.
pai_user_name
,
timeout
=
5
)
try
:
if
copyDirectoryToHdfs
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
,
hdfs_client
):
print
(
'copy directory
success!'
)
print
(
'copy directory
from {0} to {1} success!'
.
format
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
)
else
:
print
(
'copy directory f
ailed!'
)
print
(
'copy directory f
rom {0} to {1} failed!'
.
format
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
)
except
Exception
as
exception
:
print
(
exception
)
print
(
'HDFS copy directory got exception'
)
raise
exception
## Exit with the return code of the subprocess (trial)
exit
(
retCode
)
...
...
@@ -91,7 +90,10 @@ if __name__ == '__main__':
try
:
main_loop
(
args
)
except
:
print
(
'Exiting by user request'
)
except
SystemExit
as
se
:
print
(
'NNI trial keeper exit with code {}'
.
format
(
se
.
code
))
sys
.
exit
(
se
.
code
)
except
Exception
as
e
:
print
(
'Exit trial keeper with code 1 because Exception: {} is catched'
.
format
(
str
(
e
)))
sys
.
exit
(
1
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment