Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
2921e143
Unverified
Commit
2921e143
authored
Sep 30, 2018
by
fishyds
Committed by
GitHub
Sep 30, 2018
Browse files
Merge pull request #154 from Microsoft/v0.2
Merge V0.2 branch back to master
parents
2a28a578
35900e2a
Changes
27
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
75 additions
and
52 deletions
+75
-52
src/webui/src/components/Sessionpro.tsx
src/webui/src/components/Sessionpro.tsx
+4
-4
src/webui/src/components/TrialStatus.tsx
src/webui/src/components/TrialStatus.tsx
+3
-3
tools/nnicmd/common_utils.py
tools/nnicmd/common_utils.py
+6
-2
tools/nnicmd/constants.py
tools/nnicmd/constants.py
+24
-9
tools/nnicmd/launcher.py
tools/nnicmd/launcher.py
+24
-25
tools/nnicmd/nnictl_utils.py
tools/nnicmd/nnictl_utils.py
+4
-1
tools/trial_tool/trial_keeper.py
tools/trial_tool/trial_keeper.py
+10
-8
No files found.
src/webui/src/components/Sessionpro.tsx
View file @
2921e143
...
...
@@ -98,10 +98,10 @@ class Sessionpro extends React.Component<{}, SessionState> {
let
sessionData
=
res
.
data
;
let
tunerAsstemp
=
[];
let
trialPro
=
[];
const
startExper
=
new
Date
(
sessionData
.
startTime
).
toLocaleString
();
const
startExper
=
new
Date
(
sessionData
.
startTime
).
toLocaleString
(
'
en-US
'
);
let
experEndStr
:
string
;
if
(
sessionData
.
endTime
!==
undefined
)
{
experEndStr
=
new
Date
(
sessionData
.
endTime
).
toLocaleString
();
experEndStr
=
new
Date
(
sessionData
.
endTime
).
toLocaleString
(
'
en-US
'
);
}
else
{
experEndStr
=
'
not over
'
;
}
...
...
@@ -156,8 +156,8 @@ class Sessionpro extends React.Component<{}, SessionState> {
const
desJobDetail
:
Parameters
=
{
parameters
:
{}
};
const
startTime
=
new
Date
(
tableData
[
item
].
startTime
).
toLocaleString
();
const
endTime
=
new
Date
(
tableData
[
item
].
endTime
).
toLocaleString
();
const
startTime
=
new
Date
(
tableData
[
item
].
startTime
).
toLocaleString
(
'
en-US
'
);
const
endTime
=
new
Date
(
tableData
[
item
].
endTime
).
toLocaleString
(
'
en-US
'
);
const
duration
=
(
tableData
[
item
].
endTime
-
tableData
[
item
].
startTime
)
/
1000
;
let
acc
;
if
(
tableData
[
item
].
finalMetricData
)
{
...
...
src/webui/src/components/TrialStatus.tsx
View file @
2921e143
...
...
@@ -230,10 +230,10 @@ class TrialStatus extends React.Component<{}, TabState> {
?
trialJobs
[
item
].
status
:
''
;
const
startTime
=
trialJobs
[
item
].
startTime
!==
undefined
?
new
Date
(
trialJobs
[
item
].
startTime
).
toLocaleString
()
?
new
Date
(
trialJobs
[
item
].
startTime
).
toLocaleString
(
'
en-US
'
)
:
''
;
const
endTime
=
trialJobs
[
item
].
endTime
!==
undefined
?
new
Date
(
trialJobs
[
item
].
endTime
).
toLocaleString
()
?
new
Date
(
trialJobs
[
item
].
endTime
).
toLocaleString
(
'
en-US
'
)
:
''
;
if
(
trialJobs
[
item
].
hyperParameters
!==
undefined
)
{
desc
.
parameters
=
JSON
.
parse
(
trialJobs
[
item
].
hyperParameters
).
parameters
;
...
...
@@ -394,7 +394,7 @@ class TrialStatus extends React.Component<{}, TabState> {
dataIndex
:
'
start
'
,
key
:
'
start
'
,
width
:
'
15%
'
,
sorter
:
(
a
:
TableObj
,
b
:
TableObj
):
number
=>
a
.
start
.
localeCom
pare
(
b
.
start
)
sorter
:
(
a
:
TableObj
,
b
:
TableObj
):
number
=>
(
Date
.
parse
(
a
.
start
)
-
Date
.
par
s
e
(
b
.
start
)
)
},
{
title
:
'
End
'
,
dataIndex
:
'
end
'
,
...
...
tools/nnicmd/common_utils.py
View file @
2921e143
...
...
@@ -21,7 +21,7 @@
import
json
import
yaml
import
psutil
from
.constants
import
ERROR_INFO
,
NORMAL_INFO
from
.constants
import
ERROR_INFO
,
NORMAL_INFO
,
WARNING_INFO
,
COLOR_RED_FORMAT
,
COLOR_YELLOW_FORMAT
def
get_yml_content
(
file_path
):
'''Load yaml file content'''
...
...
@@ -43,12 +43,16 @@ def get_json_content(file_path):
def
print_error
(
content
):
'''Print error information to screen'''
print
(
ERROR_INFO
%
content
)
print
(
COLOR_RED_FORMAT
%
(
ERROR_INFO
%
content
)
)
def
print_normal
(
content
):
'''Print error information to screen'''
print
(
NORMAL_INFO
%
content
)
def
print_warning
(
content
):
'''Print warning information to screen'''
print
(
COLOR_YELLOW_FORMAT
%
(
WARNING_INFO
%
content
))
def
detect_process
(
pid
):
'''Detect if a process is alive'''
try
:
...
...
tools/nnicmd/constants.py
View file @
2921e143
...
...
@@ -34,22 +34,37 @@ STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout')
STDERR_FULL_PATH
=
os
.
path
.
join
(
LOG_DIR
,
'stderr'
)
ERROR_INFO
=
'E
rror
: %s'
ERROR_INFO
=
'E
RROR
: %s'
NORMAL_INFO
=
'I
nfo
: %s'
NORMAL_INFO
=
'I
NFO
: %s'
WARNING_INFO
=
'W
aining
: %s'
WARNING_INFO
=
'W
ARNING
: %s'
EXPERIMENT_SUCCESS_INFO
=
'Start experiment success! The experiment id is %s, and the restful server post is %s.
\n
'
\
'You can use these commands to get more information about this experiment:
\n
'
\
EXPERIMENT_SUCCESS_INFO
=
'
\033
[1;32;32mSuccessfully started experiment!
\n\033
[0m'
\
'-----------------------------------------------------------------------
\n
'
\
'The experiment id is %s
\n
'
\
'The restful server post is %s
\n
'
\
'The Web UI urls are: %s
\n
'
\
'-----------------------------------------------------------------------
\n\n
'
\
'You can use these commands to get more information about the experiment
\n
'
\
'-----------------------------------------------------------------------
\n
'
\
' commands description
\n
'
\
'1. nnictl experiment show show the information of experiments
\n
'
\
'2. nnictl trial ls list all of trial jobs
\n
'
\
'3. nnictl stop stop a experiment
\n
'
\
'4. nnictl trial kill kill a trial job by id
\n
'
\
'5. nnictl --help get help information about nnictl
\n
'
\
'6. nnictl webui url get the url of web ui'
'3. nnictl log stderr show stderr log content
\n
'
\
'4. nnictl log stdout show stdout log content
\n
'
\
'5. nnictl stop stop a experiment
\n
'
\
'6. nnictl trial kill kill a trial job by id
\n
'
\
'7. nnictl webui url get the url of web ui
\n
'
\
'8. nnictl --help get help information about nnictl
\n
'
\
'-----------------------------------------------------------------------
\n
'
\
PACKAGE_REQUIREMENTS
=
{
'SMAC'
:
'smac_tuner'
}
COLOR_RED_FORMAT
=
'
\033
[1;31;31m%s
\033
[0m'
COLOR_GREEN_FORMAT
=
'
\033
[1;32;32m%s
\033
[0m'
COLOR_YELLOW_FORMAT
=
'
\033
[1;33;33m%s
\033
[0m'
\ No newline at end of file
tools/nnicmd/launcher.py
View file @
2921e143
...
...
@@ -30,13 +30,13 @@ from .launcher_utils import validate_all_content
from
.rest_utils
import
rest_put
,
rest_post
,
check_rest_server
,
check_rest_server_quick
,
check_response
from
.url_utils
import
cluster_metadata_url
,
experiment_url
from
.config_utils
import
Config
from
.common_utils
import
get_yml_content
,
get_json_content
,
print_error
,
print_normal
,
detect_process
from
.constants
import
EXPERIMENT_SUCCESS_INFO
,
STDOUT_FULL_PATH
,
STDERR_FULL_PATH
,
LOG_DIR
,
REST_PORT
,
ERROR_INFO
,
NORMAL_INFO
from
.common_utils
import
get_yml_content
,
get_json_content
,
print_error
,
print_normal
,
print_warning
,
detect_process
from
.constants
import
*
from
.webui_utils
import
start_web_ui
,
check_web_ui
def
start_rest_server
(
port
,
platform
,
mode
,
experiment_id
=
None
):
'''Run nni manager process'''
print_normal
(
'Checking e
xperi
ment...'
)
print_normal
(
'Checking e
nviron
ment...'
)
nni_config
=
Config
()
rest_port
=
nni_config
.
get_config
(
'restServerPort'
)
running
,
_
=
check_rest_server_quick
(
rest_port
)
...
...
@@ -191,6 +191,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
# Deal with annotation
if
experiment_config
.
get
(
'useAnnotation'
):
path
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'nni'
,
'annotation'
)
if
not
os
.
path
.
isdir
(
path
):
os
.
makedirs
(
path
)
path
=
tempfile
.
mkdtemp
(
dir
=
path
)
code_dir
=
expand_annotations
(
experiment_config
[
'trial'
][
'codeDir'
],
path
)
experiment_config
[
'trial'
][
'codeDir'
]
=
code_dir
...
...
@@ -204,10 +206,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
experiment_config
[
'searchSpace'
]
=
json
.
dumps
(
''
)
# check rest server
print_normal
(
'Checking restful server...'
)
running
,
_
=
check_rest_server
(
REST_PORT
)
if
running
:
print_normal
(
'
R
es
t
ful
server start success
!'
)
print_normal
(
'
Succ
es
s
ful
ly started Restful server
!'
)
else
:
print_error
(
'Restful server start failed!'
)
try
:
...
...
@@ -236,7 +237,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
if
experiment_config
[
'trainingServicePlatform'
]
==
'local'
:
print_normal
(
'Setting local config...'
)
if
set_local_config
(
experiment_config
,
REST_PORT
):
print_normal
(
'Success!'
)
print_normal
(
'Success
fully set local config
!'
)
else
:
print_error
(
'Failed!'
)
try
:
...
...
@@ -251,7 +252,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
print_normal
(
'Setting pai config...'
)
config_result
,
err_msg
=
set_pai_config
(
experiment_config
,
REST_PORT
)
if
config_result
:
print_normal
(
'Success!'
)
print_normal
(
'Success
fully set pai config
!'
)
else
:
if
err_msg
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
...
...
@@ -259,8 +260,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_process
.
pid
)]
call
(
cmds
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
raise
Exception
(
ERROR_INFO
%
'Rest
ful
server stopped!'
)
exit
(
0
)
#start webui
if
check_web_ui
():
print_warning
(
'{0} {1}'
.
format
(
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
)),
'is being used, please stop it first!'
))
print_normal
(
'You can use
\'
nnictl webui stop
\'
to stop old Web UI process...'
)
else
:
print_normal
(
'Starting Web UI...'
)
webui_process
=
start_web_ui
(
webuiport
)
if
webui_process
:
nni_config
.
set_config
(
'webuiPid'
,
webui_process
.
pid
)
print_normal
(
'Successfully started Web UI!'
)
# start a new experiment
print_normal
(
'Starting experiment...'
)
...
...
@@ -274,25 +286,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
try
:
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_process
.
pid
)]
call
(
cmds
)
cmds
=
[
'pkill'
,
'-P'
,
str
(
webui_process
.
pid
)]
call
(
cmds
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
raise
Exception
(
ERROR_INFO
%
'Rest
ful
server stopped!'
)
exit
(
0
)
#start webui
print_normal
(
'Checking web ui...'
)
if
check_web_ui
():
print_error
(
'{0} {1}'
.
format
(
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
)),
'is being used, please stop it first!'
))
print_normal
(
'You can use
\'
nnictl webui stop
\'
to stop old web ui process...'
)
else
:
print_normal
(
'Starting web ui...'
)
webui_process
=
start_web_ui
(
webuiport
)
if
webui_process
:
nni_config
.
set_config
(
'webuiPid'
,
webui_process
.
pid
)
print_normal
(
'Starting web ui success!'
)
print_normal
(
'{0} {1}'
.
format
(
'Web UI url:'
,
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
))))
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
REST_PORT
))
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
REST_PORT
,
' '
.
join
(
nni_config
.
get_config
(
'webuiUrl'
))))
def
resume_experiment
(
args
):
'''resume an experiment'''
...
...
tools/nnicmd/nnictl_utils.py
View file @
2921e143
...
...
@@ -64,17 +64,20 @@ def stop_experiment(args):
stop_web_ui
()
return
running
,
_
=
check_rest_server_quick
(
rest_port
)
stop_rest_result
=
True
if
running
:
response
=
rest_delete
(
experiment_url
(
rest_port
),
20
)
if
not
response
or
not
check_response
(
response
):
print_error
(
'Stop experiment failed!'
)
stop_rest_result
=
False
#sleep to wait rest handler done
time
.
sleep
(
3
)
rest_pid
=
nni_config
.
get_config
(
'restServerPid'
)
cmds
=
[
'pkill'
,
'-P'
,
str
(
rest_pid
)]
call
(
cmds
)
stop_web_ui
()
print_normal
(
'Stop experiment success!'
)
if
stop_rest_result
:
print_normal
(
'Stop experiment success!'
)
def
trial_ls
(
args
):
'''List trial'''
...
...
tools/trial_tool/trial_keeper.py
View file @
2921e143
...
...
@@ -45,7 +45,6 @@ def main_loop(args):
# Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior
process
=
Popen
(
args
.
trial_command
,
shell
=
True
,
stdout
=
stdout_file
,
stderr
=
stderr_file
)
print
(
'Subprocess pid is {}'
.
format
(
process
.
pid
))
print
(
'Current cwd is {}'
.
format
(
os
.
getcwd
()))
while
True
:
retCode
=
process
.
poll
()
## Read experiment metrics, to avoid missing metrics
...
...
@@ -55,15 +54,15 @@ def main_loop(args):
print
(
'subprocess terminated. Exit code is {}. Quit'
.
format
(
retCode
))
#copy local directory to hdfs
nni_local_output_dir
=
os
.
environ
[
'NNI_OUTPUT_DIR'
]
hdfs_client
=
HdfsClient
(
hosts
=
'{0}:{1}'
.
format
(
args
.
pai_hdfs_host
,
'50070'
),
user_name
=
args
.
pai_user_name
)
print
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
hdfs_client
=
HdfsClient
(
hosts
=
'{0}:{1}'
.
format
(
args
.
pai_hdfs_host
,
'50070'
),
user_name
=
args
.
pai_user_name
,
timeout
=
5
)
try
:
if
copyDirectoryToHdfs
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
,
hdfs_client
):
print
(
'copy directory
success!'
)
print
(
'copy directory
from {0} to {1} success!'
.
format
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
)
else
:
print
(
'copy directory f
ailed!'
)
print
(
'copy directory f
rom {0} to {1} failed!'
.
format
(
nni_local_output_dir
,
args
.
pai_hdfs_output_dir
)
)
except
Exception
as
exception
:
print
(
exception
)
print
(
'HDFS copy directory got exception'
)
raise
exception
## Exit as the retCode of subprocess(trial)
exit
(
retCode
)
...
...
@@ -91,7 +90,10 @@ if __name__ == '__main__':
try
:
main_loop
(
args
)
except
:
print
(
'Exiting by user request'
)
except
SystemExit
as
se
:
print
(
'NNI trial keeper exit with code {}'
.
format
(
se
.
code
))
sys
.
exit
(
se
.
code
)
except
Exception
as
e
:
print
(
'Exit trial keeper with code 1 because Exception: {} is catched'
.
format
(
str
(
e
)))
sys
.
exit
(
1
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment