Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
0a20c3fc
Unverified
Commit
0a20c3fc
authored
Jan 05, 2021
by
liuzhe-lz
Committed by
GitHub
Jan 05, 2021
Browse files
Fix RemoteConfig bug and add save log to dispatcher.log for nni.Experiment (#3245)
parent
8a08fab6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
10 deletions
+22
-10
nni/experiment/config/remote.py
nni/experiment/config/remote.py
+4
-0
nni/experiment/experiment.py
nni/experiment/experiment.py
+4
-1
nni/runtime/log.py
nni/runtime/log.py
+3
-0
ts/nni_manager/core/nnimanager.ts
ts/nni_manager/core/nnimanager.ts
+11
-9
No files found.
nni/experiment/config/remote.py
View file @
0a20c3fc
...
...
@@ -51,6 +51,10 @@ class RemoteConfig(TrainingServiceConfig):
kwargs
[
'machinelist'
]
=
util
.
load_config
(
RemoteMachineConfig
,
kwargs
.
get
(
'machinelist'
))
super
().
__init__
(
**
kwargs
)
_canonical_rules
=
{
'machine_list'
:
lambda
value
:
[
config
.
canonical
()
for
config
in
value
]
}
_validation_rules
=
{
'platform'
:
lambda
value
:
(
value
==
'remote'
,
'cannot be modified'
)
}
nni/experiment/experiment.py
View file @
0a20c3fc
...
...
@@ -139,7 +139,7 @@ class Experiment:
"""
Stop background experiment.
"""
_logger
.
info
(
'Stopping experiment...'
)
_logger
.
info
(
'Stopping experiment
, please wait
...'
)
atexit
.
unregister
(
self
.
stop
)
if
self
.
_proc
is
not
None
:
...
...
@@ -155,6 +155,7 @@ class Experiment:
self
.
_pipe
=
None
self
.
_dispatcher
=
None
self
.
_dispatcher_thread
=
None
_logger
.
info
(
'Experiment stopped'
)
def
run
(
self
,
port
:
int
=
8080
,
debug
:
bool
=
False
)
->
bool
:
...
...
@@ -174,6 +175,8 @@ class Experiment:
return
True
if
status
==
'ERROR'
:
return
False
except
KeyboardInterrupt
:
_logger
.
warning
(
'KeyboardInterrupt detected'
)
finally
:
self
.
stop
()
...
...
nni/runtime/log.py
View file @
0a20c3fc
...
...
@@ -46,6 +46,9 @@ def init_logger_experiment() -> None:
"""
formatter
.
format
=
_colorful_format
log_path
=
_prepare_log_dir
(
dispatcher_env_vars
.
NNI_LOG_DIRECTORY
)
/
'dispatcher.log'
_setup_root_logger
(
FileHandler
(
log_path
),
logging
.
DEBUG
)
time_format
=
'%Y-%m-%d %H:%M:%S'
...
...
ts/nni_manager/core/nnimanager.ts
View file @
0a20c3fc
...
...
@@ -450,15 +450,17 @@ class NNIManager implements Manager {
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
this
.
trainingService
.
removeTrialJobMetricListener
(
this
.
trialJobMetricListener
);
this
.
dispatcher
.
sendCommand
(
TERMINATE
);
let
tunerAlive
:
boolean
=
true
;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
if
(
!
tunerAlive
)
{
break
;
}
tunerAlive
=
await
isAlive
(
this
.
dispatcherPid
);
await
delay
(
1000
);
}
await
killPid
(
this
.
dispatcherPid
);
if
(
this
.
dispatcherPid
>
0
)
{
this
.
dispatcher
.
sendCommand
(
TERMINATE
);
let
tunerAlive
:
boolean
=
true
;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
if
(
!
tunerAlive
)
{
break
;
}
tunerAlive
=
await
isAlive
(
this
.
dispatcherPid
);
await
delay
(
1000
);
}
await
killPid
(
this
.
dispatcherPid
);
}
const
trialJobList
:
TrialJobDetail
[]
=
await
this
.
trainingService
.
listTrialJobs
();
// DON'T try to make it in parallel, the training service may not handle it well.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment