Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
df39b5ea
Unverified
Commit
df39b5ea
authored
Jan 22, 2019
by
chicm-ms
Committed by
GitHub
Jan 22, 2019
Browse files
Detect tuner failing (#635)
parent
2b300395
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
26 additions
and
4 deletions
+26
-4
src/nni_manager/core/commands.ts
src/nni_manager/core/commands.ts
+3
-0
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+16
-3
src/sdk/pynni/nni/msg_dispatcher_base.py
src/sdk/pynni/nni/msg_dispatcher_base.py
+6
-1
src/sdk/pynni/nni/protocol.py
src/sdk/pynni/nni/protocol.py
+1
-0
No files found.
src/nni_manager/core/commands.ts
View file @
df39b5ea
...
@@ -25,6 +25,7 @@ const UPDATE_SEARCH_SPACE = 'SS';
...
@@ -25,6 +25,7 @@ const UPDATE_SEARCH_SPACE = 'SS';
const
ADD_CUSTOMIZED_TRIAL_JOB
=
'
AD
'
;
const
ADD_CUSTOMIZED_TRIAL_JOB
=
'
AD
'
;
const
TRIAL_END
=
'
EN
'
;
const
TRIAL_END
=
'
EN
'
;
const
TERMINATE
=
'
TE
'
;
const
TERMINATE
=
'
TE
'
;
const
PING
=
'
PI
'
;
const
INITIALIZED
=
'
ID
'
;
const
INITIALIZED
=
'
ID
'
;
const
NEW_TRIAL_JOB
=
'
TR
'
;
const
NEW_TRIAL_JOB
=
'
TR
'
;
...
@@ -39,6 +40,7 @@ const TUNER_COMMANDS: Set<string> = new Set([
...
@@ -39,6 +40,7 @@ const TUNER_COMMANDS: Set<string> = new Set([
UPDATE_SEARCH_SPACE
,
UPDATE_SEARCH_SPACE
,
ADD_CUSTOMIZED_TRIAL_JOB
,
ADD_CUSTOMIZED_TRIAL_JOB
,
TERMINATE
,
TERMINATE
,
PING
,
INITIALIZED
,
INITIALIZED
,
NEW_TRIAL_JOB
,
NEW_TRIAL_JOB
,
...
@@ -63,6 +65,7 @@ export {
...
@@ -63,6 +65,7 @@ export {
ADD_CUSTOMIZED_TRIAL_JOB
,
ADD_CUSTOMIZED_TRIAL_JOB
,
TRIAL_END
,
TRIAL_END
,
TERMINATE
,
TERMINATE
,
PING
,
INITIALIZED
,
INITIALIZED
,
NEW_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
NO_MORE_TRIAL_JOBS
,
...
...
src/nni_manager/core/nnimanager.ts
View file @
df39b5ea
...
@@ -35,15 +35,15 @@ import {
...
@@ -35,15 +35,15 @@ import {
import
{
import
{
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
}
from
'
../common/trainingService
'
;
}
from
'
../common/trainingService
'
;
import
{
delay
,
get
LogDir
,
getCheckpoint
Dir
,
getMsgDispatcherCommand
,
mkDirP
}
from
'
../common/utils
'
;
import
{
delay
,
get
CheckpointDir
,
getLog
Dir
,
getMsgDispatcherCommand
,
mkDirP
}
from
'
../common/utils
'
;
import
{
import
{
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
}
from
'
./commands
'
;
}
from
'
./commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
/**
/**
* NNIManager
* NNIManager
which implements Manager interface
*/
*/
class
NNIManager
implements
Manager
{
class
NNIManager
implements
Manager
{
private
trainingService
:
TrainingService
;
private
trainingService
:
TrainingService
;
...
@@ -360,6 +360,16 @@ class NNIManager implements Manager {
...
@@ -360,6 +360,16 @@ class NNIManager implements Manager {
}
}
}
}
private
async
pingDispatcher
():
Promise
<
void
>
{
if
(
this
.
dispatcher
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
while
(
!
[
'
ERROR
'
,
'
STOPPING
'
,
'
STOPPED
'
].
includes
(
this
.
status
.
status
))
{
await
delay
(
1000
*
5
);
this
.
dispatcher
.
sendCommand
(
PING
);
}
}
private
async
requestTrialJobsStatus
():
Promise
<
number
>
{
private
async
requestTrialJobsStatus
():
Promise
<
number
>
{
let
finishedTrialJobNum
:
number
=
0
;
let
finishedTrialJobNum
:
number
=
0
;
if
(
this
.
dispatcher
===
undefined
)
{
if
(
this
.
dispatcher
===
undefined
)
{
...
@@ -536,6 +546,9 @@ class NNIManager implements Manager {
...
@@ -536,6 +546,9 @@ class NNIManager implements Manager {
await
Promise
.
all
([
await
Promise
.
all
([
this
.
periodicallyUpdateExecDuration
(),
this
.
periodicallyUpdateExecDuration
(),
this
.
pingDispatcher
().
catch
((
err
:
Error
)
=>
{
throw
new
NNIError
(
'
Dispatcher error
'
,
`Dispatcher error:
${
err
.
message
}
`
,
err
);
}),
this
.
trainingService
.
run
().
catch
((
err
:
Error
)
=>
{
this
.
trainingService
.
run
().
catch
((
err
:
Error
)
=>
{
throw
new
NNIError
(
'
Training service error
'
,
`Training service error:
${
err
.
message
}
`
,
err
);
throw
new
NNIError
(
'
Training service error
'
,
`Training service error:
${
err
.
message
}
`
,
err
);
}),
}),
...
...
src/sdk/pynni/nni/msg_dispatcher_base.py
View file @
df39b5ea
...
@@ -83,7 +83,8 @@ class MsgDispatcherBase(Recoverable):
...
@@ -83,7 +83,8 @@ class MsgDispatcherBase(Recoverable):
_logger
.
debug
(
'handle request: command: [{}], data: [{}]'
.
format
(
command
,
data
))
_logger
.
debug
(
'handle request: command: [{}], data: [{}]'
.
format
(
command
,
data
))
data
=
json_tricks
.
loads
(
data
)
if
data
:
data
=
json_tricks
.
loads
(
data
)
command_handlers
=
{
command_handlers
=
{
# Tunner commands:
# Tunner commands:
...
@@ -96,12 +97,16 @@ class MsgDispatcherBase(Recoverable):
...
@@ -96,12 +97,16 @@ class MsgDispatcherBase(Recoverable):
CommandType
.
ReportMetricData
:
self
.
handle_report_metric_data
,
CommandType
.
ReportMetricData
:
self
.
handle_report_metric_data
,
CommandType
.
TrialEnd
:
self
.
handle_trial_end
,
CommandType
.
TrialEnd
:
self
.
handle_trial_end
,
CommandType
.
Ping
:
self
.
handle_ping
,
}
}
if
command
not
in
command_handlers
:
if
command
not
in
command_handlers
:
raise
AssertionError
(
'Unsupported command: {}'
.
format
(
command
))
raise
AssertionError
(
'Unsupported command: {}'
.
format
(
command
))
return
command_handlers
[
command
](
data
)
return
command_handlers
[
command
](
data
)
def
handle_ping
(
self
,
data
):
pass
def
handle_initialize
(
self
,
data
):
def
handle_initialize
(
self
,
data
):
raise
NotImplementedError
(
'handle_initialize not implemented'
)
raise
NotImplementedError
(
'handle_initialize not implemented'
)
...
...
src/sdk/pynni/nni/protocol.py
View file @
df39b5ea
...
@@ -33,6 +33,7 @@ class CommandType(Enum):
...
@@ -33,6 +33,7 @@ class CommandType(Enum):
AddCustomizedTrialJob
=
b
'AD'
AddCustomizedTrialJob
=
b
'AD'
TrialEnd
=
b
'EN'
TrialEnd
=
b
'EN'
Terminate
=
b
'TE'
Terminate
=
b
'TE'
Ping
=
b
'PI'
# out
# out
Initialized
=
b
'ID'
Initialized
=
b
'ID'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment