Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
a922f9f0
Unverified
Commit
a922f9f0
Authored Jan 15, 2020 by chicm-ms
Committed by GitHub, Jan 15, 2020
Browse files
Cancel unknown trial job (#1943)
parent
7565d3c0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 15 additions and 6 deletions
+15
-6
src/nni_manager/training_service/pai/paiTrainingService.ts
src/nni_manager/training_service/pai/paiTrainingService.ts
+9
-6
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+6
-0
No files found.
src/nni_manager/training_service/pai/paiTrainingService.ts
View file @
a922f9f0
...
@@ -151,18 +151,20 @@ abstract class PAITrainingService implements TrainingService {
...
@@ -151,18 +151,20 @@ abstract class PAITrainingService implements TrainingService {
public
cancelTrialJob
(
trialJobId
:
string
,
isEarlyStopped
:
boolean
=
false
):
Promise
<
void
>
{
public
cancelTrialJob
(
trialJobId
:
string
,
isEarlyStopped
:
boolean
=
false
):
Promise
<
void
>
{
const
trialJobDetail
:
PAITrialJobDetail
|
undefined
=
this
.
trialJobsMap
.
get
(
trialJobId
);
const
trialJobDetail
:
PAITrialJobDetail
|
undefined
=
this
.
trialJobsMap
.
get
(
trialJobId
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
if
(
trialJobDetail
===
undefined
)
{
if
(
trialJobDetail
===
undefined
)
{
this
.
log
.
error
(
`cancelTrialJob: trial job id
${
trialJobId
}
not found`
);
return
Promise
.
reject
(
new
Error
(
`cancelTrialJob: trial job id
${
trialJobId
}
not found`
));
return
Promise
.
reject
();
}
}
if
(
this
.
paiClusterConfig
===
undefined
)
{
if
(
this
.
paiClusterConfig
===
undefined
)
{
throw
new
Error
(
'
PAI Cluster config is not initialized
'
);
return
Promise
.
reject
(
new
Error
(
'
PAI Cluster config is not initialized
'
)
)
;
}
}
if
(
this
.
paiToken
===
undefined
)
{
if
(
this
.
paiToken
===
undefined
)
{
throw
new
Error
(
'
PAI token is not initialized
'
);
return
Promise
.
reject
(
new
Error
(
'
PAI token is not initialized
'
));
}
if
(
trialJobDetail
.
status
===
'
UNKNOWN
'
)
{
trialJobDetail
.
status
=
'
USER_CANCELED
'
;
return
Promise
.
resolve
();
}
}
const
stopJobRequest
:
request
.
Options
=
{
const
stopJobRequest
:
request
.
Options
=
{
...
@@ -179,6 +181,7 @@ abstract class PAITrainingService implements TrainingService {
...
@@ -179,6 +181,7 @@ abstract class PAITrainingService implements TrainingService {
// Set trialjobDetail's early stopped field, to mark the job's cancellation source
// Set trialjobDetail's early stopped field, to mark the job's cancellation source
trialJobDetail
.
isEarlyStopped
=
isEarlyStopped
;
trialJobDetail
.
isEarlyStopped
=
isEarlyStopped
;
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
request
(
stopJobRequest
,
(
error
:
Error
,
response
:
request
.
Response
,
body
:
any
)
=>
{
request
(
stopJobRequest
,
(
error
:
Error
,
response
:
request
.
Response
,
body
:
any
)
=>
{
if
((
error
!==
undefined
&&
error
!==
null
)
||
response
.
statusCode
>=
400
)
{
if
((
error
!==
undefined
&&
error
!==
null
)
||
response
.
statusCode
>=
400
)
{
...
...
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
View file @
a922f9f0
...
@@ -277,6 +277,12 @@ class RemoteMachineTrainingService implements TrainingService {
...
@@ -277,6 +277,12 @@ class RemoteMachineTrainingService implements TrainingService {
throw
new
Error
(
`Invalid job id
${
trialJobId
}
, cannot find ssh client`
);
throw
new
Error
(
`Invalid job id
${
trialJobId
}
, cannot find ssh client`
);
}
}
if
(
trialJob
.
status
===
'
UNKNOWN
'
)
{
this
.
releaseTrialSSHClient
(
trialJob
);
trialJob
.
status
=
'
USER_CANCELED
'
;
return
}
const
jobpidPath
:
string
=
this
.
getJobPidPath
(
trialJob
.
id
);
const
jobpidPath
:
string
=
this
.
getJobPidPath
(
trialJob
.
id
);
try
{
try
{
// Mark the toEarlyStop tag here
// Mark the toEarlyStop tag here
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.