Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f1bfdd80
Unverified
Commit
f1bfdd80
authored
Sep 24, 2021
by
SparkSnail
Committed by
GitHub
Sep 24, 2021
Browse files
Fix aml stop environment logic (#4199)
parent
62b6ec0c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
45 additions
and
10 deletions
+45
-10
ts/nni_manager/config/aml/amlUtil.py
ts/nni_manager/config/aml/amlUtil.py
+10
-0
ts/nni_manager/training_service/reusable/aml/amlClient.ts
ts/nni_manager/training_service/reusable/aml/amlClient.ts
+11
-1
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
...ng_service/reusable/environments/amlEnvironmentService.ts
+6
-1
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+18
-8
No files found.
ts/nni_manager/config/aml/amlUtil.py
View file @
f1bfdd80
...
...
@@ -44,6 +44,16 @@ if __name__ == "__main__":
print
(
'tracking_url:'
+
run
.
get_portal_url
())
elif
line
==
'stop'
:
run
.
cancel
()
loop_count
=
0
status
=
run
.
get_status
()
# wait until the run is canceled
while
status
!=
'Canceled'
:
if
loop_count
>
5
:
print
(
'stop_result:failed'
)
exit
(
0
)
loop_count
+=
1
time
.
sleep
(
500
)
print
(
'stop_result:success'
)
exit
(
0
)
elif
line
==
'receive'
:
print
(
'receive:'
+
json
.
dumps
(
run
.
get_metrics
()))
...
...
ts/nni_manager/training_service/reusable/aml/amlClient.ts
View file @
f1bfdd80
...
...
@@ -60,11 +60,21 @@ export class AMLClient {
return
deferred
.
promise
;
}
/**
 * Ask the Python helper process to stop the AML run and report the outcome.
 *
 * Sends the `stop` command through the python shell and waits for a message
 * carrying a `stop_result` value.
 *
 * @returns A promise resolving to `true` when the reported result is
 *          `success` and to `false` when it is `failed`. The promise stays
 *          pending if no `stop_result` message is ever received.
 * @throws Error when the python shell client has not been initialized.
 */
public stop(): Promise<boolean> {
    if (this.pythonShellClient === undefined) {
        throw Error('python shell client not initialized!');
    }
    const shell = this.pythonShellClient;
    const deferred: Deferred<boolean> = new Deferred<boolean>();
    const onMessage = (result: any): void => {
        const stopResult = this.parseContent('stop_result', result);
        if (stopResult !== 'success' && stopResult !== 'failed') {
            // Not a stop result; keep listening.
            return;
        }
        // Detach once the answer arrives so repeated stop() calls do not
        // accumulate listeners on the shell.
        shell.removeListener('message', onMessage);
        deferred.resolve(stopResult === 'success');
    };
    // Subscribe before sending so the reply cannot be missed.
    shell.on('message', onMessage);
    shell.send('stop');
    return deferred.promise;
}
public
getTrackingUrl
():
Promise
<
string
>
{
...
...
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
View file @
f1bfdd80
...
...
@@ -127,6 +127,11 @@ export class AMLEnvironmentService extends EnvironmentService {
if
(
!
amlClient
)
{
throw
new
Error
(
'
AML client not initialized!
'
);
}
amlClient
.
stop
();
const
result
=
await
amlClient
.
stop
();
if
(
result
)
{
this
.
log
.
info
(
`Stop aml run
${
environment
.
id
}
success!`
);
}
else
{
this
.
log
.
info
(
`Stop aml run
${
environment
.
id
}
failed!`
);
}
}
}
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
f1bfdd80
...
...
@@ -299,6 +299,16 @@ class TrialDispatcher implements TrainingService {
/**
 * No-op: both arguments are ignored and the returned promise resolves
 * without a value.
 */
public async setClusterMetadata(_key: string, _value: string): Promise<void> {
    // Intentionally empty.
}
/**
 * Always resolves to an empty string; the key argument is ignored.
 */
public async getClusterMetadata(_key: string): Promise<string> {
    const emptyMetadata = "";
    return emptyMetadata;
}
/**
 * Stop a single environment through the environment service attached to it,
 * logging before and after the call.
 *
 * @param environment The environment to stop.
 * @throws Error when the environment has no `environmentService`.
 */
public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
    const service = environment.environmentService;
    if (service === undefined) {
        throw new Error(`${environment.id} do not have environmentService!`);
    }

    const envId = environment.id;
    this.log.info(`stopping environment ${envId}...`);
    await service.stopEnvironment(environment);
    this.log.info(`stopped environment ${environment.id}.`);
}
public
async
cleanUp
():
Promise
<
void
>
{
if
(
this
.
commandEmitter
===
undefined
)
{
throw
new
Error
(
`TrialDispatcher: commandEmitter shouldn't be undefined in cleanUp.`
);
...
...
@@ -306,16 +316,12 @@ class TrialDispatcher implements TrainingService {
this
.
stopping
=
true
;
this
.
shouldUpdateTrials
=
true
;
const
environments
=
[...
this
.
environments
.
values
()];
const
stopEnvironmentPromise
:
Promise
<
void
>
[]
=
[];
for
(
let
index
=
0
;
index
<
environments
.
length
;
index
++
)
{
const
environment
=
environments
[
index
];
this
.
log
.
info
(
`stopping environment
${
environment
.
id
}
...`
);
if
(
environment
.
environmentService
===
undefined
)
{
throw
new
Error
(
`
${
environment
.
id
}
do not have environmentService!`
);
}
await
environment
.
environmentService
.
stopEnvironment
(
environment
);
this
.
log
.
info
(
`stopped environment
${
environment
.
id
}
.`
);
stopEnvironmentPromise
.
push
(
this
.
stopEnvironment
(
environments
[
index
]));
}
await
Promise
.
all
(
stopEnvironmentPromise
);
this
.
commandEmitter
.
off
(
"
command
"
,
this
.
handleCommand
);
for
(
const
commandChannel
of
this
.
commandChannelSet
)
{
await
commandChannel
.
stop
();
...
...
@@ -650,6 +656,10 @@ class TrialDispatcher implements TrainingService {
}
private
async
requestEnvironment
(
environmentService
:
EnvironmentService
):
Promise
<
void
>
{
if
(
this
.
stopping
)
{
this
.
log
.
info
(
`Experiment is stopping, stop creating new environment`
);
return
;
}
const
envId
=
uniqueString
(
5
);
const
envName
=
`nni_exp_
${
this
.
experimentId
}
_env_
${
envId
}
`
;
const
environment
=
environmentService
.
createEnvironmentInformation
(
envId
,
envName
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment