Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f1bfdd80
"...composable_kernel.git" did not exist on "75640f2278d0e4db87e59c53a056ade42e4cd7ee"
Unverified
Commit
f1bfdd80
authored
Sep 24, 2021
by
SparkSnail
Committed by
GitHub
Sep 24, 2021
Browse files
Fix aml stop environment logic (#4199)
parent
62b6ec0c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
45 additions
and
10 deletions
+45
-10
ts/nni_manager/config/aml/amlUtil.py
ts/nni_manager/config/aml/amlUtil.py
+10
-0
ts/nni_manager/training_service/reusable/aml/amlClient.ts
ts/nni_manager/training_service/reusable/aml/amlClient.ts
+11
-1
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
...ng_service/reusable/environments/amlEnvironmentService.ts
+6
-1
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+18
-8
No files found.
ts/nni_manager/config/aml/amlUtil.py
View file @
f1bfdd80
...
@@ -44,6 +44,16 @@ if __name__ == "__main__":
...
@@ -44,6 +44,16 @@ if __name__ == "__main__":
print
(
'tracking_url:'
+
run
.
get_portal_url
())
print
(
'tracking_url:'
+
run
.
get_portal_url
())
elif
line
==
'stop'
:
elif
line
==
'stop'
:
run
.
cancel
()
run
.
cancel
()
loop_count
=
0
status
=
run
.
get_status
()
# wait until the run is canceled
while
status
!=
'Canceled'
:
if
loop_count
>
5
:
print
(
'stop_result:failed'
)
exit
(
0
)
loop_count
+=
1
time
.
sleep
(
500
)
print
(
'stop_result:success'
)
exit
(
0
)
exit
(
0
)
elif
line
==
'receive'
:
elif
line
==
'receive'
:
print
(
'receive:'
+
json
.
dumps
(
run
.
get_metrics
()))
print
(
'receive:'
+
json
.
dumps
(
run
.
get_metrics
()))
...
...
ts/nni_manager/training_service/reusable/aml/amlClient.ts
View file @
f1bfdd80
...
@@ -60,11 +60,21 @@ export class AMLClient {
...
@@ -60,11 +60,21 @@ export class AMLClient {
return
deferred
.
promise
;
return
deferred
.
promise
;
}
}
public
stop
():
void
{
public
stop
():
Promise
<
boolean
>
{
if
(
this
.
pythonShellClient
===
undefined
)
{
if
(
this
.
pythonShellClient
===
undefined
)
{
throw
Error
(
'
python shell client not initialized!
'
);
throw
Error
(
'
python shell client not initialized!
'
);
}
}
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
this
.
pythonShellClient
.
send
(
'
stop
'
);
this
.
pythonShellClient
.
send
(
'
stop
'
);
this
.
pythonShellClient
.
on
(
'
message
'
,
(
result
:
any
)
=>
{
const
stopResult
=
this
.
parseContent
(
'
stop_result
'
,
result
);
if
(
stopResult
===
'
success
'
)
{
deferred
.
resolve
(
true
);
}
else
if
(
stopResult
===
'
failed
'
)
{
deferred
.
resolve
(
false
);
}
});
return
deferred
.
promise
;
}
}
public
getTrackingUrl
():
Promise
<
string
>
{
public
getTrackingUrl
():
Promise
<
string
>
{
...
...
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
View file @
f1bfdd80
...
@@ -127,6 +127,11 @@ export class AMLEnvironmentService extends EnvironmentService {
...
@@ -127,6 +127,11 @@ export class AMLEnvironmentService extends EnvironmentService {
if
(
!
amlClient
)
{
if
(
!
amlClient
)
{
throw
new
Error
(
'
AML client not initialized!
'
);
throw
new
Error
(
'
AML client not initialized!
'
);
}
}
amlClient
.
stop
();
const
result
=
await
amlClient
.
stop
();
if
(
result
)
{
this
.
log
.
info
(
`Stop aml run
${
environment
.
id
}
success!`
);
}
else
{
this
.
log
.
info
(
`Stop aml run
${
environment
.
id
}
failed!`
);
}
}
}
}
}
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
f1bfdd80
...
@@ -299,6 +299,16 @@ class TrialDispatcher implements TrainingService {
...
@@ -299,6 +299,16 @@ class TrialDispatcher implements TrainingService {
public
async
setClusterMetadata
(
_key
:
string
,
_value
:
string
):
Promise
<
void
>
{
return
;
}
public
async
setClusterMetadata
(
_key
:
string
,
_value
:
string
):
Promise
<
void
>
{
return
;
}
public
async
getClusterMetadata
(
_key
:
string
):
Promise
<
string
>
{
return
""
;
}
public
async
getClusterMetadata
(
_key
:
string
):
Promise
<
string
>
{
return
""
;
}
public
async
stopEnvironment
(
environment
:
EnvironmentInformation
):
Promise
<
void
>
{
if
(
environment
.
environmentService
===
undefined
)
{
throw
new
Error
(
`
${
environment
.
id
}
do not have environmentService!`
);
}
this
.
log
.
info
(
`stopping environment
${
environment
.
id
}
...`
);
await
environment
.
environmentService
.
stopEnvironment
(
environment
);
this
.
log
.
info
(
`stopped environment
${
environment
.
id
}
.`
);
return
;
}
public
async
cleanUp
():
Promise
<
void
>
{
public
async
cleanUp
():
Promise
<
void
>
{
if
(
this
.
commandEmitter
===
undefined
)
{
if
(
this
.
commandEmitter
===
undefined
)
{
throw
new
Error
(
`TrialDispatcher: commandEmitter shouldn't be undefined in cleanUp.`
);
throw
new
Error
(
`TrialDispatcher: commandEmitter shouldn't be undefined in cleanUp.`
);
...
@@ -306,16 +316,12 @@ class TrialDispatcher implements TrainingService {
...
@@ -306,16 +316,12 @@ class TrialDispatcher implements TrainingService {
this
.
stopping
=
true
;
this
.
stopping
=
true
;
this
.
shouldUpdateTrials
=
true
;
this
.
shouldUpdateTrials
=
true
;
const
environments
=
[...
this
.
environments
.
values
()];
const
environments
=
[...
this
.
environments
.
values
()];
const
stopEnvironmentPromise
:
Promise
<
void
>
[]
=
[];
for
(
let
index
=
0
;
index
<
environments
.
length
;
index
++
)
{
for
(
let
index
=
0
;
index
<
environments
.
length
;
index
++
)
{
const
environment
=
environments
[
index
];
stopEnvironmentPromise
.
push
(
this
.
stopEnvironment
(
environments
[
index
]));
this
.
log
.
info
(
`stopping environment
${
environment
.
id
}
...`
);
if
(
environment
.
environmentService
===
undefined
)
{
throw
new
Error
(
`
${
environment
.
id
}
do not have environmentService!`
);
}
await
environment
.
environmentService
.
stopEnvironment
(
environment
);
this
.
log
.
info
(
`stopped environment
${
environment
.
id
}
.`
);
}
}
await
Promise
.
all
(
stopEnvironmentPromise
);
this
.
commandEmitter
.
off
(
"
command
"
,
this
.
handleCommand
);
this
.
commandEmitter
.
off
(
"
command
"
,
this
.
handleCommand
);
for
(
const
commandChannel
of
this
.
commandChannelSet
)
{
for
(
const
commandChannel
of
this
.
commandChannelSet
)
{
await
commandChannel
.
stop
();
await
commandChannel
.
stop
();
...
@@ -650,6 +656,10 @@ class TrialDispatcher implements TrainingService {
...
@@ -650,6 +656,10 @@ class TrialDispatcher implements TrainingService {
}
}
private
async
requestEnvironment
(
environmentService
:
EnvironmentService
):
Promise
<
void
>
{
private
async
requestEnvironment
(
environmentService
:
EnvironmentService
):
Promise
<
void
>
{
if
(
this
.
stopping
)
{
this
.
log
.
info
(
`Experiment is stopping, stop creating new environment`
);
return
;
}
const
envId
=
uniqueString
(
5
);
const
envId
=
uniqueString
(
5
);
const
envName
=
`nni_exp_
${
this
.
experimentId
}
_env_
${
envId
}
`
;
const
envName
=
`nni_exp_
${
this
.
experimentId
}
_env_
${
envId
}
`
;
const
environment
=
environmentService
.
createEnvironmentInformation
(
envId
,
envName
);
const
environment
=
environmentService
.
createEnvironmentInformation
(
envId
,
envName
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment