Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
fba2dd5c
Unverified
Commit
fba2dd5c
authored
Apr 30, 2020
by
George Cheng
Committed by
GitHub
Apr 30, 2020
Browse files
DLTS: Get NNI manager IP / port from endpoints (#2305)
parent
62d74565
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
64 additions
and
2 deletions
+64
-2
src/nni_manager/training_service/dlts/dltsTrainingService.ts
src/nni_manager/training_service/dlts/dltsTrainingService.ts
+64
-2
No files found.
src/nni_manager/training_service/dlts/dltsTrainingService.ts
View file @
fba2dd5c
...
@@ -38,7 +38,9 @@ class DLTSTrainingService implements TrainingService {
...
@@ -38,7 +38,9 @@ class DLTSTrainingService implements TrainingService {
private
versionCheck
:
boolean
=
true
;
private
versionCheck
:
boolean
=
true
;
private
logCollection
:
string
=
'
none
'
;
private
logCollection
:
string
=
'
none
'
;
private
isMultiPhase
:
boolean
=
false
;
private
isMultiPhase
:
boolean
=
false
;
private
dltsRestServerHost
:
string
;
private
dltsRestServerPort
?:
number
;
private
dltsRestServerPort
?:
number
;
private
jobMode
:
boolean
;
private
readonly
trialJobsMap
:
Map
<
string
,
DLTSTrialJobDetail
>
;
private
readonly
trialJobsMap
:
Map
<
string
,
DLTSTrialJobDetail
>
;
private
nniManagerIpConfig
?:
NNIManagerIpConfig
;
private
nniManagerIpConfig
?:
NNIManagerIpConfig
;
...
@@ -51,7 +53,9 @@ class DLTSTrainingService implements TrainingService {
...
@@ -51,7 +53,9 @@ class DLTSTrainingService implements TrainingService {
this
.
trialJobsMap
=
new
Map
();
this
.
trialJobsMap
=
new
Map
();
this
.
jobQueue
=
[];
this
.
jobQueue
=
[];
this
.
experimentId
=
getExperimentId
();
this
.
experimentId
=
getExperimentId
();
this
.
log
.
info
(
'
Construct DLTS training service.
'
);
this
.
dltsRestServerHost
=
getIPV4Address
();
this
.
jobMode
=
'
DLTS_JOB_ID
'
in
process
.
env
;
this
.
log
.
info
(
`Construct DLTS training service in
${
this
.
jobMode
?
'
job mode
'
:
'
local mode
'
}
.`
);
}
}
public
async
run
():
Promise
<
void
>
{
public
async
run
():
Promise
<
void
>
{
...
@@ -60,12 +64,70 @@ class DLTSTrainingService implements TrainingService {
...
@@ -60,12 +64,70 @@ class DLTSTrainingService implements TrainingService {
await
restServer
.
start
();
await
restServer
.
start
();
restServer
.
setEnableVersionCheck
=
this
.
versionCheck
;
restServer
.
setEnableVersionCheck
=
this
.
versionCheck
;
this
.
log
.
info
(
`DLTS Training service rest server listening on:
${
restServer
.
endPoint
}
`
);
this
.
log
.
info
(
`DLTS Training service rest server listening on:
${
restServer
.
endPoint
}
`
);
if
(
this
.
jobMode
)
{
await
this
.
exposeRestServerPort
(
restServer
.
clusterRestServerPort
);
}
else
{
this
.
dltsRestServerPort
=
restServer
.
clusterRestServerPort
}
await
Promise
.
all
([
await
Promise
.
all
([
this
.
statusCheckingLoop
(),
this
.
statusCheckingLoop
(),
this
.
submitJobLoop
()]);
this
.
submitJobLoop
()]);
this
.
log
.
info
(
'
DLTS training service exit.
'
);
this
.
log
.
info
(
'
DLTS training service exit.
'
);
}
}
/**
 * Ask the DLTS dashboard to expose the NNI REST server port of the current
 * job, then wait until the exposed endpoint is reported as running.
 *
 * Once per second the job's `endpoints` resource is fetched:
 *  - if no endpoint with `podPort === port` exists yet, one named
 *    "nni-rest-server" is requested via POST;
 *  - if such an endpoint exists and its `status` is 'running', its
 *    `nodeName` and `port` are stored in `dltsRestServerHost` /
 *    `dltsRestServerPort` and the method returns;
 *  - otherwise (endpoint pending, or the response is not an array) the
 *    poll is simply repeated.
 *
 * Polling also ends, without an endpoint having been recorded, as soon as
 * `this.stopping` is set, so a stop request is not blocked by this wait.
 *
 * @param port Pod-local port the REST server is listening on.
 * @throws Error if the cluster config has not been set, or if the
 *         DLTS_JOB_ID environment variable is missing.
 * @throws Whatever error `request` reports for a failed GET/POST.
 */
private async exposeRestServerPort(port: number): Promise<void> {
    if (this.dltsClusterConfig == null) {
        throw Error('Cluster config is not set');
    }
    const { dashboard, cluster, email, password } = this.dltsClusterConfig;

    // Fail loudly instead of silently building a URL that contains "undefined".
    const jobId = process.env['DLTS_JOB_ID'];
    if (jobId === undefined) {
        throw Error('DLTS_JOB_ID is not set');
    }

    // NOTE(review): `dashboard` is concatenated as-is, so it is presumably
    // expected to end with a trailing '/' — confirm against the config docs.
    const uri = `${dashboard}api/clusters/${cluster}/jobs/${jobId}/endpoints`;
    const qs = { email, password };

    while (!this.stopping) {
        this.log.debug('Checking endpoints');
        const endpoints = await new Promise<unknown>((resolve, reject) => {
            request.get(uri, { qs, json: true }, function (error, response, body) {
                if (error) {
                    reject(error);
                } else {
                    resolve(body);
                }
            });
        });
        this.log.debug('Endpoints: %o', endpoints);

        if (Array.isArray(endpoints)) {
            const restServerEndpoint = endpoints.find(({ podPort }) => podPort === port);
            if (restServerEndpoint == null) {
                // Nothing registered for this port yet: request the exposure.
                this.log.debug('Exposing %d', port);
                await new Promise<void>((resolve, reject) => {
                    request.post(uri, {
                        qs,
                        json: true,
                        body: {
                            endpoints: [{
                                name: "nni-rest-server",
                                podPort: port
                            }]
                        }
                    }, function (error) {
                        if (error) {
                            reject(error);
                        } else {
                            resolve();
                        }
                    });
                });
            } else if (restServerEndpoint['status'] === 'running') {
                // We get an exposed restserver port
                this.dltsRestServerHost = restServerEndpoint['nodeName'];
                this.dltsRestServerPort = restServerEndpoint['port'];
                break;
            }
        }

        // Wait one second before polling the dashboard again.
        await new Promise<void>(resolve => setTimeout(resolve, 1000));
    }
}
private
async
statusCheckingLoop
():
Promise
<
void
>
{
private
async
statusCheckingLoop
():
Promise
<
void
>
{
while
(
!
this
.
stopping
)
{
while
(
!
this
.
stopping
)
{
const
updateDLTSTrialJobs
:
Promise
<
void
>
[]
=
[];
const
updateDLTSTrialJobs
:
Promise
<
void
>
[]
=
[];
...
@@ -400,7 +462,7 @@ class DLTSTrainingService implements TrainingService {
...
@@ -400,7 +462,7 @@ class DLTSTrainingService implements TrainingService {
);
);
}
}
// tslint:disable-next-line: strict-boolean-expressions
// tslint:disable-next-line: strict-boolean-expressions
const
nniManagerIp
:
string
=
this
.
nniManagerIpConfig
?
this
.
nniManagerIpConfig
.
nniManagerIp
:
getIPV4Address
()
;
const
nniManagerIp
:
string
=
this
.
nniManagerIpConfig
?
this
.
nniManagerIpConfig
.
nniManagerIp
:
this
.
dltsRestServerHost
;
const
version
:
string
=
this
.
versionCheck
?
await
getVersion
()
:
''
;
const
version
:
string
=
this
.
versionCheck
?
await
getVersion
()
:
''
;
const
nniDLTSTrialCommand
:
string
=
String
.
Format
(
const
nniDLTSTrialCommand
:
string
=
String
.
Format
(
DLTS_TRIAL_COMMAND_FORMAT
,
DLTS_TRIAL_COMMAND_FORMAT
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment