Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
80624de7
Unverified
Commit
80624de7
authored
Dec 07, 2018
by
SparkSnail
Committed by
GitHub
Dec 07, 2018
Browse files
Update pai token by time interval (#434)
Update pai token every 2 hours.
parent
23530bb6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
60 additions
and
34 deletions
+60
-34
src/nni_manager/training_service/pai/paiTrainingService.ts
src/nni_manager/training_service/pai/paiTrainingService.ts
+60
-34
No files found.
src/nni_manager/training_service/pai/paiTrainingService.ts
View file @
80624de7
...
@@ -64,6 +64,8 @@ class PAITrainingService implements TrainingService {
...
@@ -64,6 +64,8 @@ class PAITrainingService implements TrainingService {
private
stopping
:
boolean
=
false
;
private
stopping
:
boolean
=
false
;
private
hdfsClient
:
any
;
private
hdfsClient
:
any
;
private
paiToken
?
:
string
;
private
paiToken
?
:
string
;
private
paiTokenUpdateTime
?:
number
;
private
paiTokenUpdateInterval
:
number
;
private
experimentId
!
:
string
;
private
experimentId
!
:
string
;
private
readonly
paiJobCollector
:
PAIJobInfoCollector
;
private
readonly
paiJobCollector
:
PAIJobInfoCollector
;
private
readonly
hdfsDirPattern
:
string
;
private
readonly
hdfsDirPattern
:
string
;
...
@@ -83,6 +85,7 @@ class PAITrainingService implements TrainingService {
...
@@ -83,6 +85,7 @@ class PAITrainingService implements TrainingService {
this
.
paiJobCollector
=
new
PAIJobInfoCollector
(
this
.
trialJobsMap
);
this
.
paiJobCollector
=
new
PAIJobInfoCollector
(
this
.
trialJobsMap
);
this
.
hdfsDirPattern
=
'
hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?
'
;
this
.
hdfsDirPattern
=
'
hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?
'
;
this
.
nextTrialSequenceId
=
-
1
;
this
.
nextTrialSequenceId
=
-
1
;
this
.
paiTokenUpdateInterval
=
7200000
;
//2hours
}
}
public
async
run
():
Promise
<
void
>
{
public
async
run
():
Promise
<
void
>
{
...
@@ -90,6 +93,7 @@ class PAITrainingService implements TrainingService {
...
@@ -90,6 +93,7 @@ class PAITrainingService implements TrainingService {
await
restServer
.
start
();
await
restServer
.
start
();
this
.
log
.
info
(
`PAI Training service rest server listening on:
${
restServer
.
endPoint
}
`
);
this
.
log
.
info
(
`PAI Training service rest server listening on:
${
restServer
.
endPoint
}
`
);
while
(
!
this
.
stopping
)
{
while
(
!
this
.
stopping
)
{
await
this
.
updatePaiToken
();
await
this
.
paiJobCollector
.
retrieveTrialStatus
(
this
.
paiToken
,
this
.
paiClusterConfig
);
await
this
.
paiJobCollector
.
retrieveTrialStatus
(
this
.
paiToken
,
this
.
paiClusterConfig
);
await
delay
(
3000
);
await
delay
(
3000
);
}
}
...
@@ -347,40 +351,8 @@ class PAITrainingService implements TrainingService {
...
@@ -347,40 +351,8 @@ class PAITrainingService implements TrainingService {
});
});
// Get PAI authentication token
// Get PAI authentication token
const
authentication_req
:
request
.
Options
=
{
await
this
.
updatePaiToken
();
uri
:
`http://
${
this
.
paiClusterConfig
.
host
}
/rest-server/api/v1/token`
,
break
;
method
:
'
POST
'
,
json
:
true
,
body
:
{
username
:
this
.
paiClusterConfig
.
userName
,
password
:
this
.
paiClusterConfig
.
passWord
}
};
request
(
authentication_req
,
(
error
:
Error
,
response
:
request
.
Response
,
body
:
any
)
=>
{
if
(
error
)
{
this
.
log
.
error
(
`Get PAI token failed:
${
error
.
message
}
`
);
deferred
.
reject
(
new
Error
(
`Get PAI token failed:
${
error
.
message
}
`
));
}
else
{
if
(
response
.
statusCode
!==
200
){
this
.
log
.
error
(
`Get PAI token failed: get PAI Rest return code
${
response
.
statusCode
}
`
);
deferred
.
reject
(
new
Error
(
`Get PAI token failed, please check paiConfig username or password`
));
}
this
.
paiToken
=
body
.
token
;
deferred
.
resolve
();
}
});
let
timeoutId
:
NodeJS
.
Timer
;
const
timeoutDelay
:
Promise
<
void
>
=
new
Promise
<
void
>
((
resolve
:
Function
,
reject
:
Function
):
void
=>
{
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId
=
setTimeout
(
()
=>
reject
(
new
Error
(
'
Get PAI token timeout. Please check your PAI cluster.
'
)),
5000
);
});
return
Promise
.
race
([
timeoutDelay
,
deferred
.
promise
]).
finally
(()
=>
clearTimeout
(
timeoutId
));
case
TrialConfigMetadataKey
.
TRIAL_CONFIG
:
case
TrialConfigMetadataKey
.
TRIAL_CONFIG
:
if
(
!
this
.
paiClusterConfig
){
if
(
!
this
.
paiClusterConfig
){
...
@@ -487,6 +459,60 @@ class PAITrainingService implements TrainingService {
...
@@ -487,6 +459,60 @@ class PAITrainingService implements TrainingService {
return
this
.
nextTrialSequenceId
++
;
return
this
.
nextTrialSequenceId
++
;
}
}
/**
 * Initialize the PAI token, or refresh it once the update interval has elapsed.
 *
 * If a token was already obtained and less than `paiTokenUpdateInterval`
 * milliseconds have passed since `paiTokenUpdateTime`, this is a no-op.
 * Otherwise it POSTs the configured username/password to the PAI rest-server
 * token endpoint and, on an HTTP 200 response, stores the returned token
 * together with the time it was obtained.
 *
 * @returns a promise that resolves once the token is up to date
 * @throws Error if the PAI cluster config has not been initialized
 * @throws Error (via rejection) if the request fails, the rest-server answers
 *         with a non-200 status code, or no answer arrives within 5 seconds
 */
private async updatePaiToken(): Promise<void> {
    const deferred: Deferred<void> = new Deferred<void>();

    const currentTime: number = new Date().getTime();
    // If pai token initialized and not reach the interval time, do not update
    if (this.paiTokenUpdateTime && (currentTime - this.paiTokenUpdateTime) < this.paiTokenUpdateInterval) {
        return;
    }

    if (!this.paiClusterConfig) {
        const paiClusterConfigError: string = `pai cluster config not initialized!`;
        this.log.error(paiClusterConfigError);
        throw new Error(paiClusterConfigError);
    }

    const authentication_req: request.Options = {
        uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/token`,
        method: 'POST',
        json: true,
        body: {
            username: this.paiClusterConfig.userName,
            password: this.paiClusterConfig.passWord
        }
    };

    request(authentication_req, (error: Error, response: request.Response, body: any) => {
        if (error) {
            this.log.error(`Get PAI token failed: ${error.message}`);
            deferred.reject(new Error(`Get PAI token failed: ${error.message}`));
        } else if (response.statusCode !== 200) {
            this.log.error(`Get PAI token failed: get PAI Rest return code ${response.statusCode}`);
            deferred.reject(new Error(`Get PAI token failed, please check paiConfig username or password`));
        } else {
            // Only record the token and its timestamp on success. Recording them
            // after a failed authentication would store an invalid token and
            // suppress any retry until the update interval has elapsed.
            this.paiToken = body.token;
            this.paiTokenUpdateTime = new Date().getTime();
            deferred.resolve();
        }
    });

    let timeoutId: NodeJS.Timer;
    const timeoutDelay: Promise<void> = new Promise<void>(
        (_resolve: () => void, reject: (reason: Error) => void): void => {
            // Set timeout and reject the promise once reach timeout (5 seconds)
            timeoutId = setTimeout(
                () => reject(new Error('Get PAI token timeout. Please check your PAI cluster.')),
                5000
            );
        }
    );

    // Whichever settles first wins; the timer is always cleared afterwards so it
    // cannot fire (or keep the event loop busy) once the request has completed.
    return Promise.race([timeoutDelay, deferred.promise]).finally(() => clearTimeout(timeoutId));
}
}
}
export
{
PAITrainingService
}
export
{
PAITrainingService
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment