Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
1328f412
Unverified
Commit
1328f412
authored
Dec 10, 2019
by
chicm-ms
Committed by
GitHub
Dec 10, 2019
Browse files
Fix eslint errors (#1836)
* update eslint rules * auto fix eslint * manually fix eslint (#1833)
parent
8c07cf41
Changes
42
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
111 additions
and
102 deletions
+111
-102
src/nni_manager/.eslintrc
src/nni_manager/.eslintrc
+6
-1
src/nni_manager/common/datastore.ts
src/nni_manager/common/datastore.ts
+3
-3
src/nni_manager/common/experimentStartupInfo.ts
src/nni_manager/common/experimentStartupInfo.ts
+2
-2
src/nni_manager/common/log.ts
src/nni_manager/common/log.ts
+1
-1
src/nni_manager/common/observableTimer.ts
src/nni_manager/common/observableTimer.ts
+1
-1
src/nni_manager/common/utils.ts
src/nni_manager/common/utils.ts
+13
-14
src/nni_manager/core/nniDataStore.ts
src/nni_manager/core/nniDataStore.ts
+9
-7
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+23
-19
src/nni_manager/main.ts
src/nni_manager/main.ts
+1
-1
src/nni_manager/rest_server/restHandler.ts
src/nni_manager/rest_server/restHandler.ts
+19
-19
src/nni_manager/rest_server/restValidationSchemas.ts
src/nni_manager/rest_server/restValidationSchemas.ts
+8
-7
src/nni_manager/training_service/common/clusterJobRestServer.ts
...i_manager/training_service/common/clusterJobRestServer.ts
+2
-2
src/nni_manager/training_service/common/gpuData.ts
src/nni_manager/training_service/common/gpuData.ts
+1
-1
src/nni_manager/training_service/common/jobMetrics.ts
src/nni_manager/training_service/common/jobMetrics.ts
+1
-1
src/nni_manager/training_service/common/trialConfig.ts
src/nni_manager/training_service/common/trialConfig.ts
+4
-4
src/nni_manager/training_service/common/util.ts
src/nni_manager/training_service/common/util.ts
+1
-2
src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts
...er/training_service/kubernetes/azureStorageClientUtils.ts
+2
-2
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
...bernetes/frameworkcontroller/frameworkcontrollerConfig.ts
+1
-1
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts
...rameworkcontroller/frameworkcontrollerJobInfoCollector.ts
+4
-3
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
...frameworkcontroller/frameworkcontrollerTrainingService.ts
+9
-11
No files found.
src/nni_manager/.eslintrc
View file @
1328f412
...
...
@@ -18,7 +18,12 @@
"plugin:@typescript-eslint/recommended"
],
"rules": {
"@typescript-eslint/no-explicit-any": 0
"@typescript-eslint/no-explicit-any": 0,
"@typescript-eslint/no-namespace": 0,
"@typescript-eslint/consistent-type-assertions": 0,
"@typescript-eslint/no-inferrable-types": 0,
"no-inner-declarations": 0,
"@typescript-eslint/no-var-requires": 0
},
"ignorePatterns": [
"node_modules/",
...
...
src/nni_manager/common/datastore.ts
View file @
1328f412
...
...
@@ -56,13 +56,13 @@ interface TrialJobInfo {
interface
HyperParameterFormat
{
parameter_source
:
string
;
parameters
:
Object
;
parameters
:
Record
<
string
,
any
>
;
parameter_id
:
number
;
}
interface
ExportedDataFormat
{
parameter
:
Object
;
value
:
Object
;
parameter
:
Record
<
string
,
any
>
;
value
:
Record
<
string
,
any
>
;
id
:
string
;
}
...
...
src/nni_manager/common/experimentStartupInfo.ts
View file @
1328f412
...
...
@@ -27,9 +27,9 @@ class ExperimentStartupInfo {
this
.
initialized
=
true
;
if
(
logDir
!==
undefined
&&
logDir
.
length
>
0
)
{
this
.
logDir
=
path
.
join
(
path
.
normalize
(
logDir
),
getExperimentId
());
this
.
logDir
=
path
.
join
(
path
.
normalize
(
logDir
),
this
.
getExperimentId
());
}
else
{
this
.
logDir
=
path
.
join
(
os
.
homedir
(),
'
nni
'
,
'
experiments
'
,
getExperimentId
());
this
.
logDir
=
path
.
join
(
os
.
homedir
(),
'
nni
'
,
'
experiments
'
,
this
.
getExperimentId
());
}
if
(
logLevel
!==
undefined
&&
logLevel
.
length
>
1
)
{
...
...
src/nni_manager/common/log.ts
View file @
1328f412
...
...
@@ -84,7 +84,7 @@ class Logger {
this
.
readonly
=
isReadonly
();
}
public
close
()
{
public
close
()
:
void
{
this
.
writable
.
destroy
();
}
...
...
src/nni_manager/common/observableTimer.ts
View file @
1328f412
...
...
@@ -18,7 +18,7 @@ class ObservableTimer {
return
this
.
observableSource
.
subscribe
(
onNext
,
onError
,
onCompleted
);
}
public
unsubscribe
(
subscription
:
Rx
.
IDisposable
)
{
public
unsubscribe
(
subscription
:
Rx
.
IDisposable
)
:
void
{
if
(
typeof
subscription
!==
undefined
)
{
subscription
.
dispose
();
}
...
...
src/nni_manager/common/utils.ts
View file @
1328f412
...
...
@@ -16,11 +16,9 @@ import { Container } from 'typescript-ioc';
import
*
as
util
from
'
util
'
;
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
ExperimentStartupInfo
,
getExperimentId
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
ExperimentStartupInfo
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
TrialConfig
}
from
'
../training_service/common/trialConfig
'
;
import
{
HyperParameters
,
TrainingService
,
TrialJobStatus
}
from
'
./trainingService
'
;
import
{
getLogger
}
from
'
./log
'
;
function
getExperimentRootDir
():
string
{
return
getExperimentStartupInfo
()
...
...
@@ -236,11 +234,11 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
* Generate parameter file name based on HyperParameters object
* @param hyperParameters HyperParameters instance
*/
function
generateParamFileName
(
hyperParameters
:
HyperParameters
):
string
{
function
generateParamFileName
(
hyperParameters
:
HyperParameters
):
string
{
assert
(
hyperParameters
!==
undefined
);
assert
(
hyperParameters
.
index
>=
0
);
let
paramFileName
:
string
;
let
paramFileName
:
string
;
if
(
hyperParameters
.
index
==
0
)
{
paramFileName
=
'
parameter.cfg
'
;
}
else
{
...
...
@@ -283,7 +281,7 @@ function cleanupUnitTest(): void {
Container
.
restore
(
ExperimentStartupInfo
);
}
let
cachedipv4Address
:
string
=
''
;
let
cachedipv4Address
:
string
=
''
;
/**
* Get IPv4 address of current machine
*/
...
...
@@ -325,15 +323,15 @@ function getJobCancelStatus(isEarlyStopped: boolean): TrialJobStatus {
* Utility method to calculate file numbers under a directory, recursively
* @param directory directory name
*/
function
countFilesRecursively
(
directory
:
string
,
timeoutMilliSeconds
?:
number
):
Promise
<
number
>
{
function
countFilesRecursively
(
directory
:
string
):
Promise
<
number
>
{
if
(
!
fs
.
existsSync
(
directory
))
{
throw
Error
(
`Direcotory
${
directory
}
doesn't exist`
);
}
const
deferred
:
Deferred
<
number
>
=
new
Deferred
<
number
>
();
let
timeoutId
:
NodeJS
.
Timer
const
delayTimeout
:
Promise
<
number
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
)
:
void
=>
{
let
timeoutId
:
NodeJS
.
Timer
const
delayTimeout
:
Promise
<
number
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
):
void
=>
{
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId
=
setTimeout
(()
=>
{
reject
(
new
Error
(
`Timeout: path
${
directory
}
has too many files`
));
...
...
@@ -359,7 +357,7 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
}
function
validateFileName
(
fileName
:
string
):
boolean
{
le
t
pattern
:
string
=
'
^[a-z0-9A-Z
\
._-]+$
'
;
cons
t
pattern
:
string
=
'
^[a-z0-9A-Z._-]+$
'
;
const
validateResult
=
fileName
.
match
(
pattern
);
if
(
validateResult
)
{
return
true
;
...
...
@@ -374,7 +372,7 @@ async function validateFileNameRecursively(directory: string): Promise<boolean>
const
fileNameArray
:
string
[]
=
fs
.
readdirSync
(
directory
);
let
result
=
true
;
for
(
var
name
of
fileNameArray
){
for
(
const
name
of
fileNameArray
){
const
fullFilePath
:
string
=
path
.
join
(
directory
,
name
);
try
{
// validate file names and directory names
...
...
@@ -396,7 +394,7 @@ async function validateFileNameRecursively(directory: string): Promise<boolean>
* get the version of current package
*/
async
function
getVersion
():
Promise
<
string
>
{
const
deferred
:
Deferred
<
string
>
=
new
Deferred
<
string
>
();
const
deferred
:
Deferred
<
string
>
=
new
Deferred
<
string
>
();
import
(
path
.
join
(
__dirname
,
'
..
'
,
'
package.json
'
)).
then
((
pkg
)
=>
{
deferred
.
resolve
(
pkg
.
version
);
}).
catch
((
error
)
=>
{
...
...
@@ -430,7 +428,7 @@ function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newE
* judge whether the process is alive
*/
async
function
isAlive
(
pid
:
any
):
Promise
<
boolean
>
{
le
t
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
cons
t
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
let
alive
:
boolean
=
false
;
if
(
process
.
platform
===
'
win32
'
)
{
try
{
...
...
@@ -440,6 +438,7 @@ async function isAlive(pid: any): Promise<boolean> {
}
}
catch
(
error
)
{
//ignore
}
}
else
{
...
...
@@ -458,7 +457,7 @@ async function isAlive(pid: any): Promise<boolean> {
* kill process
*/
async
function
killPid
(
pid
:
any
):
Promise
<
void
>
{
le
t
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
cons
t
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
try
{
if
(
process
.
platform
===
"
win32
"
)
{
await
cpp
.
exec
(
`cmd.exe /c taskkill /PID
${
pid
}
/F`
);
...
...
src/nni_manager/core/nniDataStore.ts
View file @
1328f412
...
...
@@ -159,7 +159,7 @@ class NNIDataStore implements DataStore {
public
async
exportTrialHpConfigs
():
Promise
<
string
>
{
const
jobs
:
TrialJobInfo
[]
=
await
this
.
listTrialJobs
();
le
t
exportedData
:
ExportedDataFormat
[]
=
[];
cons
t
exportedData
:
ExportedDataFormat
[]
=
[];
for
(
const
job
of
jobs
)
{
if
(
job
.
hyperParameters
&&
job
.
finalMetricData
)
{
if
(
job
.
hyperParameters
.
length
===
1
&&
job
.
finalMetricData
.
length
===
1
)
{
...
...
@@ -172,18 +172,18 @@ class NNIDataStore implements DataStore {
};
exportedData
.
push
(
oneEntry
);
}
else
{
le
t
paraMap
:
Map
<
number
,
Object
>
=
new
Map
();
le
t
metricMap
:
Map
<
number
,
Object
>
=
new
Map
();
cons
t
paraMap
:
Map
<
number
,
Record
<
string
,
any
>
>
=
new
Map
();
cons
t
metricMap
:
Map
<
number
,
Record
<
string
,
any
>
>
=
new
Map
();
for
(
const
eachPara
of
job
.
hyperParameters
)
{
const
parameters
:
HyperParameterFormat
=
<
HyperParameterFormat
>
JSON
.
parse
(
eachPara
);
paraMap
.
set
(
parameters
.
parameter_id
,
parameters
.
parameters
);
}
for
(
const
eachMetric
of
job
.
finalMetricData
)
{
const
value
:
Object
=
JSON
.
parse
(
eachMetric
.
data
);
const
value
:
Record
<
string
,
any
>
=
JSON
.
parse
(
eachMetric
.
data
);
metricMap
.
set
(
Number
(
eachMetric
.
parameterId
),
value
);
}
paraMap
.
forEach
((
value
:
Object
,
key
:
number
)
=>
{
const
metricValue
:
Object
|
undefined
=
metricMap
.
get
(
key
);
paraMap
.
forEach
((
value
:
Record
<
string
,
any
>
,
key
:
number
)
=>
{
const
metricValue
:
Record
<
string
,
any
>
|
undefined
=
metricMap
.
get
(
key
);
if
(
metricValue
)
{
const
oneEntry
:
ExportedDataFormat
=
{
parameter
:
value
,
...
...
@@ -201,7 +201,7 @@ class NNIDataStore implements DataStore {
}
public
async
getImportedData
():
Promise
<
string
[]
>
{
le
t
importedData
:
string
[]
=
[];
cons
t
importedData
:
string
[]
=
[];
const
importDataEvents
:
TrialJobEventRecord
[]
=
await
this
.
db
.
queryTrialJobEvent
(
undefined
,
'
IMPORT_DATA
'
);
for
(
const
event
of
importDataEvents
)
{
if
(
event
.
data
)
{
...
...
@@ -329,6 +329,7 @@ class NNIDataStore implements DataStore {
if
(
!
jobInfo
)
{
throw
new
Error
(
'
Empty JobInfo
'
);
}
/* eslint-disable no-fallthrough */
switch
(
record
.
event
)
{
case
'
RUNNING
'
:
if
(
record
.
timestamp
!==
undefined
)
{
...
...
@@ -358,6 +359,7 @@ class NNIDataStore implements DataStore {
}
default
:
}
/* eslint-enable no-fallthrough */
jobInfo
.
status
=
this
.
getJobStatusByLatestEvent
(
jobInfo
.
status
,
record
.
event
);
if
(
record
.
data
!==
undefined
&&
record
.
data
.
trim
().
length
>
0
)
{
const
newHParam
:
any
=
this
.
parseHyperParameter
(
record
.
data
);
...
...
src/nni_manager/core/nnimanager.ts
View file @
1328f412
...
...
@@ -4,8 +4,7 @@
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
ChildProcess
,
StdioOptions
}
from
'
child_process
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
*
as
component
from
'
../common/component
'
;
import
{
DataStore
,
MetricDataRecord
,
MetricType
,
TrialJobInfo
}
from
'
../common/datastore
'
;
...
...
@@ -21,7 +20,7 @@ import {
}
from
'
../common/trainingService
'
;
import
{
delay
,
getCheckpointDir
,
getExperimentRootDir
,
getLogDir
,
getMsgDispatcherCommand
,
mkDirP
,
getTunerProc
,
getLogLevel
,
isAlive
,
killPid
}
from
'
../common/utils
'
;
import
{
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
,
IMPORT_DATA
}
from
'
./commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
...
...
@@ -64,7 +63,7 @@ class NNIManager implements Manager {
status
:
'
INITIALIZED
'
,
errors
:
[]
};
this
.
trialJobMetricListener
=
(
metric
:
TrialJobMetric
)
=>
{
this
.
trialJobMetricListener
=
(
metric
:
TrialJobMetric
)
:
void
=>
{
this
.
onTrialJobMetrics
(
metric
).
catch
((
err
:
Error
)
=>
{
this
.
criticalError
(
NNIError
.
FromError
(
err
,
'
Job metrics error:
'
));
});
...
...
@@ -123,8 +122,8 @@ class NNIManager implements Manager {
// TODO: NNI manager should not peek tuner's internal protocol, let's refactor this later
const
packedParameter
=
{
parameter_id
:
null
,
parameter_source
:
'
customized
'
,
parameter_id
:
null
,
// eslint-disable-line @typescript-eslint/camelcase
parameter_source
:
'
customized
'
,
// eslint-disable-line @typescript-eslint/camelcase
parameters
:
JSON
.
parse
(
hyperParams
)
}
...
...
@@ -235,10 +234,10 @@ class NNIManager implements Manager {
// Collect generated trials and imported trials
const
finishedTrialData
:
string
=
await
this
.
exportData
();
const
importedData
:
string
[]
=
await
this
.
dataStore
.
getImportedData
();
let
trialData
:
Object
[]
=
JSON
.
parse
(
finishedTrialData
);
let
trialData
:
Record
<
string
,
any
>
[]
=
JSON
.
parse
(
finishedTrialData
);
for
(
const
oneImportedData
of
importedData
)
{
// do not deduplicate
trialData
=
trialData
.
concat
(
<
Object
[]
>
JSON
.
parse
(
oneImportedData
));
trialData
=
trialData
.
concat
(
<
Record
<
string
,
any
>
[]
>
JSON
.
parse
(
oneImportedData
));
}
this
.
trialDataForTuner
=
JSON
.
stringify
(
trialData
);
...
...
@@ -361,7 +360,7 @@ class NNIManager implements Manager {
includeIntermediateResultsEnv
=
this
.
experimentProfile
.
params
.
tuner
.
includeIntermediateResults
;
}
le
t
nniEnv
=
{
cons
t
nniEnv
=
{
NNI_MODE
:
mode
,
NNI_CHECKPOINT_DIRECTORY
:
dataDirectory
,
NNI_LOG_DIRECTORY
:
getLogDir
(),
...
...
@@ -369,7 +368,7 @@ class NNIManager implements Manager {
NNI_INCLUDE_INTERMEDIATE_RESULTS
:
includeIntermediateResultsEnv
,
CUDA_VISIBLE_DEVICES
:
this
.
getGpuEnvvarValue
()
};
le
t
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
cons
t
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
const
tunerProc
:
ChildProcess
=
getTunerProc
(
command
,
stdio
,
newCwd
,
newEnv
);
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
...
...
@@ -502,9 +501,9 @@ class NNIManager implements Manager {
finishedTrialJobNum
++
;
hyperParams
=
trialJobDetail
.
form
.
hyperParameters
.
value
;
this
.
dispatcher
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
trial_job_id
:
trialJobDetail
.
id
,
// eslint-disable-line @typescript-eslint/camelcase
event
:
trialJobDetail
.
status
,
hyper_params
:
hyperParams
hyper_params
:
hyperParams
// eslint-disable-line @typescript-eslint/camelcase
}));
break
;
case
'
FAILED
'
:
...
...
@@ -515,9 +514,9 @@ class NNIManager implements Manager {
finishedTrialJobNum
++
;
hyperParams
=
trialJobDetail
.
form
.
hyperParameters
.
value
;
this
.
dispatcher
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
trial_job_id
:
trialJobDetail
.
id
,
// eslint-disable-line @typescript-eslint/camelcase
event
:
trialJobDetail
.
status
,
hyper_params
:
hyperParams
hyper_params
:
hyperParams
// eslint-disable-line @typescript-eslint/camelcase
}));
break
;
case
'
WAITING
'
:
...
...
@@ -695,7 +694,7 @@ class NNIManager implements Manager {
private
async
onTunerCommand
(
commandType
:
string
,
content
:
string
):
Promise
<
void
>
{
this
.
log
.
info
(
`NNIManager received command from dispatcher:
${
commandType
}
,
${
content
}
`
);
switch
(
commandType
)
{
case
INITIALIZED
:
case
INITIALIZED
:
{
// Tuner is intialized, search space is set, request tuner to generate hyper parameters
if
(
this
.
trialDataForTuner
.
length
>
0
)
{
if
(
this
.
dispatcher
===
undefined
)
{
...
...
@@ -705,7 +704,8 @@ class NNIManager implements Manager {
}
this
.
requestTrialJobs
(
this
.
experimentProfile
.
params
.
trialConcurrency
);
break
;
case
NEW_TRIAL_JOB
:
}
case
NEW_TRIAL_JOB
:
{
if
(
this
.
status
.
status
===
'
TUNER_NO_MORE_TRIAL
'
)
{
this
.
log
.
warning
(
'
It is not supposed to receive more trials after NO_MORE_TRIAL is set
'
);
this
.
setStatus
(
'
RUNNING
'
);
...
...
@@ -719,7 +719,8 @@ class NNIManager implements Manager {
};
this
.
waitingTrials
.
push
(
form
);
break
;
case
SEND_TRIAL_JOB_PARAMETER
:
}
case
SEND_TRIAL_JOB_PARAMETER
:
{
const
tunerCommand
:
any
=
JSON
.
parse
(
content
);
assert
(
tunerCommand
.
parameter_index
>=
0
);
assert
(
tunerCommand
.
trial_job_id
!==
undefined
);
...
...
@@ -739,15 +740,18 @@ class NNIManager implements Manager {
'
ADD_HYPERPARAMETER
'
,
tunerCommand
.
trial_job_id
,
content
,
undefined
);
}
break
;
case
NO_MORE_TRIAL_JOBS
:
}
case
NO_MORE_TRIAL_JOBS
:
{
if
(
!
[
'
ERROR
'
,
'
STOPPING
'
,
'
STOPPED
'
].
includes
(
this
.
status
.
status
))
{
this
.
setStatus
(
'
TUNER_NO_MORE_TRIAL
'
);
}
break
;
case
KILL_TRIAL_JOB
:
}
case
KILL_TRIAL_JOB
:
{
this
.
log
.
info
(
`cancelTrialJob:
${
JSON
.
parse
(
content
)}
`
);
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
),
true
);
break
;
}
default
:
throw
new
Error
(
'
Error: unsupported command type from tuner
'
);
}
...
...
src/nni_manager/main.ts
View file @
1328f412
...
...
@@ -55,7 +55,7 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
.
to
(
FrameworkControllerTrainingService
)
.
scope
(
Scope
.
Singleton
);
}
else
{
throw
new
Error
(
`Error: unsupported mode:
${
m
ode
}
`
);
throw
new
Error
(
`Error: unsupported mode:
${
platformM
ode
}
`
);
}
Container
.
bind
(
Manager
)
.
to
(
NNIManager
)
...
...
src/nni_manager/rest_server/restHandler.ts
View file @
1328f412
...
...
@@ -11,7 +11,7 @@ import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import
{
NNIError
,
NNIErrorNames
}
from
'
../common/errors
'
;
import
{
isNewExperiment
,
isReadonly
}
from
'
../common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
,
ExperimentStartUpMode
}
from
'
../common/manager
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
}
from
'
../common/manager
'
;
import
{
ValidationSchemas
}
from
'
./restValidationSchemas
'
;
import
{
NNIRestServer
}
from
'
./nniRestServer
'
;
import
{
getVersion
}
from
'
../common/utils
'
;
...
...
@@ -72,7 +72,7 @@ class NNIRestHandler {
return
router
;
}
private
handle
_e
rror
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
,
errorCode
:
number
=
500
):
void
{
private
handle
E
rror
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
,
errorCode
:
number
=
500
):
void
{
if
(
err
instanceof
NNIError
&&
err
.
name
===
NNIErrorNames
.
NOT_FOUND
)
{
res
.
status
(
404
);
}
else
{
...
...
@@ -105,7 +105,7 @@ class NNIRestHandler {
ds
.
init
().
then
(()
=>
{
res
.
send
(
this
.
nniManager
.
getStatus
());
}).
catch
(
async
(
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
this
.
log
.
error
(
err
.
message
);
this
.
log
.
error
(
`Datastore initialize failed, stopping rest server...`
);
await
this
.
restServer
.
stop
();
...
...
@@ -118,7 +118,7 @@ class NNIRestHandler {
this
.
nniManager
.
getExperimentProfile
().
then
((
profile
:
ExperimentProfile
)
=>
{
res
.
send
(
profile
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -128,7 +128,7 @@ class NNIRestHandler {
this
.
nniManager
.
updateExperimentProfile
(
req
.
body
,
req
.
query
.
update_type
).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -138,7 +138,7 @@ class NNIRestHandler {
this
.
nniManager
.
importData
(
JSON
.
stringify
(
req
.
body
)).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -148,18 +148,18 @@ class NNIRestHandler {
if
(
isNewExperiment
())
{
this
.
nniManager
.
startExperiment
(
req
.
body
).
then
((
eid
:
string
)
=>
{
res
.
send
({
experiment_id
:
eid
experiment_id
:
eid
// eslint-disable-line @typescript-eslint/camelcase
});
}).
catch
((
err
:
Error
)
=>
{
// Start experiment is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
}
else
{
this
.
nniManager
.
resumeExperiment
(
isReadonly
()).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
// Resume experiment is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
}
});
...
...
@@ -170,7 +170,7 @@ class NNIRestHandler {
this
.
nniManager
.
getTrialJobStatistics
().
then
((
statistics
:
TrialJobStatistics
[])
=>
{
res
.
send
(
statistics
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -189,7 +189,7 @@ class NNIRestHandler {
res
.
send
();
}
catch
(
err
)
{
// setClusterMetata is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
NNIError
.
FromError
(
err
),
res
,
true
);
this
.
handle
E
rror
(
NNIError
.
FromError
(
err
),
res
,
true
);
}
});
}
...
...
@@ -202,7 +202,7 @@ class NNIRestHandler {
});
res
.
send
(
jobInfos
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -213,7 +213,7 @@ class NNIRestHandler {
const
jobInfo
:
TrialJobInfo
=
this
.
setErrorPathForFailedJob
(
jobDetail
);
res
.
send
(
jobInfo
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -223,7 +223,7 @@ class NNIRestHandler {
this
.
nniManager
.
addCustomizedTrialJob
(
JSON
.
stringify
(
req
.
body
)).
then
((
sequenceId
:
number
)
=>
{
res
.
send
({
sequenceId
});
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -233,7 +233,7 @@ class NNIRestHandler {
this
.
nniManager
.
cancelTrialJobByUser
(
req
.
params
.
id
).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -243,7 +243,7 @@ class NNIRestHandler {
this
.
nniManager
.
getMetricData
(
req
.
params
.
job_id
,
req
.
query
.
type
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -255,7 +255,7 @@ class NNIRestHandler {
this
.
nniManager
.
getMetricDataByRange
(
minSeqId
,
maxSeqId
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -265,7 +265,7 @@ class NNIRestHandler {
this
.
nniManager
.
getLatestMetricData
().
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -275,7 +275,7 @@ class NNIRestHandler {
this
.
nniManager
.
exportData
().
then
((
exportedData
:
string
)
=>
{
res
.
send
(
exportedData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
src/nni_manager/rest_server/restValidationSchemas.ts
View file @
1328f412
...
...
@@ -8,7 +8,7 @@ const joi = require('joi');
export
namespace
ValidationSchemas
{
export
const
SETCLUSTERMETADATA
=
{
body
:
{
machine_list
:
joi
.
array
().
items
(
joi
.
object
({
machine_list
:
joi
.
array
().
items
(
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
username
:
joi
.
string
().
required
(),
ip
:
joi
.
string
().
ip
().
required
(),
port
:
joi
.
number
().
min
(
1
).
max
(
65535
).
required
(),
...
...
@@ -19,12 +19,12 @@ export namespace ValidationSchemas {
maxTrialNumPerGpu
:
joi
.
number
(),
useActiveGpu
:
joi
.
boolean
()
})),
local_config
:
joi
.
object
({
local_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
gpuIndices
:
joi
.
string
(),
maxTrialNumPerGpu
:
joi
.
number
(),
useActiveGpu
:
joi
.
boolean
()
}),
trial_config
:
joi
.
object
({
trial_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
image
:
joi
.
string
().
min
(
1
),
codeDir
:
joi
.
string
().
min
(
1
).
required
(),
dataDir
:
joi
.
string
(),
...
...
@@ -89,13 +89,13 @@ export namespace ValidationSchemas {
})
})
}),
pai_config
:
joi
.
object
({
pai_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
userName
:
joi
.
string
().
min
(
1
).
required
(),
passWord
:
joi
.
string
().
min
(
1
),
token
:
joi
.
string
().
min
(
1
),
host
:
joi
.
string
().
min
(
1
).
required
()
}),
kubeflow_config
:
joi
.
object
({
kubeflow_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
operator
:
joi
.
string
().
min
(
1
).
required
(),
storage
:
joi
.
string
().
min
(
1
),
apiVersion
:
joi
.
string
().
min
(
1
),
...
...
@@ -113,7 +113,7 @@ export namespace ValidationSchemas {
}),
uploadRetryCount
:
joi
.
number
().
min
(
1
)
}),
frameworkcontroller_config
:
joi
.
object
({
frameworkcontroller_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
storage
:
joi
.
string
().
min
(
1
),
serviceAccountName
:
joi
.
string
().
min
(
1
),
nfs
:
joi
.
object
({
...
...
@@ -130,7 +130,7 @@ export namespace ValidationSchemas {
}),
uploadRetryCount
:
joi
.
number
().
min
(
1
)
}),
nni_manager_ip
:
joi
.
object
({
nni_manager_ip
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
nniManagerIp
:
joi
.
string
().
min
(
1
)
})
}
...
...
@@ -184,6 +184,7 @@ export namespace ValidationSchemas {
};
export
const
UPDATEEXPERIMENT
=
{
query
:
{
/* eslint-disable-next-line @typescript-eslint/camelcase */
update_type
:
joi
.
string
().
required
().
valid
(
'
TRIAL_CONCURRENCY
'
,
'
MAX_EXEC_DURATION
'
,
'
SEARCH_SPACE
'
,
'
MAX_TRIAL_NUM
'
)
},
body
:
{
...
...
src/nni_manager/training_service/common/clusterJobRestServer.ts
View file @
1328f412
...
...
@@ -72,10 +72,10 @@ export abstract class ClusterJobRestServer extends RestServer {
// Abstract method to handle trial metrics data
// tslint:disable-next-line:no-any
protected
abstract
handleTrialMetrics
(
jobId
:
string
,
trialMetrics
:
any
[])
:
void
;
protected
abstract
handleTrialMetrics
(
jobId
:
string
,
trialMetrics
:
any
[]):
void
;
// tslint:disable: no-unsafe-any no-any
protected
createRestHandler
()
:
Router
{
protected
createRestHandler
():
Router
{
const
router
:
Router
=
Router
();
router
.
use
((
req
:
Request
,
res
:
Response
,
next
:
any
)
=>
{
...
...
src/nni_manager/training_service/common/gpuData.ts
View file @
1328f412
...
...
@@ -17,7 +17,7 @@ export class GPUInfo {
// the index number of this GPU (starting from 0)
public
readonly
index
:
number
;
constructor
(
activeProcessNum
:
number
,
gpuMemUtil
:
number
,
gpuUtil
:
number
,
index
:
number
)
{
constructor
(
activeProcessNum
:
number
,
gpuMemUtil
:
number
,
gpuUtil
:
number
,
index
:
number
)
{
this
.
activeProcessNum
=
activeProcessNum
;
this
.
gpuMemUtil
=
gpuMemUtil
;
this
.
gpuUtil
=
gpuUtil
;
...
...
src/nni_manager/training_service/common/jobMetrics.ts
View file @
1328f412
...
...
@@ -15,7 +15,7 @@ export class JobMetrics {
public
readonly
jobStatus
:
TrialJobStatus
;
public
readonly
endTimestamp
:
number
;
constructor
(
jobId
:
string
,
metrics
:
string
[],
jobStatus
:
TrialJobStatus
,
endTimestamp
:
number
)
{
constructor
(
jobId
:
string
,
metrics
:
string
[],
jobStatus
:
TrialJobStatus
,
endTimestamp
:
number
)
{
this
.
jobId
=
jobId
;
this
.
metrics
=
metrics
;
this
.
jobStatus
=
jobStatus
;
...
...
src/nni_manager/training_service/common/trialConfig.ts
View file @
1328f412
...
...
@@ -9,13 +9,13 @@
*/
export
class
TrialConfig
{
// Trail command
public
readonly
command
:
string
;
public
readonly
command
:
string
;
// Code directory
public
readonly
codeDir
:
string
;
public
readonly
codeDir
:
string
;
// Required GPU number for trial job. The number should be in [0,100]
public
readonly
gpuNum
:
number
;
public
readonly
gpuNum
:
number
;
/**
* Constructor
...
...
@@ -23,7 +23,7 @@ export class TrialConfig {
* @param codeDir Code directory
* @param gpuNum Required GPU number for trial job
*/
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
)
{
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
)
{
this
.
command
=
command
;
this
.
codeDir
=
codeDir
;
this
.
gpuNum
=
gpuNum
;
...
...
src/nni_manager/training_service/common/util.ts
View file @
1328f412
...
...
@@ -10,7 +10,6 @@ import * as os from 'os';
import
*
as
path
from
'
path
'
;
import
{
String
}
from
'
typescript-string-operations
'
;
import
{
countFilesRecursively
,
getNewLine
,
validateFileNameRecursively
}
from
'
../../common/utils
'
;
import
{
file
}
from
'
../../node_modules/@types/tmp
'
;
import
{
GPU_INFO_COLLECTOR_FORMAT_WINDOWS
}
from
'
./gpuData
'
;
/**
...
...
@@ -20,7 +19,7 @@ import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData';
* @returns file number under codeDir
*/
// tslint:disable: no-redundant-jsdoc
export
async
function
validateCodeDir
(
codeDir
:
string
)
:
Promise
<
number
>
{
export
async
function
validateCodeDir
(
codeDir
:
string
):
Promise
<
number
>
{
let
fileCount
:
number
|
undefined
;
let
fileNameValid
:
boolean
=
true
;
try
{
...
...
src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts
View file @
1328f412
...
...
@@ -66,7 +66,7 @@ export namespace AzureStorageClientUtility {
let
rootDirectory
:
string
=
''
;
for
(
const
directory
of
directories
)
{
rootDirectory
+=
directory
;
le
t
result
:
boolean
=
await
createDirectory
(
fileServerClient
,
rootDirectory
,
azureShare
);
cons
t
result
:
boolean
=
await
createDirectory
(
fileServerClient
,
rootDirectory
,
azureShare
);
if
(
!
result
)
{
deferred
.
resolve
(
false
);
return
deferred
.
promise
;
...
...
@@ -141,7 +141,7 @@ export namespace AzureStorageClientUtility {
localDirectory
:
string
):
Promise
<
boolean
>
{
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
const
fileNameArray
:
string
[]
=
fs
.
readdirSync
(
localDirectory
);
let
result
:
boolean
=
await
createDirectoryRecursive
(
fileServerClient
,
azureDirectory
,
azureShare
);
const
result
:
boolean
=
await
createDirectoryRecursive
(
fileServerClient
,
azureDirectory
,
azureShare
);
if
(
!
result
)
{
deferred
.
resolve
(
false
);
return
deferred
.
promise
;
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
View file @
1328f412
...
...
@@ -26,7 +26,7 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public
readonly
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
;
public
readonly
name
:
string
;
public
readonly
taskNum
:
number
;
constructor
(
taskNum
:
number
,
command
:
string
,
gpuNum
:
number
,
constructor
(
taskNum
:
number
,
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
,
privateRegistryFilePath
?:
string
|
undefined
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
,
privateRegistryFilePath
);
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts
View file @
1328f412
...
...
@@ -17,7 +17,7 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
}
protected
async
retrieveSingleTrialJobInfo
(
kubernetesCRDClient
:
KubernetesCRDClient
|
undefined
,
kubernetesTrialJob
:
KubernetesTrialJobDetail
)
:
Promise
<
void
>
{
kubernetesTrialJob
:
KubernetesTrialJobDetail
):
Promise
<
void
>
{
if
(
!
this
.
statusesNeedToCheck
.
includes
(
kubernetesTrialJob
.
status
))
{
return
Promise
.
resolve
();
}
...
...
@@ -52,8 +52,8 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
kubernetesTrialJob
.
startTime
=
Date
.
parse
(
<
string
>
kubernetesJobInfo
.
status
.
startTime
);
}
break
;
case
'
Completed
'
:
const
completedJobType
:
FrameworkControllerJobCompleteStatus
=
case
'
Completed
'
:
{
const
completedJobType
:
FrameworkControllerJobCompleteStatus
=
<
FrameworkControllerJobCompleteStatus
>
kubernetesJobInfo
.
status
.
attemptStatus
.
completionStatus
.
type
.
name
;
switch
(
completedJobType
)
{
case
'
Succeeded
'
:
...
...
@@ -66,6 +66,7 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
}
kubernetesTrialJob
.
endTime
=
Date
.
parse
(
<
string
>
kubernetesJobInfo
.
status
.
completionTime
);
break
;
}
default
:
}
}
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
View file @
1328f412
...
...
@@ -15,7 +15,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from
import
{
CONTAINER_INSTALL_NNI_SHELL_FORMAT
}
from
'
../../common/containerJobData
'
;
import
{
TrialConfigMetadataKey
}
from
'
../../common/trialConfigMetadataKey
'
;
import
{
validateCodeDir
}
from
'
../../common/util
'
;
import
{
AzureStorageClientUtility
}
from
'
../azureStorageClientUtils
'
;
import
{
NFSConfig
}
from
'
../kubernetesConfig
'
;
import
{
KubernetesTrialJobDetail
}
from
'
../kubernetesData
'
;
import
{
KubernetesTrainingService
}
from
'
../kubernetesTrainingService
'
;
...
...
@@ -119,7 +118,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
case
TrialConfigMetadataKey
.
NNI_MANAGER_IP
:
this
.
nniManagerIpConfig
=
<
NNIManagerIpConfig
>
JSON
.
parse
(
value
);
break
;
case
TrialConfigMetadataKey
.
FRAMEWORKCONTROLLER_CLUSTER_CONFIG
:
case
TrialConfigMetadataKey
.
FRAMEWORKCONTROLLER_CLUSTER_CONFIG
:
{
const
frameworkcontrollerClusterJsonObject
:
any
=
JSON
.
parse
(
value
);
this
.
fcClusterConfig
=
FrameworkControllerClusterConfigFactory
.
generateFrameworkControllerClusterConfig
(
frameworkcontrollerClusterJsonObject
);
...
...
@@ -130,9 +129,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
this
.
azureStorageShare
=
azureFrameworkControllerClusterConfig
.
azureStorage
.
azureShare
;
await
this
.
createAzureStorage
(
azureFrameworkControllerClusterConfig
.
keyVault
.
vaultName
,
azureFrameworkControllerClusterConfig
.
keyVault
.
name
,
azureFrameworkControllerClusterConfig
.
azureStorage
.
accountName
,
azureFrameworkControllerClusterConfig
.
azureStorage
.
azureShare
azureFrameworkControllerClusterConfig
.
keyVault
.
name
);
}
else
if
(
this
.
fcClusterConfig
.
storageType
===
'
nfs
'
)
{
const
nfsFrameworkControllerClusterConfig
:
FrameworkControllerClusterConfigNFS
=
...
...
@@ -144,7 +141,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
this
.
kubernetesCRDClient
=
FrameworkControllerClientFactory
.
createClient
();
break
;
case
TrialConfigMetadataKey
.
TRIAL_CONFIG
:
}
case
TrialConfigMetadataKey
.
TRIAL_CONFIG
:
{
const
frameworkcontrollerTrialJsonObjsect
:
any
=
JSON
.
parse
(
value
);
this
.
fcTrialConfig
=
new
FrameworkControllerTrialConfig
(
...
...
@@ -161,6 +159,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return
Promise
.
reject
(
new
Error
(
error
));
}
break
;
}
case
TrialConfigMetadataKey
.
VERSION_CHECK
:
this
.
versionCheck
=
(
value
===
'
true
'
||
value
===
'
True
'
);
break
;
...
...
@@ -237,7 +236,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
await
cpp
.
exec
(
`mkdir -p
${
trialLocalTempFolder
}
`
);
const
installScriptContent
:
string
=
CONTAINER_INSTALL_NNI_SHELL_FORMAT
;
const
installScriptContent
:
string
=
CONTAINER_INSTALL_NNI_SHELL_FORMAT
;
// Write NNI installation file to local tmp files
await
fs
.
promises
.
writeFile
(
path
.
join
(
trialLocalTempFolder
,
'
install_nni.sh
'
),
installScriptContent
,
{
encoding
:
'
utf8
'
});
// Create tmp trial working folder locally.
...
...
@@ -251,7 +250,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
// Write file content ( parameter.cfg ) to local tmp folders
const
trialForm
:
TrialJobApplicationForm
=
(
<
TrialJobApplicationForm
>
form
);
if
(
form
!==
undefined
)
{
await
fs
.
promises
.
writeFile
(
path
.
join
(
trialLocalTempFolder
,
generateParamFileName
(
form
.
hyperParameters
)),
form
.
hyperParameters
.
value
,
{
encoding
:
'
utf8
'
});
...
...
@@ -266,7 +264,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
throw
new
Error
(
'
frameworkcontroller trial config is not initialized
'
);
}
const
podResources
:
any
=
[];
const
podResources
:
any
=
[];
for
(
const
taskRole
of
this
.
fcTrialConfig
.
taskRoles
)
{
const
resource
:
any
=
{};
resource
.
requests
=
this
.
generatePodResource
(
taskRole
.
memoryMB
,
taskRole
.
cpuNum
,
taskRole
.
gpuNum
);
...
...
@@ -300,7 +298,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
* @param podResources pod template
*/
private
async
generateFrameworkControllerJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
frameworkcontrollerJobName
:
string
,
podResources
:
any
)
:
Promise
<
any
>
{
frameworkcontrollerJobName
:
string
,
podResources
:
any
):
Promise
<
any
>
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
}
...
...
@@ -424,7 +422,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}]
}];
let
spec
:
any
=
{
const
spec
:
any
=
{
containers
:
containers
,
initContainers
:
initContainers
,
restartPolicy
:
'
OnFailure
'
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment