Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
543239c6
Unverified
Commit
543239c6
authored
Dec 12, 2019
by
SparkSnail
Committed by
GitHub
Dec 12, 2019
Browse files
Merge pull request #220 from microsoft/master
merge master
parents
32efaa36
659480f2
Changes
94
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
85 additions
and
102 deletions
+85
-102
src/nni_manager/common/log.ts
src/nni_manager/common/log.ts
+1
-2
src/nni_manager/common/observableTimer.ts
src/nni_manager/common/observableTimer.ts
+1
-1
src/nni_manager/common/restServer.ts
src/nni_manager/common/restServer.ts
+0
-1
src/nni_manager/common/utils.ts
src/nni_manager/common/utils.ts
+13
-15
src/nni_manager/core/nniDataStore.ts
src/nni_manager/core/nniDataStore.ts
+9
-8
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+23
-19
src/nni_manager/core/sqlDatabase.ts
src/nni_manager/core/sqlDatabase.ts
+0
-2
src/nni_manager/core/test/dataStore.test.ts
src/nni_manager/core/test/dataStore.test.ts
+0
-1
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
+1
-1
src/nni_manager/core/test/sqlDatabase.test.ts
src/nni_manager/core/test/sqlDatabase.test.ts
+1
-3
src/nni_manager/main.ts
src/nni_manager/main.ts
+1
-1
src/nni_manager/package.json
src/nni_manager/package.json
+0
-2
src/nni_manager/rest_server/restHandler.ts
src/nni_manager/rest_server/restHandler.ts
+19
-21
src/nni_manager/rest_server/restValidationSchemas.ts
src/nni_manager/rest_server/restValidationSchemas.ts
+8
-7
src/nni_manager/rest_server/test/mockedNNIManager.ts
src/nni_manager/rest_server/test/mockedNNIManager.ts
+0
-1
src/nni_manager/rest_server/test/restserver.test.ts
src/nni_manager/rest_server/test/restserver.test.ts
+0
-4
src/nni_manager/training_service/common/clusterJobRestServer.ts
...i_manager/training_service/common/clusterJobRestServer.ts
+2
-7
src/nni_manager/training_service/common/gpuData.ts
src/nni_manager/training_service/common/gpuData.ts
+1
-1
src/nni_manager/training_service/common/jobMetrics.ts
src/nni_manager/training_service/common/jobMetrics.ts
+1
-1
src/nni_manager/training_service/common/trialConfig.ts
src/nni_manager/training_service/common/trialConfig.ts
+4
-4
No files found.
src/nni_manager/common/log.ts
View file @
543239c6
...
...
@@ -2,7 +2,6 @@
// Licensed under the MIT license.
'
use strict
'
;
/* tslint:disable:no-any */
import
*
as
fs
from
'
fs
'
;
import
*
as
path
from
'
path
'
;
...
...
@@ -84,7 +83,7 @@ class Logger {
this
.
readonly
=
isReadonly
();
}
public
close
()
{
public
close
()
:
void
{
this
.
writable
.
destroy
();
}
...
...
src/nni_manager/common/observableTimer.ts
View file @
543239c6
...
...
@@ -18,7 +18,7 @@ class ObservableTimer {
return
this
.
observableSource
.
subscribe
(
onNext
,
onError
,
onCompleted
);
}
public
unsubscribe
(
subscription
:
Rx
.
IDisposable
)
{
public
unsubscribe
(
subscription
:
Rx
.
IDisposable
)
:
void
{
if
(
typeof
subscription
!==
undefined
)
{
subscription
.
dispose
();
}
...
...
src/nni_manager/common/restServer.ts
View file @
543239c6
...
...
@@ -34,7 +34,6 @@ export abstract class RestServer {
}
get
endPoint
():
string
{
// tslint:disable-next-line:no-http-string
return
`http://
${
this
.
hostName
}
:
${
this
.
port
}
`
;
}
...
...
src/nni_manager/common/utils.ts
View file @
543239c6
...
...
@@ -16,11 +16,9 @@ import { Container } from 'typescript-ioc';
import
*
as
util
from
'
util
'
;
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
ExperimentStartupInfo
,
getExperimentId
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
ExperimentStartupInfo
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
TrialConfig
}
from
'
../training_service/common/trialConfig
'
;
import
{
HyperParameters
,
TrainingService
,
TrialJobStatus
}
from
'
./trainingService
'
;
import
{
getLogger
}
from
'
./log
'
;
function
getExperimentRootDir
():
string
{
return
getExperimentStartupInfo
()
...
...
@@ -118,7 +116,6 @@ function uniqueString(len: number): string {
function
randomSelect
<
T
>
(
a
:
T
[]):
T
{
assert
(
a
!==
undefined
);
// tslint:disable-next-line:insecure-random
return
a
[
Math
.
floor
(
Math
.
random
()
*
a
.
length
)];
}
function
parseArg
(
names
:
string
[]):
string
{
...
...
@@ -236,11 +233,11 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
* Generate parameter file name based on HyperParameters object
* @param hyperParameters HyperParameters instance
*/
function
generateParamFileName
(
hyperParameters
:
HyperParameters
):
string
{
function
generateParamFileName
(
hyperParameters
:
HyperParameters
):
string
{
assert
(
hyperParameters
!==
undefined
);
assert
(
hyperParameters
.
index
>=
0
);
let
paramFileName
:
string
;
let
paramFileName
:
string
;
if
(
hyperParameters
.
index
==
0
)
{
paramFileName
=
'
parameter.cfg
'
;
}
else
{
...
...
@@ -283,7 +280,7 @@ function cleanupUnitTest(): void {
Container
.
restore
(
ExperimentStartupInfo
);
}
let
cachedipv4Address
:
string
=
''
;
let
cachedipv4Address
:
string
=
''
;
/**
* Get IPv4 address of current machine
*/
...
...
@@ -325,15 +322,15 @@ function getJobCancelStatus(isEarlyStopped: boolean): TrialJobStatus {
* Utility method to calculate file numbers under a directory, recursively
* @param directory directory name
*/
function
countFilesRecursively
(
directory
:
string
,
timeoutMilliSeconds
?:
number
):
Promise
<
number
>
{
function
countFilesRecursively
(
directory
:
string
):
Promise
<
number
>
{
if
(
!
fs
.
existsSync
(
directory
))
{
throw
Error
(
`Direcotory
${
directory
}
doesn't exist`
);
}
const
deferred
:
Deferred
<
number
>
=
new
Deferred
<
number
>
();
let
timeoutId
:
NodeJS
.
Timer
const
delayTimeout
:
Promise
<
number
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
)
:
void
=>
{
let
timeoutId
:
NodeJS
.
Timer
const
delayTimeout
:
Promise
<
number
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
):
void
=>
{
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId
=
setTimeout
(()
=>
{
reject
(
new
Error
(
`Timeout: path
${
directory
}
has too many files`
));
...
...
@@ -359,7 +356,7 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
}
function
validateFileName
(
fileName
:
string
):
boolean
{
le
t
pattern
:
string
=
'
^[a-z0-9A-Z
\
._-]+$
'
;
cons
t
pattern
:
string
=
'
^[a-z0-9A-Z._-]+$
'
;
const
validateResult
=
fileName
.
match
(
pattern
);
if
(
validateResult
)
{
return
true
;
...
...
@@ -374,7 +371,7 @@ async function validateFileNameRecursively(directory: string): Promise<boolean>
const
fileNameArray
:
string
[]
=
fs
.
readdirSync
(
directory
);
let
result
=
true
;
for
(
var
name
of
fileNameArray
){
for
(
const
name
of
fileNameArray
){
const
fullFilePath
:
string
=
path
.
join
(
directory
,
name
);
try
{
// validate file names and directory names
...
...
@@ -396,7 +393,7 @@ async function validateFileNameRecursively(directory: string): Promise<boolean>
* get the version of current package
*/
async
function
getVersion
():
Promise
<
string
>
{
const
deferred
:
Deferred
<
string
>
=
new
Deferred
<
string
>
();
const
deferred
:
Deferred
<
string
>
=
new
Deferred
<
string
>
();
import
(
path
.
join
(
__dirname
,
'
..
'
,
'
package.json
'
)).
then
((
pkg
)
=>
{
deferred
.
resolve
(
pkg
.
version
);
}).
catch
((
error
)
=>
{
...
...
@@ -430,7 +427,7 @@ function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newE
* judge whether the process is alive
*/
async
function
isAlive
(
pid
:
any
):
Promise
<
boolean
>
{
le
t
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
cons
t
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
let
alive
:
boolean
=
false
;
if
(
process
.
platform
===
'
win32
'
)
{
try
{
...
...
@@ -440,6 +437,7 @@ async function isAlive(pid: any): Promise<boolean> {
}
}
catch
(
error
)
{
//ignore
}
}
else
{
...
...
@@ -458,7 +456,7 @@ async function isAlive(pid: any): Promise<boolean> {
* kill process
*/
async
function
killPid
(
pid
:
any
):
Promise
<
void
>
{
le
t
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
cons
t
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
try
{
if
(
process
.
platform
===
"
win32
"
)
{
await
cpp
.
exec
(
`cmd.exe /c taskkill /PID
${
pid
}
/F`
);
...
...
src/nni_manager/core/nniDataStore.ts
View file @
543239c6
...
...
@@ -159,7 +159,7 @@ class NNIDataStore implements DataStore {
public
async
exportTrialHpConfigs
():
Promise
<
string
>
{
const
jobs
:
TrialJobInfo
[]
=
await
this
.
listTrialJobs
();
le
t
exportedData
:
ExportedDataFormat
[]
=
[];
cons
t
exportedData
:
ExportedDataFormat
[]
=
[];
for
(
const
job
of
jobs
)
{
if
(
job
.
hyperParameters
&&
job
.
finalMetricData
)
{
if
(
job
.
hyperParameters
.
length
===
1
&&
job
.
finalMetricData
.
length
===
1
)
{
...
...
@@ -172,18 +172,18 @@ class NNIDataStore implements DataStore {
};
exportedData
.
push
(
oneEntry
);
}
else
{
le
t
paraMap
:
Map
<
number
,
Object
>
=
new
Map
();
le
t
metricMap
:
Map
<
number
,
Object
>
=
new
Map
();
cons
t
paraMap
:
Map
<
number
,
Record
<
string
,
any
>
>
=
new
Map
();
cons
t
metricMap
:
Map
<
number
,
Record
<
string
,
any
>
>
=
new
Map
();
for
(
const
eachPara
of
job
.
hyperParameters
)
{
const
parameters
:
HyperParameterFormat
=
<
HyperParameterFormat
>
JSON
.
parse
(
eachPara
);
paraMap
.
set
(
parameters
.
parameter_id
,
parameters
.
parameters
);
}
for
(
const
eachMetric
of
job
.
finalMetricData
)
{
const
value
:
Object
=
JSON
.
parse
(
eachMetric
.
data
);
const
value
:
Record
<
string
,
any
>
=
JSON
.
parse
(
eachMetric
.
data
);
metricMap
.
set
(
Number
(
eachMetric
.
parameterId
),
value
);
}
paraMap
.
forEach
((
value
:
Object
,
key
:
number
)
=>
{
const
metricValue
:
Object
|
undefined
=
metricMap
.
get
(
key
);
paraMap
.
forEach
((
value
:
Record
<
string
,
any
>
,
key
:
number
)
=>
{
const
metricValue
:
Record
<
string
,
any
>
|
undefined
=
metricMap
.
get
(
key
);
if
(
metricValue
)
{
const
oneEntry
:
ExportedDataFormat
=
{
parameter
:
value
,
...
...
@@ -201,7 +201,7 @@ class NNIDataStore implements DataStore {
}
public
async
getImportedData
():
Promise
<
string
[]
>
{
le
t
importedData
:
string
[]
=
[];
cons
t
importedData
:
string
[]
=
[];
const
importDataEvents
:
TrialJobEventRecord
[]
=
await
this
.
db
.
queryTrialJobEvent
(
undefined
,
'
IMPORT_DATA
'
);
for
(
const
event
of
importDataEvents
)
{
if
(
event
.
data
)
{
...
...
@@ -304,7 +304,6 @@ class NNIDataStore implements DataStore {
}
}
// tslint:disable-next-line:cyclomatic-complexity
private
getTrialJobsByReplayEvents
(
trialJobEvents
:
TrialJobEventRecord
[]):
Map
<
string
,
TrialJobInfo
>
{
this
.
log
.
debug
(
'
getTrialJobsByReplayEvents begin
'
);
...
...
@@ -329,6 +328,7 @@ class NNIDataStore implements DataStore {
if
(
!
jobInfo
)
{
throw
new
Error
(
'
Empty JobInfo
'
);
}
/* eslint-disable no-fallthrough */
switch
(
record
.
event
)
{
case
'
RUNNING
'
:
if
(
record
.
timestamp
!==
undefined
)
{
...
...
@@ -358,6 +358,7 @@ class NNIDataStore implements DataStore {
}
default
:
}
/* eslint-enable no-fallthrough */
jobInfo
.
status
=
this
.
getJobStatusByLatestEvent
(
jobInfo
.
status
,
record
.
event
);
if
(
record
.
data
!==
undefined
&&
record
.
data
.
trim
().
length
>
0
)
{
const
newHParam
:
any
=
this
.
parseHyperParameter
(
record
.
data
);
...
...
src/nni_manager/core/nnimanager.ts
View file @
543239c6
...
...
@@ -4,8 +4,7 @@
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
ChildProcess
,
StdioOptions
}
from
'
child_process
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
*
as
component
from
'
../common/component
'
;
import
{
DataStore
,
MetricDataRecord
,
MetricType
,
TrialJobInfo
}
from
'
../common/datastore
'
;
...
...
@@ -21,7 +20,7 @@ import {
}
from
'
../common/trainingService
'
;
import
{
delay
,
getCheckpointDir
,
getExperimentRootDir
,
getLogDir
,
getMsgDispatcherCommand
,
mkDirP
,
getTunerProc
,
getLogLevel
,
isAlive
,
killPid
}
from
'
../common/utils
'
;
import
{
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
,
IMPORT_DATA
}
from
'
./commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
...
...
@@ -64,7 +63,7 @@ class NNIManager implements Manager {
status
:
'
INITIALIZED
'
,
errors
:
[]
};
this
.
trialJobMetricListener
=
(
metric
:
TrialJobMetric
)
=>
{
this
.
trialJobMetricListener
=
(
metric
:
TrialJobMetric
)
:
void
=>
{
this
.
onTrialJobMetrics
(
metric
).
catch
((
err
:
Error
)
=>
{
this
.
criticalError
(
NNIError
.
FromError
(
err
,
'
Job metrics error:
'
));
});
...
...
@@ -123,8 +122,8 @@ class NNIManager implements Manager {
// TODO: NNI manager should not peek tuner's internal protocol, let's refactor this later
const
packedParameter
=
{
parameter_id
:
null
,
parameter_source
:
'
customized
'
,
parameter_id
:
null
,
// eslint-disable-line @typescript-eslint/camelcase
parameter_source
:
'
customized
'
,
// eslint-disable-line @typescript-eslint/camelcase
parameters
:
JSON
.
parse
(
hyperParams
)
}
...
...
@@ -235,10 +234,10 @@ class NNIManager implements Manager {
// Collect generated trials and imported trials
const
finishedTrialData
:
string
=
await
this
.
exportData
();
const
importedData
:
string
[]
=
await
this
.
dataStore
.
getImportedData
();
let
trialData
:
Object
[]
=
JSON
.
parse
(
finishedTrialData
);
let
trialData
:
Record
<
string
,
any
>
[]
=
JSON
.
parse
(
finishedTrialData
);
for
(
const
oneImportedData
of
importedData
)
{
// do not deduplicate
trialData
=
trialData
.
concat
(
<
Object
[]
>
JSON
.
parse
(
oneImportedData
));
trialData
=
trialData
.
concat
(
<
Record
<
string
,
any
>
[]
>
JSON
.
parse
(
oneImportedData
));
}
this
.
trialDataForTuner
=
JSON
.
stringify
(
trialData
);
...
...
@@ -361,7 +360,7 @@ class NNIManager implements Manager {
includeIntermediateResultsEnv
=
this
.
experimentProfile
.
params
.
tuner
.
includeIntermediateResults
;
}
le
t
nniEnv
=
{
cons
t
nniEnv
=
{
NNI_MODE
:
mode
,
NNI_CHECKPOINT_DIRECTORY
:
dataDirectory
,
NNI_LOG_DIRECTORY
:
getLogDir
(),
...
...
@@ -369,7 +368,7 @@ class NNIManager implements Manager {
NNI_INCLUDE_INTERMEDIATE_RESULTS
:
includeIntermediateResultsEnv
,
CUDA_VISIBLE_DEVICES
:
this
.
getGpuEnvvarValue
()
};
le
t
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
cons
t
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
const
tunerProc
:
ChildProcess
=
getTunerProc
(
command
,
stdio
,
newCwd
,
newEnv
);
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
...
...
@@ -502,9 +501,9 @@ class NNIManager implements Manager {
finishedTrialJobNum
++
;
hyperParams
=
trialJobDetail
.
form
.
hyperParameters
.
value
;
this
.
dispatcher
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
trial_job_id
:
trialJobDetail
.
id
,
// eslint-disable-line @typescript-eslint/camelcase
event
:
trialJobDetail
.
status
,
hyper_params
:
hyperParams
hyper_params
:
hyperParams
// eslint-disable-line @typescript-eslint/camelcase
}));
break
;
case
'
FAILED
'
:
...
...
@@ -515,9 +514,9 @@ class NNIManager implements Manager {
finishedTrialJobNum
++
;
hyperParams
=
trialJobDetail
.
form
.
hyperParameters
.
value
;
this
.
dispatcher
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
trial_job_id
:
trialJobDetail
.
id
,
// eslint-disable-line @typescript-eslint/camelcase
event
:
trialJobDetail
.
status
,
hyper_params
:
hyperParams
hyper_params
:
hyperParams
// eslint-disable-line @typescript-eslint/camelcase
}));
break
;
case
'
WAITING
'
:
...
...
@@ -695,7 +694,7 @@ class NNIManager implements Manager {
private
async
onTunerCommand
(
commandType
:
string
,
content
:
string
):
Promise
<
void
>
{
this
.
log
.
info
(
`NNIManager received command from dispatcher:
${
commandType
}
,
${
content
}
`
);
switch
(
commandType
)
{
case
INITIALIZED
:
case
INITIALIZED
:
{
// Tuner is intialized, search space is set, request tuner to generate hyper parameters
if
(
this
.
trialDataForTuner
.
length
>
0
)
{
if
(
this
.
dispatcher
===
undefined
)
{
...
...
@@ -705,7 +704,8 @@ class NNIManager implements Manager {
}
this
.
requestTrialJobs
(
this
.
experimentProfile
.
params
.
trialConcurrency
);
break
;
case
NEW_TRIAL_JOB
:
}
case
NEW_TRIAL_JOB
:
{
if
(
this
.
status
.
status
===
'
TUNER_NO_MORE_TRIAL
'
)
{
this
.
log
.
warning
(
'
It is not supposed to receive more trials after NO_MORE_TRIAL is set
'
);
this
.
setStatus
(
'
RUNNING
'
);
...
...
@@ -719,7 +719,8 @@ class NNIManager implements Manager {
};
this
.
waitingTrials
.
push
(
form
);
break
;
case
SEND_TRIAL_JOB_PARAMETER
:
}
case
SEND_TRIAL_JOB_PARAMETER
:
{
const
tunerCommand
:
any
=
JSON
.
parse
(
content
);
assert
(
tunerCommand
.
parameter_index
>=
0
);
assert
(
tunerCommand
.
trial_job_id
!==
undefined
);
...
...
@@ -739,15 +740,18 @@ class NNIManager implements Manager {
'
ADD_HYPERPARAMETER
'
,
tunerCommand
.
trial_job_id
,
content
,
undefined
);
}
break
;
case
NO_MORE_TRIAL_JOBS
:
}
case
NO_MORE_TRIAL_JOBS
:
{
if
(
!
[
'
ERROR
'
,
'
STOPPING
'
,
'
STOPPED
'
].
includes
(
this
.
status
.
status
))
{
this
.
setStatus
(
'
TUNER_NO_MORE_TRIAL
'
);
}
break
;
case
KILL_TRIAL_JOB
:
}
case
KILL_TRIAL_JOB
:
{
this
.
log
.
info
(
`cancelTrialJob:
${
JSON
.
parse
(
content
)}
`
);
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
),
true
);
break
;
}
default
:
throw
new
Error
(
'
Error: unsupported command type from tuner
'
);
}
...
...
src/nni_manager/core/sqlDatabase.ts
View file @
543239c6
...
...
@@ -20,7 +20,6 @@ import { getLogger, Logger } from '../common/log';
import
{
ExperimentProfile
}
from
'
../common/manager
'
;
import
{
TrialJobDetail
}
from
'
../common/trainingService
'
;
/* tslint:disable:no-any */
const
createTables
:
string
=
`
create table TrialJobEvent (timestamp integer, trialJobId text, event text, data text, logPath text, sequenceId integer);
...
...
@@ -91,7 +90,6 @@ class SqlDB implements Database {
this
.
log
.
debug
(
`Database directory:
${
dbDir
}
`
);
assert
(
fs
.
existsSync
(
dbDir
));
// tslint:disable-next-line:no-bitwise
const
mode
:
number
=
createNew
?
(
sqlite3
.
OPEN_CREATE
|
sqlite3
.
OPEN_READWRITE
)
:
sqlite3
.
OPEN_READWRITE
;
const
dbFileName
:
string
=
path
.
join
(
dbDir
,
'
nni.sqlite
'
);
...
...
src/nni_manager/core/test/dataStore.test.ts
View file @
543239c6
...
...
@@ -107,7 +107,6 @@ describe('Unit test for dataStore', () => {
}
];
// tslint:disable-next-line:no-any
const
metricsData
:
any
=
[
{
trial_job_id
:
'
111
'
,
...
...
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
View file @
543239c6
...
...
@@ -47,7 +47,7 @@ function startProcess(): void {
// create IPC interface
dispatcher
=
createDispatcherInterface
(
proc
);
(
<
IpcInterface
>
dispatcher
).
onCommand
((
commandType
:
string
,
content
:
string
):
void
=>
{
console
.
log
(
commandType
,
content
);
// tslint:disable-line:no-console
console
.
log
(
commandType
,
content
);
});
}
...
...
src/nni_manager/core/test/sqlDatabase.test.ts
View file @
543239c6
...
...
@@ -70,19 +70,17 @@ const metrics: MetricDataRecord[] = [
{
timestamp
:
Date
.
now
(),
trialJobId
:
'
C
'
,
parameterId
:
'
2
'
,
type
:
'
FINAL
'
,
sequence
:
0
,
data
:
2.2
}
// 5
];
// tslint:disable-next-line:no-any
function
assertRecordEqual
(
record
:
any
,
value
:
any
):
void
{
assert
.
ok
(
record
.
timestamp
>
new
Date
(
2018
,
6
,
1
).
getTime
());
assert
.
ok
(
record
.
timestamp
<
Date
.
now
());
for
(
const
key
in
value
)
{
// tslint:disable-line:no-for-in
for
(
const
key
in
value
)
{
if
(
key
!==
'
timestamp
'
)
{
assert
.
equal
(
record
[
key
],
value
[
key
]);
}
}
}
// tslint:disable-next-line:no-any
function
assertRecordsEqual
(
records
:
any
[],
inputs
:
any
[],
indices
:
number
[]):
void
{
assert
.
equal
(
records
.
length
,
indices
.
length
);
for
(
let
i
:
number
=
0
;
i
<
records
.
length
;
i
++
)
{
...
...
src/nni_manager/main.ts
View file @
543239c6
...
...
@@ -55,7 +55,7 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
.
to
(
FrameworkControllerTrainingService
)
.
scope
(
Scope
.
Singleton
);
}
else
{
throw
new
Error
(
`Error: unsupported mode:
${
m
ode
}
`
);
throw
new
Error
(
`Error: unsupported mode:
${
platformM
ode
}
`
);
}
Container
.
bind
(
Manager
)
.
to
(
NNIManager
)
...
...
src/nni_manager/package.json
View file @
543239c6
...
...
@@ -54,8 +54,6 @@
"rmdir"
:
"^1.2.0"
,
"tmp"
:
"^0.0.33"
,
"ts-node"
:
"^7.0.0"
,
"tslint"
:
"^5.12.0"
,
"tslint-microsoft-contrib"
:
"^6.0.0"
,
"typescript"
:
"^3.2.2"
},
"resolutions"
:
{
...
...
src/nni_manager/rest_server/restHandler.ts
View file @
543239c6
...
...
@@ -11,7 +11,7 @@ import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import
{
NNIError
,
NNIErrorNames
}
from
'
../common/errors
'
;
import
{
isNewExperiment
,
isReadonly
}
from
'
../common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
,
ExperimentStartUpMode
}
from
'
../common/manager
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
}
from
'
../common/manager
'
;
import
{
ValidationSchemas
}
from
'
./restValidationSchemas
'
;
import
{
NNIRestServer
}
from
'
./nniRestServer
'
;
import
{
getVersion
}
from
'
../common/utils
'
;
...
...
@@ -32,7 +32,6 @@ class NNIRestHandler {
public
createRestHandler
():
Router
{
const
router
:
Router
=
Router
();
// tslint:disable-next-line:typedef
router
.
use
((
req
:
Request
,
res
:
Response
,
next
)
=>
{
this
.
log
.
debug
(
`
${
req
.
method
}
:
${
req
.
url
}
: body:\n
${
JSON
.
stringify
(
req
.
body
,
undefined
,
4
)}
`
);
res
.
header
(
'
Access-Control-Allow-Origin
'
,
'
*
'
);
...
...
@@ -72,7 +71,7 @@ class NNIRestHandler {
return
router
;
}
private
handle
_e
rror
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
,
errorCode
:
number
=
500
):
void
{
private
handle
E
rror
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
,
errorCode
:
number
=
500
):
void
{
if
(
err
instanceof
NNIError
&&
err
.
name
===
NNIErrorNames
.
NOT_FOUND
)
{
res
.
status
(
404
);
}
else
{
...
...
@@ -105,7 +104,7 @@ class NNIRestHandler {
ds
.
init
().
then
(()
=>
{
res
.
send
(
this
.
nniManager
.
getStatus
());
}).
catch
(
async
(
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
this
.
log
.
error
(
err
.
message
);
this
.
log
.
error
(
`Datastore initialize failed, stopping rest server...`
);
await
this
.
restServer
.
stop
();
...
...
@@ -118,7 +117,7 @@ class NNIRestHandler {
this
.
nniManager
.
getExperimentProfile
().
then
((
profile
:
ExperimentProfile
)
=>
{
res
.
send
(
profile
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -128,7 +127,7 @@ class NNIRestHandler {
this
.
nniManager
.
updateExperimentProfile
(
req
.
body
,
req
.
query
.
update_type
).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -138,7 +137,7 @@ class NNIRestHandler {
this
.
nniManager
.
importData
(
JSON
.
stringify
(
req
.
body
)).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -148,18 +147,18 @@ class NNIRestHandler {
if
(
isNewExperiment
())
{
this
.
nniManager
.
startExperiment
(
req
.
body
).
then
((
eid
:
string
)
=>
{
res
.
send
({
experiment_id
:
eid
experiment_id
:
eid
// eslint-disable-line @typescript-eslint/camelcase
});
}).
catch
((
err
:
Error
)
=>
{
// Start experiment is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
}
else
{
this
.
nniManager
.
resumeExperiment
(
isReadonly
()).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
// Resume experiment is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
}
});
...
...
@@ -170,7 +169,7 @@ class NNIRestHandler {
this
.
nniManager
.
getTrialJobStatistics
().
then
((
statistics
:
TrialJobStatistics
[])
=>
{
res
.
send
(
statistics
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -179,7 +178,6 @@ class NNIRestHandler {
router
.
put
(
'
/experiment/cluster-metadata
'
,
expressJoi
(
ValidationSchemas
.
SETCLUSTERMETADATA
),
async
(
req
:
Request
,
res
:
Response
)
=>
{
// tslint:disable-next-line:no-any
const
metadata
:
any
=
req
.
body
;
const
keys
:
string
[]
=
Object
.
keys
(
metadata
);
try
{
...
...
@@ -189,7 +187,7 @@ class NNIRestHandler {
res
.
send
();
}
catch
(
err
)
{
// setClusterMetata is a step of initialization, so any exception thrown is a fatal
this
.
handle
_e
rror
(
NNIError
.
FromError
(
err
),
res
,
true
);
this
.
handle
E
rror
(
NNIError
.
FromError
(
err
),
res
,
true
);
}
});
}
...
...
@@ -202,7 +200,7 @@ class NNIRestHandler {
});
res
.
send
(
jobInfos
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -213,7 +211,7 @@ class NNIRestHandler {
const
jobInfo
:
TrialJobInfo
=
this
.
setErrorPathForFailedJob
(
jobDetail
);
res
.
send
(
jobInfo
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -223,7 +221,7 @@ class NNIRestHandler {
this
.
nniManager
.
addCustomizedTrialJob
(
JSON
.
stringify
(
req
.
body
)).
then
((
sequenceId
:
number
)
=>
{
res
.
send
({
sequenceId
});
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -233,7 +231,7 @@ class NNIRestHandler {
this
.
nniManager
.
cancelTrialJobByUser
(
req
.
params
.
id
).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -243,7 +241,7 @@ class NNIRestHandler {
this
.
nniManager
.
getMetricData
(
req
.
params
.
job_id
,
req
.
query
.
type
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -255,7 +253,7 @@ class NNIRestHandler {
this
.
nniManager
.
getMetricDataByRange
(
minSeqId
,
maxSeqId
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -265,7 +263,7 @@ class NNIRestHandler {
this
.
nniManager
.
getLatestMetricData
().
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
@@ -275,7 +273,7 @@ class NNIRestHandler {
this
.
nniManager
.
exportData
().
then
((
exportedData
:
string
)
=>
{
res
.
send
(
exportedData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handle
_e
rror
(
err
,
res
);
this
.
handle
E
rror
(
err
,
res
);
});
});
}
...
...
src/nni_manager/rest_server/restValidationSchemas.ts
View file @
543239c6
...
...
@@ -8,7 +8,7 @@ const joi = require('joi');
export
namespace
ValidationSchemas
{
export
const
SETCLUSTERMETADATA
=
{
body
:
{
machine_list
:
joi
.
array
().
items
(
joi
.
object
({
machine_list
:
joi
.
array
().
items
(
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
username
:
joi
.
string
().
required
(),
ip
:
joi
.
string
().
ip
().
required
(),
port
:
joi
.
number
().
min
(
1
).
max
(
65535
).
required
(),
...
...
@@ -19,12 +19,12 @@ export namespace ValidationSchemas {
maxTrialNumPerGpu
:
joi
.
number
(),
useActiveGpu
:
joi
.
boolean
()
})),
local_config
:
joi
.
object
({
local_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
gpuIndices
:
joi
.
string
(),
maxTrialNumPerGpu
:
joi
.
number
(),
useActiveGpu
:
joi
.
boolean
()
}),
trial_config
:
joi
.
object
({
trial_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
image
:
joi
.
string
().
min
(
1
),
codeDir
:
joi
.
string
().
min
(
1
).
required
(),
dataDir
:
joi
.
string
(),
...
...
@@ -89,13 +89,13 @@ export namespace ValidationSchemas {
})
})
}),
pai_config
:
joi
.
object
({
pai_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
userName
:
joi
.
string
().
min
(
1
).
required
(),
passWord
:
joi
.
string
().
min
(
1
),
token
:
joi
.
string
().
min
(
1
),
host
:
joi
.
string
().
min
(
1
).
required
()
}),
kubeflow_config
:
joi
.
object
({
kubeflow_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
operator
:
joi
.
string
().
min
(
1
).
required
(),
storage
:
joi
.
string
().
min
(
1
),
apiVersion
:
joi
.
string
().
min
(
1
),
...
...
@@ -113,7 +113,7 @@ export namespace ValidationSchemas {
}),
uploadRetryCount
:
joi
.
number
().
min
(
1
)
}),
frameworkcontroller_config
:
joi
.
object
({
frameworkcontroller_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
storage
:
joi
.
string
().
min
(
1
),
serviceAccountName
:
joi
.
string
().
min
(
1
),
nfs
:
joi
.
object
({
...
...
@@ -130,7 +130,7 @@ export namespace ValidationSchemas {
}),
uploadRetryCount
:
joi
.
number
().
min
(
1
)
}),
nni_manager_ip
:
joi
.
object
({
nni_manager_ip
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
nniManagerIp
:
joi
.
string
().
min
(
1
)
})
}
...
...
@@ -184,6 +184,7 @@ export namespace ValidationSchemas {
};
export
const
UPDATEEXPERIMENT
=
{
query
:
{
/* eslint-disable-next-line @typescript-eslint/camelcase */
update_type
:
joi
.
string
().
required
().
valid
(
'
TRIAL_CONCURRENCY
'
,
'
MAX_EXEC_DURATION
'
,
'
SEARCH_SPACE
'
,
'
MAX_TRIAL_NUM
'
)
},
body
:
{
...
...
src/nni_manager/rest_server/test/mockedNNIManager.ts
View file @
543239c6
...
...
@@ -66,7 +66,6 @@ export class MockedNNIManager extends Manager {
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
tags
:
[
'
test
'
],
// tslint:disable-next-line:no-http-string
url
:
'
http://test
'
,
workingDirectory
:
'
/tmp/mocked
'
,
form
:
{
...
...
src/nni_manager/rest_server/test/restserver.test.ts
View file @
543239c6
...
...
@@ -3,9 +3,7 @@
'
use strict
'
;
// tslint:disable-next-line:no-implicit-dependencies
import
{
assert
,
expect
}
from
'
chai
'
;
// tslint:disable-next-line:no-implicit-dependencies
import
*
as
request
from
'
request
'
;
import
{
Container
}
from
'
typescript-ioc
'
;
...
...
@@ -54,7 +52,6 @@ describe('Unit test for rest server', () => {
});
it
(
'
Test GET trial-jobs/:id
'
,
(
done
:
Mocha
.
Done
)
=>
{
// tslint:disable-next-line:no-any
request
.
get
(
`
${
ROOT_URL
}
/trial-jobs/1234`
,
(
err
:
Error
,
res
:
request
.
Response
,
body
:
any
)
=>
{
if
(
err
)
{
assert
.
fail
(
err
.
message
);
...
...
@@ -88,7 +85,6 @@ describe('Unit test for rest server', () => {
});
it
(
'
Test change concurrent-trial-jobs
'
,
(
done
:
Mocha
.
Done
)
=>
{
// tslint:disable-next-line:no-any
request
.
get
(
`
${
ROOT_URL
}
/experiment`
,
(
err
:
Error
,
res
:
request
.
Response
,
body
:
any
)
=>
{
if
(
err
)
{
assert
.
fail
(
err
.
message
);
...
...
src/nni_manager/training_service/common/clusterJobRestServer.ts
View file @
543239c6
...
...
@@ -4,7 +4,6 @@
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
// tslint:disable-next-line:no-implicit-dependencies
import
*
as
bodyParser
from
'
body-parser
'
;
import
{
Request
,
Response
,
Router
}
from
'
express
'
;
import
*
as
fs
from
'
fs
'
;
...
...
@@ -71,11 +70,9 @@ export abstract class ClusterJobRestServer extends RestServer {
}
// Abstract method to handle trial metrics data
// tslint:disable-next-line:no-any
protected
abstract
handleTrialMetrics
(
jobId
:
string
,
trialMetrics
:
any
[])
:
void
;
protected
abstract
handleTrialMetrics
(
jobId
:
string
,
trialMetrics
:
any
[]):
void
;
// tslint:disable: no-unsafe-any no-any
protected
createRestHandler
()
:
Router
{
protected
createRestHandler
():
Router
{
const
router
:
Router
=
Router
();
router
.
use
((
req
:
Request
,
res
:
Response
,
next
:
any
)
=>
{
...
...
@@ -146,7 +143,6 @@ export abstract class ClusterJobRestServer extends RestServer {
if
(
!
skipLogging
)
{
// Construct write stream to write remote trial's log into local file
// tslint:disable-next-line:non-literal-fs-path
const
writeStream
:
Writable
=
fs
.
createWriteStream
(
trialLogPath
,
{
flags
:
'
a+
'
,
encoding
:
'
utf8
'
,
...
...
@@ -166,5 +162,4 @@ export abstract class ClusterJobRestServer extends RestServer {
return
router
;
}
// tslint:enable: no-unsafe-any no-any
}
src/nni_manager/training_service/common/gpuData.ts
View file @
543239c6
...
...
@@ -17,7 +17,7 @@ export class GPUInfo {
// the index number of this GPU (starting from 0)
public
readonly
index
:
number
;
constructor
(
activeProcessNum
:
number
,
gpuMemUtil
:
number
,
gpuUtil
:
number
,
index
:
number
)
{
constructor
(
activeProcessNum
:
number
,
gpuMemUtil
:
number
,
gpuUtil
:
number
,
index
:
number
)
{
this
.
activeProcessNum
=
activeProcessNum
;
this
.
gpuMemUtil
=
gpuMemUtil
;
this
.
gpuUtil
=
gpuUtil
;
...
...
src/nni_manager/training_service/common/jobMetrics.ts
View file @
543239c6
...
...
@@ -15,7 +15,7 @@ export class JobMetrics {
public
readonly
jobStatus
:
TrialJobStatus
;
public
readonly
endTimestamp
:
number
;
constructor
(
jobId
:
string
,
metrics
:
string
[],
jobStatus
:
TrialJobStatus
,
endTimestamp
:
number
)
{
constructor
(
jobId
:
string
,
metrics
:
string
[],
jobStatus
:
TrialJobStatus
,
endTimestamp
:
number
)
{
this
.
jobId
=
jobId
;
this
.
metrics
=
metrics
;
this
.
jobStatus
=
jobStatus
;
...
...
src/nni_manager/training_service/common/trialConfig.ts
View file @
543239c6
...
...
@@ -9,13 +9,13 @@
*/
export
class
TrialConfig
{
// Trail command
public
readonly
command
:
string
;
public
readonly
command
:
string
;
// Code directory
public
readonly
codeDir
:
string
;
public
readonly
codeDir
:
string
;
// Required GPU number for trial job. The number should be in [0,100]
public
readonly
gpuNum
:
number
;
public
readonly
gpuNum
:
number
;
/**
* Constructor
...
...
@@ -23,7 +23,7 @@ export class TrialConfig {
* @param codeDir Code directory
* @param gpuNum Required GPU number for trial job
*/
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
)
{
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
)
{
this
.
command
=
command
;
this
.
codeDir
=
codeDir
;
this
.
gpuNum
=
gpuNum
;
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment