Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
521f1917
Unverified
Commit
521f1917
authored
Jun 03, 2021
by
liuzhe-lz
Committed by
GitHub
Jun 03, 2021
Browse files
Fix a logging related bug (#3705)
parent
b7c91e73
Changes
34
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
44 additions
and
45 deletions
+44
-45
ts/nni_manager/common/pythonScript.ts
ts/nni_manager/common/pythonScript.ts
+1
-1
ts/nni_manager/common/restServer.ts
ts/nni_manager/common/restServer.ts
+1
-1
ts/nni_manager/core/ipcInterface.ts
ts/nni_manager/core/ipcInterface.ts
+1
-1
ts/nni_manager/core/nniDataStore.ts
ts/nni_manager/core/nniDataStore.ts
+2
-3
ts/nni_manager/core/nniExperimentsManager.ts
ts/nni_manager/core/nniExperimentsManager.ts
+1
-1
ts/nni_manager/core/nniTensorboardManager.ts
ts/nni_manager/core/nniTensorboardManager.ts
+1
-1
ts/nni_manager/core/nnimanager.ts
ts/nni_manager/core/nnimanager.ts
+6
-6
ts/nni_manager/core/sqlDatabase.ts
ts/nni_manager/core/sqlDatabase.ts
+8
-8
ts/nni_manager/rest_server/restHandler.ts
ts/nni_manager/rest_server/restHandler.ts
+2
-2
ts/nni_manager/training_service/common/clusterJobRestServer.ts
...i_manager/training_service/common/clusterJobRestServer.ts
+2
-2
ts/nni_manager/training_service/common/util.ts
ts/nni_manager/training_service/common/util.ts
+1
-1
ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts
...er/training_service/kubernetes/azureStorageClientUtils.ts
+7
-7
ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts
...anager/training_service/kubernetes/kubernetesApiClient.ts
+2
-2
ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts
...training_service/kubernetes/kubernetesJobInfoCollector.ts
+1
-1
ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
.../training_service/kubernetes/kubernetesTrainingService.ts
+1
-1
ts/nni_manager/training_service/local/gpuScheduler.ts
ts/nni_manager/training_service/local/gpuScheduler.ts
+1
-1
ts/nni_manager/training_service/local/localTrainingService.ts
...ni_manager/training_service/local/localTrainingService.ts
+2
-2
ts/nni_manager/training_service/pai/paiJobInfoCollector.ts
ts/nni_manager/training_service/pai/paiJobInfoCollector.ts
+1
-1
ts/nni_manager/training_service/pai/paiJobRestServer.ts
ts/nni_manager/training_service/pai/paiJobRestServer.ts
+1
-1
ts/nni_manager/training_service/pai/paiTrainingService.ts
ts/nni_manager/training_service/pai/paiTrainingService.ts
+2
-2
No files found.
ts/nni_manager/common/pythonScript.ts
View file @
521f1917
...
@@ -22,7 +22,7 @@ export async function runPythonScript(script: string, logger?: Logger): Promise<
...
@@ -22,7 +22,7 @@ export async function runPythonScript(script: string, logger?: Logger): Promise<
if
(
stderr
)
{
if
(
stderr
)
{
if
(
logger
===
undefined
)
{
if
(
logger
===
undefined
)
{
logger
=
getLogger
();
logger
=
getLogger
(
'
pythonScript
'
);
}
}
logger
.
warning
(
'
python script has stderr.
'
);
logger
.
warning
(
'
python script has stderr.
'
);
logger
.
warning
(
'
script:
'
,
script
);
logger
.
warning
(
'
script:
'
,
script
);
...
...
ts/nni_manager/common/restServer.ts
View file @
521f1917
...
@@ -25,7 +25,7 @@ export abstract class RestServer {
...
@@ -25,7 +25,7 @@ export abstract class RestServer {
protected
hostName
:
string
=
'
0.0.0.0
'
;
protected
hostName
:
string
=
'
0.0.0.0
'
;
protected
port
?:
number
;
protected
port
?:
number
;
protected
app
:
express
.
Application
=
express
();
protected
app
:
express
.
Application
=
express
();
protected
log
:
Logger
=
getLogger
();
protected
log
:
Logger
=
getLogger
(
'
RestServer
'
);
protected
basePort
?:
number
;
protected
basePort
?:
number
;
constructor
()
{
constructor
()
{
...
...
ts/nni_manager/core/ipcInterface.ts
View file @
521f1917
...
@@ -56,7 +56,7 @@ class IpcInterface {
...
@@ -56,7 +56,7 @@ class IpcInterface {
private
incomingStream
:
Readable
;
private
incomingStream
:
Readable
;
private
eventEmitter
:
EventEmitter
;
private
eventEmitter
:
EventEmitter
;
private
readBuffer
:
Buffer
;
private
readBuffer
:
Buffer
;
private
logger
:
Logger
=
getLogger
();
private
logger
:
Logger
=
getLogger
(
'
IpcInterface
'
);
/**
/**
* Construct a IPC proxy
* Construct a IPC proxy
...
...
ts/nni_manager/core/nniDataStore.ts
View file @
521f1917
...
@@ -19,7 +19,7 @@ import { getDefaultDatabaseDir, mkDirP } from '../common/utils';
...
@@ -19,7 +19,7 @@ import { getDefaultDatabaseDir, mkDirP } from '../common/utils';
class
NNIDataStore
implements
DataStore
{
class
NNIDataStore
implements
DataStore
{
private
db
:
Database
=
component
.
get
(
Database
);
private
db
:
Database
=
component
.
get
(
Database
);
private
log
:
Logger
=
getLogger
();
private
log
:
Logger
=
getLogger
(
'
NNIDataStore
'
);
private
initTask
!
:
Deferred
<
void
>
;
private
initTask
!
:
Deferred
<
void
>
;
public
init
():
Promise
<
void
>
{
public
init
():
Promise
<
void
>
{
...
@@ -71,7 +71,6 @@ class NNIDataStore implements DataStore {
...
@@ -71,7 +71,6 @@ class NNIDataStore implements DataStore {
public
storeTrialJobEvent
(
public
storeTrialJobEvent
(
event
:
TrialJobEvent
,
trialJobId
:
string
,
hyperParameter
?:
string
,
jobDetail
?:
TrialJobDetail
):
Promise
<
void
>
{
event
:
TrialJobEvent
,
trialJobId
:
string
,
hyperParameter
?:
string
,
jobDetail
?:
TrialJobDetail
):
Promise
<
void
>
{
//this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`);
// Use the timestamp in jobDetail as TrialJobEvent timestamp for different events
// Use the timestamp in jobDetail as TrialJobEvent timestamp for different events
let
timestamp
:
number
|
undefined
;
let
timestamp
:
number
|
undefined
;
...
@@ -243,7 +242,7 @@ class NNIDataStore implements DataStore {
...
@@ -243,7 +242,7 @@ class NNIDataStore implements DataStore {
for
(
const
metric
of
metrics
)
{
for
(
const
metric
of
metrics
)
{
const
existMetrics
:
MetricDataRecord
[]
|
undefined
=
map
.
get
(
metric
.
trialJobId
);
const
existMetrics
:
MetricDataRecord
[]
|
undefined
=
map
.
get
(
metric
.
trialJobId
);
if
(
existMetrics
!==
undefined
)
{
if
(
existMetrics
!==
undefined
)
{
this
.
log
.
error
(
`Found multiple FINAL results for trial job
${
trialJobId
}
, metrics:
${
JSON
.
stringify
(
metrics
)
}
`
)
;
this
.
log
.
error
(
`Found multiple FINAL results for trial job
${
trialJobId
}
, metrics:
`
,
metrics
);
}
else
{
}
else
{
map
.
set
(
metric
.
trialJobId
,
[
metric
]);
map
.
set
(
metric
.
trialJobId
,
[
metric
]);
}
}
...
...
ts/nni_manager/core/nniExperimentsManager.ts
View file @
521f1917
...
@@ -30,7 +30,7 @@ class NNIExperimentsManager implements ExperimentManager {
...
@@ -30,7 +30,7 @@ class NNIExperimentsManager implements ExperimentManager {
constructor
()
{
constructor
()
{
this
.
experimentsPath
=
getExperimentsInfoPath
();
this
.
experimentsPath
=
getExperimentsInfoPath
();
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
NNIExperimentsManager
'
);
this
.
profileUpdateTimer
=
{};
this
.
profileUpdateTimer
=
{};
}
}
...
...
ts/nni_manager/core/nniTensorboardManager.ts
View file @
521f1917
...
@@ -37,7 +37,7 @@ class NNITensorboardManager implements TensorboardManager {
...
@@ -37,7 +37,7 @@ class NNITensorboardManager implements TensorboardManager {
private
nniManager
:
Manager
;
private
nniManager
:
Manager
;
constructor
()
{
constructor
()
{
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
NNITensorboardManager
'
);
this
.
tensorboardTaskMap
=
new
Map
<
string
,
TensorboardTaskDetail
>
();
this
.
tensorboardTaskMap
=
new
Map
<
string
,
TensorboardTaskDetail
>
();
this
.
setTensorboardVersion
();
this
.
setTensorboardVersion
();
this
.
nniManager
=
component
.
get
(
Manager
);
this
.
nniManager
=
component
.
get
(
Manager
);
...
...
ts/nni_manager/core/nnimanager.ts
View file @
521f1917
...
@@ -61,7 +61,7 @@ class NNIManager implements Manager {
...
@@ -61,7 +61,7 @@ class NNIManager implements Manager {
this
.
trialDataForTuner
=
''
;
this
.
trialDataForTuner
=
''
;
this
.
readonly
=
false
;
this
.
readonly
=
false
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
NNIManager
'
);
this
.
dataStore
=
component
.
get
(
DataStore
);
this
.
dataStore
=
component
.
get
(
DataStore
);
this
.
status
=
{
this
.
status
=
{
status
:
'
INITIALIZED
'
,
status
:
'
INITIALIZED
'
,
...
@@ -659,7 +659,7 @@ class NNIManager implements Manager {
...
@@ -659,7 +659,7 @@ class NNIManager implements Manager {
}
}
const
form
=
this
.
waitingTrials
.
shift
()
as
TrialJobApplicationForm
;
const
form
=
this
.
waitingTrials
.
shift
()
as
TrialJobApplicationForm
;
this
.
currSubmittedTrialNum
++
;
this
.
currSubmittedTrialNum
++
;
this
.
log
.
info
(
`
submitTrialJob: form:
${
JSON
.
stringify
(
form
)
}
`
)
;
this
.
log
.
info
(
'
submitTrialJob: form:
'
,
form
);
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
form
);
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
form
);
const
Snapshot
:
TrialJobDetail
=
Object
.
assign
({},
trialJobDetail
);
const
Snapshot
:
TrialJobDetail
=
Object
.
assign
({},
trialJobDetail
);
await
this
.
storeExperimentProfile
();
await
this
.
storeExperimentProfile
();
...
@@ -732,7 +732,7 @@ class NNIManager implements Manager {
...
@@ -732,7 +732,7 @@ class NNIManager implements Manager {
}
}
private
async
onTrialJobMetrics
(
metric
:
TrialJobMetric
):
Promise
<
void
>
{
private
async
onTrialJobMetrics
(
metric
:
TrialJobMetric
):
Promise
<
void
>
{
this
.
log
.
debug
(
`
NNIManager received trial job metrics:
${
JSON
.
stringify
(
metric
)
}
`
)
;
this
.
log
.
debug
(
'
NNIManager received trial job metrics:
'
,
metric
);
if
(
this
.
trialJobs
.
has
(
metric
.
id
)){
if
(
this
.
trialJobs
.
has
(
metric
.
id
)){
await
this
.
dataStore
.
storeMetricData
(
metric
.
id
,
metric
.
data
);
await
this
.
dataStore
.
storeMetricData
(
metric
.
id
,
metric
.
data
);
if
(
this
.
dispatcher
===
undefined
)
{
if
(
this
.
dispatcher
===
undefined
)
{
...
@@ -740,7 +740,7 @@ class NNIManager implements Manager {
...
@@ -740,7 +740,7 @@ class NNIManager implements Manager {
}
}
this
.
dispatcher
.
sendCommand
(
REPORT_METRIC_DATA
,
metric
.
data
);
this
.
dispatcher
.
sendCommand
(
REPORT_METRIC_DATA
,
metric
.
data
);
}
else
{
}
else
{
this
.
log
.
warning
(
`
NNIManager received non-existent trial job metrics:
${
metric
}
`
);
this
.
log
.
warning
(
'
NNIManager received non-existent trial job metrics:
'
,
metric
);
}
}
}
}
...
@@ -804,7 +804,7 @@ class NNIManager implements Manager {
...
@@ -804,7 +804,7 @@ class NNIManager implements Manager {
index
:
tunerCommand
.
parameter_index
index
:
tunerCommand
.
parameter_index
}
}
};
};
this
.
log
.
info
(
`
updateTrialJob: job id:
${
tunerCommand
.
trial_job_id
}
, form:
${
JSON
.
stringify
(
trialJobForm
)
}
`
)
;
this
.
log
.
info
(
'
updateTrialJob: job id:
'
,
tunerCommand
.
trial_job_id
,
'
form:
'
,
trialJobForm
);
await
this
.
trainingService
.
updateTrialJob
(
tunerCommand
.
trial_job_id
,
trialJobForm
);
await
this
.
trainingService
.
updateTrialJob
(
tunerCommand
.
trial_job_id
,
trialJobForm
);
if
(
tunerCommand
[
'
parameters
'
]
!==
null
)
{
if
(
tunerCommand
[
'
parameters
'
]
!==
null
)
{
// parameters field is set as empty string if no more hyper parameter can be generated by tuner.
// parameters field is set as empty string if no more hyper parameter can be generated by tuner.
...
@@ -820,7 +820,7 @@ class NNIManager implements Manager {
...
@@ -820,7 +820,7 @@ class NNIManager implements Manager {
break
;
break
;
}
}
case
KILL_TRIAL_JOB
:
{
case
KILL_TRIAL_JOB
:
{
this
.
log
.
info
(
`
cancelTrialJob:
${
JSON
.
parse
(
content
)
}
`
)
;
this
.
log
.
info
(
'
cancelTrialJob:
'
,
content
);
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
),
true
);
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
),
true
);
break
;
break
;
}
}
...
...
ts/nni_manager/core/sqlDatabase.ts
View file @
521f1917
...
@@ -80,7 +80,7 @@ function loadMetricData(row: any): MetricDataRecord {
...
@@ -80,7 +80,7 @@ function loadMetricData(row: any): MetricDataRecord {
class
SqlDB
implements
Database
{
class
SqlDB
implements
Database
{
private
db
!
:
sqlite3
.
Database
;
private
db
!
:
sqlite3
.
Database
;
private
log
:
Logger
=
getLogger
();
private
log
:
Logger
=
getLogger
(
'
SqlDB
'
);
private
initTask
!
:
Deferred
<
void
>
;
private
initTask
!
:
Deferred
<
void
>
;
public
init
(
createNew
:
boolean
,
dbDir
:
string
):
Promise
<
void
>
{
public
init
(
createNew
:
boolean
,
dbDir
:
string
):
Promise
<
void
>
{
...
@@ -130,7 +130,7 @@ class SqlDB implements Database {
...
@@ -130,7 +130,7 @@ class SqlDB implements Database {
exp
.
nextSequenceId
,
exp
.
nextSequenceId
,
exp
.
revision
exp
.
revision
];
];
this
.
log
.
trace
(
`storeExperimentProfile: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`storeExperimentProfile: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
...
@@ -147,7 +147,7 @@ class SqlDB implements Database {
...
@@ -147,7 +147,7 @@ class SqlDB implements Database {
sql
=
'
select * from ExperimentProfile where id=? and revision=?
'
;
sql
=
'
select * from ExperimentProfile where id=? and revision=?
'
;
args
=
[
experimentId
,
revision
];
args
=
[
experimentId
,
revision
];
}
}
this
.
log
.
trace
(
`queryExperimentProfile: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`queryExperimentProfile: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
ExperimentProfile
[]
>
=
new
Deferred
<
ExperimentProfile
[]
>
();
const
deferred
:
Deferred
<
ExperimentProfile
[]
>
=
new
Deferred
<
ExperimentProfile
[]
>
();
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
resolve
(
deferred
,
err
,
rows
,
loadExperimentProfile
);
this
.
resolve
(
deferred
,
err
,
rows
,
loadExperimentProfile
);
...
@@ -170,7 +170,7 @@ class SqlDB implements Database {
...
@@ -170,7 +170,7 @@ class SqlDB implements Database {
const
message
:
string
|
undefined
=
jobDetail
===
undefined
?
undefined
:
jobDetail
.
message
;
const
message
:
string
|
undefined
=
jobDetail
===
undefined
?
undefined
:
jobDetail
.
message
;
const
args
:
any
[]
=
[
timestamp
,
trialJobId
,
event
,
hyperParameter
,
logPath
,
sequenceId
,
message
];
const
args
:
any
[]
=
[
timestamp
,
trialJobId
,
event
,
hyperParameter
,
logPath
,
sequenceId
,
message
];
this
.
log
.
trace
(
`storeTrialJobEvent: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`storeTrialJobEvent: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
...
@@ -193,7 +193,7 @@ class SqlDB implements Database {
...
@@ -193,7 +193,7 @@ class SqlDB implements Database {
args
=
[
trialJobId
,
event
];
args
=
[
trialJobId
,
event
];
}
}
this
.
log
.
trace
(
`queryTrialJobEvent: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`queryTrialJobEvent: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
TrialJobEventRecord
[]
>
=
new
Deferred
<
TrialJobEventRecord
[]
>
();
const
deferred
:
Deferred
<
TrialJobEventRecord
[]
>
=
new
Deferred
<
TrialJobEventRecord
[]
>
();
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
resolve
(
deferred
,
err
,
rows
,
loadTrialJobEvent
);
this
.
resolve
(
deferred
,
err
,
rows
,
loadTrialJobEvent
);
...
@@ -207,7 +207,7 @@ class SqlDB implements Database {
...
@@ -207,7 +207,7 @@ class SqlDB implements Database {
const
json
:
MetricDataRecord
=
JSON
.
parse
(
data
);
const
json
:
MetricDataRecord
=
JSON
.
parse
(
data
);
const
args
:
any
[]
=
[
Date
.
now
(),
json
.
trialJobId
,
json
.
parameterId
,
json
.
type
,
json
.
sequence
,
JSON
.
stringify
(
json
.
data
)];
const
args
:
any
[]
=
[
Date
.
now
(),
json
.
trialJobId
,
json
.
parameterId
,
json
.
type
,
json
.
sequence
,
JSON
.
stringify
(
json
.
data
)];
this
.
log
.
trace
(
`storeMetricData: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`storeMetricData: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
this
.
db
.
run
(
sql
,
args
,
(
err
:
Error
|
null
)
=>
{
this
.
resolve
(
deferred
,
err
);
});
...
@@ -230,7 +230,7 @@ class SqlDB implements Database {
...
@@ -230,7 +230,7 @@ class SqlDB implements Database {
args
=
[
trialJobId
,
metricType
];
args
=
[
trialJobId
,
metricType
];
}
}
this
.
log
.
trace
(
`queryMetricData: SQL:
${
sql
}
, args:
${
JSON
.
stringify
(
args
)
}
`
)
;
this
.
log
.
trace
(
`queryMetricData: SQL:
${
sql
}
, args:
`
,
args
);
const
deferred
:
Deferred
<
MetricDataRecord
[]
>
=
new
Deferred
<
MetricDataRecord
[]
>
();
const
deferred
:
Deferred
<
MetricDataRecord
[]
>
=
new
Deferred
<
MetricDataRecord
[]
>
();
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
db
.
all
(
sql
,
args
,
(
err
:
Error
|
null
,
rows
:
any
[])
=>
{
this
.
resolve
(
deferred
,
err
,
rows
,
loadMetricData
);
this
.
resolve
(
deferred
,
err
,
rows
,
loadMetricData
);
...
@@ -259,7 +259,7 @@ class SqlDB implements Database {
...
@@ -259,7 +259,7 @@ class SqlDB implements Database {
for
(
const
row
of
(
<
any
[]
>
rows
))
{
for
(
const
row
of
(
<
any
[]
>
rows
))
{
data
.
push
(
rowLoader
(
row
));
data
.
push
(
rowLoader
(
row
));
}
}
this
.
log
.
trace
(
`sql query result:
${
JSON
.
stringify
(
data
)
}
`
)
;
this
.
log
.
trace
(
`sql query result:
`
,
data
);
(
<
Deferred
<
T
[]
>>
deferred
).
resolve
(
data
);
(
<
Deferred
<
T
[]
>>
deferred
).
resolve
(
data
);
}
}
}
}
...
...
ts/nni_manager/rest_server/restHandler.ts
View file @
521f1917
...
@@ -32,14 +32,14 @@ class NNIRestHandler {
...
@@ -32,14 +32,14 @@ class NNIRestHandler {
this
.
experimentsManager
=
component
.
get
(
ExperimentManager
);
this
.
experimentsManager
=
component
.
get
(
ExperimentManager
);
this
.
tensorboardManager
=
component
.
get
(
TensorboardManager
);
this
.
tensorboardManager
=
component
.
get
(
TensorboardManager
);
this
.
restServer
=
rs
;
this
.
restServer
=
rs
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
NNIRestHandler
'
);
}
}
public
createRestHandler
():
Router
{
public
createRestHandler
():
Router
{
const
router
:
Router
=
Router
();
const
router
:
Router
=
Router
();
router
.
use
((
req
:
Request
,
res
:
Response
,
next
)
=>
{
router
.
use
((
req
:
Request
,
res
:
Response
,
next
)
=>
{
this
.
log
.
debug
(
`
${
req
.
method
}
:
${
req
.
url
}
: body:
\n
${
JSON
.
stringify
(
req
.
body
,
undefined
,
4
)}
`
);
this
.
log
.
debug
(
`
${
req
.
method
}
:
${
req
.
url
}
: body:
`
,
req
.
body
);
res
.
header
(
'
Access-Control-Allow-Headers
'
,
'
Origin, X-Requested-With, Content-Type, Accept
'
);
res
.
header
(
'
Access-Control-Allow-Headers
'
,
'
Origin, X-Requested-With, Content-Type, Accept
'
);
res
.
header
(
'
Access-Control-Allow-Methods
'
,
'
PUT,POST,GET,DELETE,OPTIONS
'
);
res
.
header
(
'
Access-Control-Allow-Methods
'
,
'
PUT,POST,GET,DELETE,OPTIONS
'
);
...
...
ts/nni_manager/training_service/common/clusterJobRestServer.ts
View file @
521f1917
...
@@ -76,7 +76,7 @@ export abstract class ClusterJobRestServer extends RestServer {
...
@@ -76,7 +76,7 @@ export abstract class ClusterJobRestServer extends RestServer {
const
router
:
Router
=
Router
();
const
router
:
Router
=
Router
();
router
.
use
((
req
:
Request
,
res
:
Response
,
next
:
any
)
=>
{
router
.
use
((
req
:
Request
,
res
:
Response
,
next
:
any
)
=>
{
this
.
log
.
info
(
`
${
req
.
method
}
:
${
req
.
url
}
: body:
\n
${
JSON
.
stringify
(
req
.
body
,
undefined
,
4
)}
`
);
this
.
log
.
info
(
`
${
req
.
method
}
:
${
req
.
url
}
: body:
`
,
req
.
body
);
res
.
setHeader
(
'
Content-Type
'
,
'
application/json
'
);
res
.
setHeader
(
'
Content-Type
'
,
'
application/json
'
);
next
();
next
();
});
});
...
@@ -109,7 +109,7 @@ export abstract class ClusterJobRestServer extends RestServer {
...
@@ -109,7 +109,7 @@ export abstract class ClusterJobRestServer extends RestServer {
router
.
post
(
`/update-metrics/
${
this
.
expId
}
/:trialId`
,
(
req
:
Request
,
res
:
Response
)
=>
{
router
.
post
(
`/update-metrics/
${
this
.
expId
}
/:trialId`
,
(
req
:
Request
,
res
:
Response
)
=>
{
try
{
try
{
this
.
log
.
info
(
`Get update-metrics request, trial job id is
${
req
.
params
.
trialId
}
`
);
this
.
log
.
info
(
`Get update-metrics request, trial job id is
${
req
.
params
.
trialId
}
`
);
this
.
log
.
info
(
`
update-metrics body is
${
JSON
.
stringify
(
req
.
body
)
}
`
)
;
this
.
log
.
info
(
'
update-metrics body is
'
,
req
.
body
);
this
.
handleTrialMetrics
(
req
.
body
.
jobId
,
req
.
body
.
metrics
);
this
.
handleTrialMetrics
(
req
.
body
.
jobId
,
req
.
body
.
metrics
);
...
...
ts/nni_manager/training_service/common/util.ts
View file @
521f1917
...
@@ -111,7 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise<
...
@@ -111,7 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise<
await
fs
.
promises
.
mkdir
(
destPath
);
await
fs
.
promises
.
mkdir
(
destPath
);
}
}
}
else
{
}
else
{
getLogger
().
debug
(
`Copying file from
${
sourcePath
}
to
${
destPath
}
`
);
getLogger
(
'
execCopydir
'
).
debug
(
`Copying file from
${
sourcePath
}
to
${
destPath
}
`
);
await
fs
.
promises
.
copyFile
(
sourcePath
,
destPath
);
await
fs
.
promises
.
copyFile
(
sourcePath
,
destPath
);
}
}
}
}
...
...
ts/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts
View file @
521f1917
...
@@ -22,7 +22,7 @@ export namespace AzureStorageClientUtility {
...
@@ -22,7 +22,7 @@ export namespace AzureStorageClientUtility {
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
fileServerClient
.
createShareIfNotExists
(
azureShare
,
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
fileServerClient
.
createShareIfNotExists
(
azureShare
,
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
if
(
error
)
{
if
(
error
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`Create share failed:,
${
error
}
`
);
.
error
(
`Create share failed:,
${
error
}
`
);
deferred
.
resolve
(
false
);
deferred
.
resolve
(
false
);
}
else
{
}
else
{
...
@@ -43,7 +43,7 @@ export namespace AzureStorageClientUtility {
...
@@ -43,7 +43,7 @@ export namespace AzureStorageClientUtility {
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
fileServerClient
.
createDirectoryIfNotExists
(
azureShare
,
azureFoler
,
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
fileServerClient
.
createDirectoryIfNotExists
(
azureShare
,
azureFoler
,
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
if
(
error
)
{
if
(
error
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`Create directory failed:,
${
error
}
`
);
.
error
(
`Create directory failed:,
${
error
}
`
);
deferred
.
resolve
(
false
);
deferred
.
resolve
(
false
);
}
else
{
}
else
{
...
@@ -91,7 +91,7 @@ export namespace AzureStorageClientUtility {
...
@@ -91,7 +91,7 @@ export namespace AzureStorageClientUtility {
await
fileServerClient
.
createFileFromLocalFile
(
azureShare
,
azureDirectory
,
azureFileName
,
localFilePath
,
await
fileServerClient
.
createFileFromLocalFile
(
azureShare
,
azureDirectory
,
azureFileName
,
localFilePath
,
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
if
(
error
)
{
if
(
error
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`Upload file failed:,
${
error
}
`
);
.
error
(
`Upload file failed:,
${
error
}
`
);
deferred
.
resolve
(
false
);
deferred
.
resolve
(
false
);
}
else
{
}
else
{
...
@@ -116,7 +116,7 @@ export namespace AzureStorageClientUtility {
...
@@ -116,7 +116,7 @@ export namespace AzureStorageClientUtility {
await
fileServerClient
.
getFileToStream
(
azureShare
,
azureDirectory
,
azureFileName
,
fs
.
createWriteStream
(
localFilePath
),
await
fileServerClient
.
getFileToStream
(
azureShare
,
azureDirectory
,
azureFileName
,
fs
.
createWriteStream
(
localFilePath
),
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
(
error
:
any
,
_result
:
any
,
_response
:
any
)
=>
{
if
(
error
)
{
if
(
error
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`Download file failed:,
${
error
}
`
);
.
error
(
`Download file failed:,
${
error
}
`
);
deferred
.
resolve
(
false
);
deferred
.
resolve
(
false
);
}
else
{
}
else
{
...
@@ -185,19 +185,19 @@ export namespace AzureStorageClientUtility {
...
@@ -185,19 +185,19 @@ export namespace AzureStorageClientUtility {
fileServerClient
.
listFilesAndDirectoriesSegmented
(
azureShare
,
azureDirectory
,
'
null
'
,
fileServerClient
.
listFilesAndDirectoriesSegmented
(
azureShare
,
azureDirectory
,
'
null
'
,
async
(
_error
:
any
,
result
:
any
,
_response
:
any
)
=>
{
async
(
_error
:
any
,
result
:
any
,
_response
:
any
)
=>
{
if
((
'
entries
'
in
result
)
===
false
)
{
if
((
'
entries
'
in
result
)
===
false
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`list files failed, can't get entries in result`
);
.
error
(
`list files failed, can't get entries in result`
);
throw
new
Error
(
`list files failed, can't get entries in result`
);
throw
new
Error
(
`list files failed, can't get entries in result`
);
}
}
if
((
'
files
'
in
result
.
entries
)
===
false
)
{
if
((
'
files
'
in
result
.
entries
)
===
false
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`list files failed, can't get files in result['entries']`
);
.
error
(
`list files failed, can't get files in result['entries']`
);
throw
new
Error
(
`list files failed, can't get files in result['entries']`
);
throw
new
Error
(
`list files failed, can't get files in result['entries']`
);
}
}
if
((
'
directories
'
in
result
.
directories
)
===
false
)
{
if
((
'
directories
'
in
result
.
directories
)
===
false
)
{
getLogger
()
getLogger
(
'
AzureStorageClientUtility
'
)
.
error
(
`list files failed, can't get directories in result['entries']`
);
.
error
(
`list files failed, can't get directories in result['entries']`
);
throw
new
Error
(
`list files failed, can't get directories in result['entries']`
);
throw
new
Error
(
`list files failed, can't get directories in result['entries']`
);
}
}
...
...
ts/nni_manager/training_service/kubernetes/kubernetesApiClient.ts
View file @
521f1917
...
@@ -12,7 +12,7 @@ import {getLogger, Logger} from '../../common/log';
...
@@ -12,7 +12,7 @@ import {getLogger, Logger} from '../../common/log';
*/
*/
class
GeneralK8sClient
{
class
GeneralK8sClient
{
protected
readonly
client
:
any
;
protected
readonly
client
:
any
;
protected
readonly
log
:
Logger
=
getLogger
();
protected
readonly
log
:
Logger
=
getLogger
(
'
GeneralK8sClient
'
);
protected
namespace
:
string
=
'
default
'
;
protected
namespace
:
string
=
'
default
'
;
constructor
()
{
constructor
()
{
...
@@ -135,7 +135,7 @@ class GeneralK8sClient {
...
@@ -135,7 +135,7 @@ class GeneralK8sClient {
*/
*/
abstract
class
KubernetesCRDClient
{
abstract
class
KubernetesCRDClient
{
protected
readonly
client
:
any
;
protected
readonly
client
:
any
;
protected
readonly
log
:
Logger
=
getLogger
();
protected
readonly
log
:
Logger
=
getLogger
(
'
KubernetesCRDClient
'
);
protected
crdSchema
:
any
;
protected
crdSchema
:
any
;
constructor
()
{
constructor
()
{
...
...
ts/nni_manager/training_service/kubernetes/kubernetesJobInfoCollector.ts
View file @
521f1917
...
@@ -15,7 +15,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
...
@@ -15,7 +15,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
*/
*/
export
class
KubernetesJobInfoCollector
{
export
class
KubernetesJobInfoCollector
{
protected
readonly
trialJobsMap
:
Map
<
string
,
KubernetesTrialJobDetail
>
;
protected
readonly
trialJobsMap
:
Map
<
string
,
KubernetesTrialJobDetail
>
;
protected
readonly
log
:
Logger
=
getLogger
();
protected
readonly
log
:
Logger
=
getLogger
(
'
KubernetesJobInfoCollector
'
);
protected
readonly
statusesNeedToCheck
:
TrialJobStatus
[];
protected
readonly
statusesNeedToCheck
:
TrialJobStatus
[];
constructor
(
jobMap
:
Map
<
string
,
KubernetesTrialJobDetail
>
)
{
constructor
(
jobMap
:
Map
<
string
,
KubernetesTrialJobDetail
>
)
{
...
...
ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
View file @
521f1917
...
@@ -54,7 +54,7 @@ abstract class KubernetesTrainingService {
...
@@ -54,7 +54,7 @@ abstract class KubernetesTrainingService {
protected
expContainerCodeFolder
:
string
;
protected
expContainerCodeFolder
:
string
;
constructor
()
{
constructor
()
{
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
KubernetesTrainingService
'
);
this
.
metricsEmitter
=
new
EventEmitter
();
this
.
metricsEmitter
=
new
EventEmitter
();
this
.
trialJobsMap
=
new
Map
<
string
,
KubernetesTrialJobDetail
>
();
this
.
trialJobsMap
=
new
Map
<
string
,
KubernetesTrialJobDetail
>
();
this
.
trialLocalTempFolder
=
path
.
join
(
getExperimentRootDir
(),
'
trials-nfs-tmp
'
);
this
.
trialLocalTempFolder
=
path
.
join
(
getExperimentRootDir
(),
'
trials-nfs-tmp
'
);
...
...
ts/nni_manager/training_service/local/gpuScheduler.ts
View file @
521f1917
...
@@ -24,7 +24,7 @@ class GPUScheduler {
...
@@ -24,7 +24,7 @@ class GPUScheduler {
constructor
()
{
constructor
()
{
this
.
stopping
=
false
;
this
.
stopping
=
false
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
GPUScheduler
'
);
this
.
gpuMetricCollectorScriptFolder
=
`
${
os
.
tmpdir
()}
/
${
os
.
userInfo
().
username
}
/nni/script`
;
this
.
gpuMetricCollectorScriptFolder
=
`
${
os
.
tmpdir
()}
/
${
os
.
userInfo
().
username
}
/nni/script`
;
}
}
...
...
ts/nni_manager/training_service/local/localTrainingService.ts
View file @
521f1917
...
@@ -98,7 +98,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -98,7 +98,7 @@ class LocalTrainingService implements TrainingService {
this
.
jobMap
=
new
Map
<
string
,
LocalTrialJobDetail
>
();
this
.
jobMap
=
new
Map
<
string
,
LocalTrialJobDetail
>
();
this
.
jobQueue
=
[];
this
.
jobQueue
=
[];
this
.
stopping
=
false
;
this
.
stopping
=
false
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
LocalTrainingService
'
);
this
.
experimentId
=
getExperimentId
();
this
.
experimentId
=
getExperimentId
();
this
.
jobStreamMap
=
new
Map
<
string
,
ts
.
Stream
>
();
this
.
jobStreamMap
=
new
Map
<
string
,
ts
.
Stream
>
();
this
.
log
.
info
(
'
Construct local machine training service.
'
);
this
.
log
.
info
(
'
Construct local machine training service.
'
);
...
@@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
this
.
jobQueue
.
push
(
trialJobId
);
this
.
jobQueue
.
push
(
trialJobId
);
this
.
jobMap
.
set
(
trialJobId
,
trialJobDetail
);
this
.
jobMap
.
set
(
trialJobId
,
trialJobDetail
);
this
.
log
.
debug
(
`
submitTrialJob: return:
${
JSON
.
stringify
(
trialJobDetail
)
}
`
)
;
this
.
log
.
debug
(
'
submitTrialJob: return:
'
,
trialJobDetail
);
return
Promise
.
resolve
(
trialJobDetail
);
return
Promise
.
resolve
(
trialJobDetail
);
}
}
...
...
ts/nni_manager/training_service/pai/paiJobInfoCollector.ts
View file @
521f1917
...
@@ -18,7 +18,7 @@ interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { }
...
@@ -18,7 +18,7 @@ interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { }
*/
*/
export
class
PAIJobInfoCollector
{
export
class
PAIJobInfoCollector
{
private
readonly
trialJobsMap
:
Map
<
string
,
PAITrialJobDetail
>
;
private
readonly
trialJobsMap
:
Map
<
string
,
PAITrialJobDetail
>
;
private
readonly
log
:
Logger
=
getLogger
();
private
readonly
log
:
Logger
=
getLogger
(
'
PAIJobInfoCollector
'
);
private
readonly
statusesNeedToCheck
:
TrialJobStatus
[];
private
readonly
statusesNeedToCheck
:
TrialJobStatus
[];
private
readonly
finalStatuses
:
TrialJobStatus
[];
private
readonly
finalStatuses
:
TrialJobStatus
[];
...
...
ts/nni_manager/training_service/pai/paiJobRestServer.ts
View file @
521f1917
...
@@ -46,7 +46,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
...
@@ -46,7 +46,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
router
.
post
(
`/parameter-file-meta`
,
(
req
:
Request
,
res
:
Response
)
=>
{
router
.
post
(
`/parameter-file-meta`
,
(
req
:
Request
,
res
:
Response
)
=>
{
try
{
try
{
this
.
log
.
info
(
`
POST /parameter-file-meta, body is
${
JSON
.
stringify
(
req
.
body
)
}
`
)
;
this
.
log
.
info
(
'
POST /parameter-file-meta, body is
'
,
req
.
body
);
this
.
parameterFileMetaList
.
push
(
req
.
body
);
this
.
parameterFileMetaList
.
push
(
req
.
body
);
res
.
send
();
res
.
send
();
}
catch
(
err
)
{
}
catch
(
err
)
{
...
...
ts/nni_manager/training_service/pai/paiTrainingService.ts
View file @
521f1917
...
@@ -63,7 +63,7 @@ class PAITrainingService implements TrainingService {
...
@@ -63,7 +63,7 @@ class PAITrainingService implements TrainingService {
private
config
:
FlattenOpenpaiConfig
;
private
config
:
FlattenOpenpaiConfig
;
constructor
(
config
:
ExperimentConfig
)
{
constructor
(
config
:
ExperimentConfig
)
{
this
.
log
=
getLogger
();
this
.
log
=
getLogger
(
'
PAITrainingService
'
);
this
.
metricsEmitter
=
new
EventEmitter
();
this
.
metricsEmitter
=
new
EventEmitter
();
this
.
trialJobsMap
=
new
Map
<
string
,
PAITrialJobDetail
>
();
this
.
trialJobsMap
=
new
Map
<
string
,
PAITrialJobDetail
>
();
this
.
jobQueue
=
[];
this
.
jobQueue
=
[];
...
@@ -308,7 +308,7 @@ class PAITrainingService implements TrainingService {
...
@@ -308,7 +308,7 @@ class PAITrainingService implements TrainingService {
}
}
public
async
submitTrialJob
(
form
:
TrialJobApplicationForm
):
Promise
<
TrialJobDetail
>
{
public
async
submitTrialJob
(
form
:
TrialJobApplicationForm
):
Promise
<
TrialJobDetail
>
{
this
.
log
.
info
(
`
submitTrialJob: form:
${
JSON
.
stringify
(
form
)
}
`
)
;
this
.
log
.
info
(
'
submitTrialJob: form:
'
,
form
);
const
trialJobId
:
string
=
uniqueString
(
5
);
const
trialJobId
:
string
=
uniqueString
(
5
);
//TODO: use HDFS working folder instead
//TODO: use HDFS working folder instead
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment