Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f56f688b
Unverified
Commit
f56f688b
authored
Nov 02, 2018
by
chicm-ms
Committed by
GitHub
Nov 02, 2018
Browse files
Fix sequence id issue on resuming experiment (#316)
parent
06710abd
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
63 additions
and
13 deletions
+63
-13
src/nni_manager/common/experimentStartupInfo.ts
src/nni_manager/common/experimentStartupInfo.ts
+22
-1
src/nni_manager/common/manager.ts
src/nni_manager/common/manager.ts
+1
-0
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+12
-1
src/nni_manager/core/sqlDatabase.ts
src/nni_manager/core/sqlDatabase.ts
+4
-1
src/nni_manager/core/test/dataStore.test.ts
src/nni_manager/core/test/dataStore.test.ts
+1
-0
src/nni_manager/core/test/sqlDatabase.test.ts
src/nni_manager/core/test/sqlDatabase.test.ts
+4
-4
src/nni_manager/rest_server/test/mockedNNIManager.ts
src/nni_manager/rest_server/test/mockedNNIManager.ts
+1
-0
src/nni_manager/training_service/local/localTrainingService.ts
...ni_manager/training_service/local/localTrainingService.ts
+6
-2
src/nni_manager/training_service/pai/paiTrainingService.ts
src/nni_manager/training_service/pai/paiTrainingService.ts
+6
-2
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
...ng_service/remote_machine/remoteMachineTrainingService.ts
+6
-2
No files found.
src/nni_manager/common/experimentStartupInfo.ts
View file @
f56f688b
...
@@ -27,6 +27,7 @@ class ExperimentStartupInfo {
...
@@ -27,6 +27,7 @@ class ExperimentStartupInfo {
private
experimentId
:
string
=
''
;
private
experimentId
:
string
=
''
;
private
newExperiment
:
boolean
=
true
;
private
newExperiment
:
boolean
=
true
;
private
initialized
:
boolean
=
false
;
private
initialized
:
boolean
=
false
;
private
initTrialSequenceID
:
number
=
0
;
public
setStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
):
void
{
public
setStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
):
void
{
assert
(
!
this
.
initialized
);
assert
(
!
this
.
initialized
);
...
@@ -48,6 +49,17 @@ class ExperimentStartupInfo {
...
@@ -48,6 +49,17 @@ class ExperimentStartupInfo {
return
this
.
newExperiment
;
return
this
.
newExperiment
;
}
}
public
setInitTrialSequenceId
(
initSequenceId
:
number
):
void
{
assert
(
this
.
initialized
);
this
.
initTrialSequenceID
=
initSequenceId
;
}
public
getInitTrialSequenceId
():
number
{
assert
(
this
.
initialized
);
return
this
.
initTrialSequenceID
;
}
}
}
function
getExperimentId
():
string
{
function
getExperimentId
():
string
{
...
@@ -58,8 +70,17 @@ function isNewExperiment(): boolean {
...
@@ -58,8 +70,17 @@ function isNewExperiment(): boolean {
return
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
isNewExperiment
();
return
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
isNewExperiment
();
}
}
function
setInitTrialSequenceId
(
initSequenceId
:
number
):
void
{
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
setInitTrialSequenceId
(
initSequenceId
);
}
function
getInitTrialSequenceId
():
number
{
return
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
getInitTrialSequenceId
();
}
function
setExperimentStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
):
void
{
function
setExperimentStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
):
void
{
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
setStartupInfo
(
newExperiment
,
experimentId
);
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
).
setStartupInfo
(
newExperiment
,
experimentId
);
}
}
export
{
ExperimentStartupInfo
,
getExperimentId
,
isNewExperiment
,
setExperimentStartupInfo
};
export
{
ExperimentStartupInfo
,
getExperimentId
,
isNewExperiment
,
setExperimentStartupInfo
,
setInitTrialSequenceId
,
getInitTrialSequenceId
};
src/nni_manager/common/manager.ts
View file @
f56f688b
...
@@ -65,6 +65,7 @@ interface ExperimentProfile {
...
@@ -65,6 +65,7 @@ interface ExperimentProfile {
logDir
?:
string
;
logDir
?:
string
;
startTime
?:
number
;
startTime
?:
number
;
endTime
?:
number
;
endTime
?:
number
;
maxSequenceId
:
number
;
revision
:
number
;
revision
:
number
;
}
}
...
...
src/nni_manager/core/nnimanager.ts
View file @
f56f688b
...
@@ -26,7 +26,7 @@ import { Deferred } from 'ts-deferred';
...
@@ -26,7 +26,7 @@ import { Deferred } from 'ts-deferred';
import
*
as
component
from
'
../common/component
'
;
import
*
as
component
from
'
../common/component
'
;
import
{
DataStore
,
MetricDataRecord
,
MetricType
,
TrialJobInfo
}
from
'
../common/datastore
'
;
import
{
DataStore
,
MetricDataRecord
,
MetricType
,
TrialJobInfo
}
from
'
../common/datastore
'
;
import
{
NNIError
}
from
'
../common/errors
'
;
import
{
NNIError
}
from
'
../common/errors
'
;
import
{
getExperimentId
}
from
'
../common/experimentStartupInfo
'
;
import
{
getExperimentId
,
setInitTrialSequenceId
}
from
'
../common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
import
{
ExperimentParams
,
ExperimentProfile
,
Manager
,
ExperimentParams
,
ExperimentProfile
,
Manager
,
...
@@ -152,6 +152,8 @@ class NNIManager implements Manager {
...
@@ -152,6 +152,8 @@ class NNIManager implements Manager {
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
setInitTrialSequenceId
(
this
.
experimentProfile
.
maxSequenceId
+
1
);
// Set up multiphase config
// Set up multiphase config
if
(
expParams
.
multiPhase
&&
this
.
trainingService
.
isMultiPhaseJobSupported
)
{
if
(
expParams
.
multiPhase
&&
this
.
trainingService
.
isMultiPhaseJobSupported
)
{
this
.
trainingService
.
setClusterMetadata
(
'
multiPhase
'
,
expParams
.
multiPhase
.
toString
());
this
.
trainingService
.
setClusterMetadata
(
'
multiPhase
'
,
expParams
.
multiPhase
.
toString
());
...
@@ -462,6 +464,7 @@ class NNIManager implements Manager {
...
@@ -462,6 +464,7 @@ class NNIManager implements Manager {
}
}
};
};
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
trialJobAppForm
);
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
trialJobAppForm
);
await
this
.
storeMaxSequenceId
(
trialJobDetail
.
sequenceId
);
this
.
trialJobs
.
set
(
trialJobDetail
.
id
,
Object
.
assign
({},
trialJobDetail
));
this
.
trialJobs
.
set
(
trialJobDetail
.
id
,
Object
.
assign
({},
trialJobDetail
));
const
trialJobDetailSnapshot
:
TrialJobDetail
|
undefined
=
this
.
trialJobs
.
get
(
trialJobDetail
.
id
);
const
trialJobDetailSnapshot
:
TrialJobDetail
|
undefined
=
this
.
trialJobs
.
get
(
trialJobDetail
.
id
);
if
(
trialJobDetailSnapshot
!=
undefined
)
{
if
(
trialJobDetailSnapshot
!=
undefined
)
{
...
@@ -593,6 +596,7 @@ class NNIManager implements Manager {
...
@@ -593,6 +596,7 @@ class NNIManager implements Manager {
revision
:
0
,
revision
:
0
,
execDuration
:
0
,
execDuration
:
0
,
logDir
:
getLogDir
(),
logDir
:
getLogDir
(),
maxSequenceId
:
0
,
params
:
{
params
:
{
authorName
:
''
,
authorName
:
''
,
experimentName
:
''
,
experimentName
:
''
,
...
@@ -609,6 +613,13 @@ class NNIManager implements Manager {
...
@@ -609,6 +613,13 @@ class NNIManager implements Manager {
}
}
};
};
}
}
private
async
storeMaxSequenceId
(
sequenceId
:
number
):
Promise
<
void
>
{
if
(
sequenceId
>
this
.
experimentProfile
.
maxSequenceId
)
{
this
.
experimentProfile
.
maxSequenceId
=
sequenceId
;
await
this
.
storeExperimentProfile
();
}
}
}
}
export
{
NNIManager
};
export
{
NNIManager
};
src/nni_manager/core/sqlDatabase.ts
View file @
f56f688b
...
@@ -53,6 +53,7 @@ create table ExperimentProfile (
...
@@ -53,6 +53,7 @@ create table ExperimentProfile (
startTime integer,
startTime integer,
endTime integer,
endTime integer,
logDir text,
logDir text,
maxSequenceId integer,
revision integer);
revision integer);
create index ExperimentProfile_id on ExperimentProfile(id);
create index ExperimentProfile_id on ExperimentProfile(id);
`
;
`
;
...
@@ -65,6 +66,7 @@ function loadExperimentProfile(row: any): ExperimentProfile {
...
@@ -65,6 +66,7 @@ function loadExperimentProfile(row: any): ExperimentProfile {
startTime
:
row
.
startTime
===
null
?
undefined
:
row
.
startTime
,
startTime
:
row
.
startTime
===
null
?
undefined
:
row
.
startTime
,
endTime
:
row
.
endTime
===
null
?
undefined
:
row
.
endTime
,
endTime
:
row
.
endTime
===
null
?
undefined
:
row
.
endTime
,
logDir
:
row
.
logDir
===
null
?
undefined
:
row
.
logDir
,
logDir
:
row
.
logDir
===
null
?
undefined
:
row
.
logDir
,
maxSequenceId
:
row
.
maxSequenceId
,
revision
:
row
.
revision
revision
:
row
.
revision
};
};
}
}
...
@@ -131,7 +133,7 @@ class SqlDB implements Database {
...
@@ -131,7 +133,7 @@ class SqlDB implements Database {
}
}
public
storeExperimentProfile
(
exp
:
ExperimentProfile
):
Promise
<
void
>
{
public
storeExperimentProfile
(
exp
:
ExperimentProfile
):
Promise
<
void
>
{
const
sql
:
string
=
'
insert into ExperimentProfile values (?,?,?,?,?,?,?)
'
;
const
sql
:
string
=
'
insert into ExperimentProfile values (?,?,?,?,?,?,?
,?
)
'
;
const
args
:
any
[]
=
[
const
args
:
any
[]
=
[
JSON
.
stringify
(
exp
.
params
),
JSON
.
stringify
(
exp
.
params
),
exp
.
id
,
exp
.
id
,
...
@@ -139,6 +141,7 @@ class SqlDB implements Database {
...
@@ -139,6 +141,7 @@ class SqlDB implements Database {
exp
.
startTime
===
undefined
?
null
:
exp
.
startTime
,
exp
.
startTime
===
undefined
?
null
:
exp
.
startTime
,
exp
.
endTime
===
undefined
?
null
:
exp
.
endTime
,
exp
.
endTime
===
undefined
?
null
:
exp
.
endTime
,
exp
.
logDir
===
undefined
?
null
:
exp
.
logDir
,
exp
.
logDir
===
undefined
?
null
:
exp
.
logDir
,
exp
.
maxSequenceId
,
exp
.
revision
exp
.
revision
];
];
...
...
src/nni_manager/core/test/dataStore.test.ts
View file @
f56f688b
...
@@ -79,6 +79,7 @@ describe('Unit test for dataStore', () => {
...
@@ -79,6 +79,7 @@ describe('Unit test for dataStore', () => {
execDuration
:
0
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
maxSequenceId
:
0
,
revision
:
0
revision
:
0
}
}
const
id
:
string
=
profile
.
id
;
const
id
:
string
=
profile
.
id
;
...
...
src/nni_manager/core/test/sqlDatabase.test.ts
View file @
f56f688b
...
@@ -64,10 +64,10 @@ const expParams2: ExperimentParams = {
...
@@ -64,10 +64,10 @@ const expParams2: ExperimentParams = {
};
};
const
profiles
:
ExperimentProfile
[]
=
[
const
profiles
:
ExperimentProfile
[]
=
[
{
params
:
expParams1
,
id
:
'
#1
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
undefined
,
revision
:
1
},
{
params
:
expParams1
,
id
:
'
#1
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
undefined
,
revision
:
1
,
maxSequenceId
:
0
},
{
params
:
expParams1
,
id
:
'
#1
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
2
},
{
params
:
expParams1
,
id
:
'
#1
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
2
,
maxSequenceId
:
0
},
{
params
:
expParams2
,
id
:
'
#2
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
2
},
{
params
:
expParams2
,
id
:
'
#2
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
2
,
maxSequenceId
:
0
},
{
params
:
expParams2
,
id
:
'
#2
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
3
}
{
params
:
expParams2
,
id
:
'
#2
'
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
revision
:
3
,
maxSequenceId
:
0
}
];
];
const
events
:
TrialJobEventRecord
[]
=
[
const
events
:
TrialJobEventRecord
[]
=
[
...
...
src/nni_manager/rest_server/test/mockedNNIManager.ts
View file @
f56f688b
...
@@ -147,6 +147,7 @@ export class MockedNNIManager extends Manager {
...
@@ -147,6 +147,7 @@ export class MockedNNIManager extends Manager {
execDuration
:
0
,
execDuration
:
0
,
startTime
:
Date
.
now
(),
startTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
endTime
:
Date
.
now
(),
maxSequenceId
:
0
,
revision
:
0
revision
:
0
};
};
...
...
src/nni_manager/training_service/local/localTrainingService.ts
View file @
f56f688b
...
@@ -30,12 +30,12 @@ import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common
...
@@ -30,12 +30,12 @@ import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
getInitTrialSequenceId
}
from
'
../../common/experimentStartupInfo
'
;
import
{
import
{
HostJobApplicationForm
,
JobApplicationForm
,
HyperParameters
,
TrainingService
,
TrialJobApplicationForm
,
HostJobApplicationForm
,
JobApplicationForm
,
HyperParameters
,
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
}
from
'
../../common/trainingService
'
;
}
from
'
../../common/trainingService
'
;
import
{
delay
,
generateParamFileName
,
getExperimentRootDir
,
uniqueString
}
from
'
../../common/utils
'
;
import
{
delay
,
generateParamFileName
,
getExperimentRootDir
,
uniqueString
}
from
'
../../common/utils
'
;
import
{
file
}
from
'
tmp
'
;
const
tkill
=
require
(
'
tree-kill
'
);
const
tkill
=
require
(
'
tree-kill
'
);
...
@@ -111,7 +111,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -111,7 +111,7 @@ class LocalTrainingService implements TrainingService {
this
.
initialized
=
false
;
this
.
initialized
=
false
;
this
.
stopping
=
false
;
this
.
stopping
=
false
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
();
this
.
trialSequenceId
=
0
;
this
.
trialSequenceId
=
-
1
;
}
}
public
async
run
():
Promise
<
void
>
{
public
async
run
():
Promise
<
void
>
{
...
@@ -432,6 +432,10 @@ class LocalTrainingService implements TrainingService {
...
@@ -432,6 +432,10 @@ class LocalTrainingService implements TrainingService {
}
}
private
generateSequenceId
():
number
{
private
generateSequenceId
():
number
{
if
(
this
.
trialSequenceId
===
-
1
)
{
this
.
trialSequenceId
=
getInitTrialSequenceId
();
}
return
this
.
trialSequenceId
++
;
return
this
.
trialSequenceId
++
;
}
}
...
...
src/nni_manager/training_service/pai/paiTrainingService.ts
View file @
f56f688b
...
@@ -29,7 +29,7 @@ import * as request from 'request';
...
@@ -29,7 +29,7 @@ import * as request from 'request';
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
EventEmitter
}
from
'
events
'
;
import
{
EventEmitter
}
from
'
events
'
;
import
{
getExperimentId
}
from
'
../../common/experimentStartupInfo
'
;
import
{
getExperimentId
,
getInitTrialSequenceId
}
from
'
../../common/experimentStartupInfo
'
;
import
{
HDFSClientUtility
}
from
'
./hdfsClientUtility
'
import
{
HDFSClientUtility
}
from
'
./hdfsClientUtility
'
import
{
MethodNotImplementedError
}
from
'
../../common/errors
'
;
import
{
MethodNotImplementedError
}
from
'
../../common/errors
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
...
@@ -78,7 +78,7 @@ class PAITrainingService implements TrainingService {
...
@@ -78,7 +78,7 @@ class PAITrainingService implements TrainingService {
this
.
experimentId
=
getExperimentId
();
this
.
experimentId
=
getExperimentId
();
this
.
paiJobCollector
=
new
PAIJobInfoCollector
(
this
.
trialJobsMap
);
this
.
paiJobCollector
=
new
PAIJobInfoCollector
(
this
.
trialJobsMap
);
this
.
hdfsDirPattern
=
'
hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?
'
;
this
.
hdfsDirPattern
=
'
hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?
'
;
this
.
trialSequenceId
=
0
;
this
.
trialSequenceId
=
-
1
;
}
}
public
async
run
():
Promise
<
void
>
{
public
async
run
():
Promise
<
void
>
{
...
@@ -454,6 +454,10 @@ class PAITrainingService implements TrainingService {
...
@@ -454,6 +454,10 @@ class PAITrainingService implements TrainingService {
}
}
private
generateSequenceId
():
number
{
private
generateSequenceId
():
number
{
if
(
this
.
trialSequenceId
===
-
1
)
{
this
.
trialSequenceId
=
getInitTrialSequenceId
();
}
return
this
.
trialSequenceId
++
;
return
this
.
trialSequenceId
++
;
}
}
}
}
...
...
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
View file @
f56f688b
...
@@ -30,7 +30,7 @@ import { Deferred } from 'ts-deferred';
...
@@ -30,7 +30,7 @@ import { Deferred } from 'ts-deferred';
import
{
String
}
from
'
typescript-string-operations
'
;
import
{
String
}
from
'
typescript-string-operations
'
;
import
*
as
component
from
'
../../common/component
'
;
import
*
as
component
from
'
../../common/component
'
;
import
{
MethodNotImplementedError
,
NNIError
,
NNIErrorNames
}
from
'
../../common/errors
'
;
import
{
MethodNotImplementedError
,
NNIError
,
NNIErrorNames
}
from
'
../../common/errors
'
;
import
{
getExperimentId
}
from
'
../../common/experimentStartupInfo
'
;
import
{
getExperimentId
,
getInitTrialSequenceId
}
from
'
../../common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
ObservableTimer
}
from
'
../../common/observableTimer
'
;
import
{
ObservableTimer
}
from
'
../../common/observableTimer
'
;
import
{
import
{
...
@@ -77,7 +77,7 @@ class RemoteMachineTrainingService implements TrainingService {
...
@@ -77,7 +77,7 @@ class RemoteMachineTrainingService implements TrainingService {
this
.
remoteExpRootDir
=
this
.
getRemoteExperimentRootDir
();
this
.
remoteExpRootDir
=
this
.
getRemoteExperimentRootDir
();
this
.
timer
=
timer
;
this
.
timer
=
timer
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
();
this
.
trialSequenceId
=
0
;
this
.
trialSequenceId
=
-
1
;
}
}
/**
/**
...
@@ -607,6 +607,10 @@ class RemoteMachineTrainingService implements TrainingService {
...
@@ -607,6 +607,10 @@ class RemoteMachineTrainingService implements TrainingService {
}
}
private
generateSequenceId
():
number
{
private
generateSequenceId
():
number
{
if
(
this
.
trialSequenceId
===
-
1
)
{
this
.
trialSequenceId
=
getInitTrialSequenceId
();
}
return
this
.
trialSequenceId
++
;
return
this
.
trialSequenceId
++
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment