"git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "87dc3cdcfdd03a4d67bd77eb09b3226554b5c92b"
Unverified commit f56f688b, authored by chicm-ms and committed by GitHub
Browse files

Fix sequence id issue on resuming experiment (#316)

parent 06710abd
...@@ -27,6 +27,7 @@ class ExperimentStartupInfo { ...@@ -27,6 +27,7 @@ class ExperimentStartupInfo {
private experimentId: string = ''; private experimentId: string = '';
private newExperiment: boolean = true; private newExperiment: boolean = true;
private initialized: boolean = false; private initialized: boolean = false;
private initTrialSequenceID: number = 0;
public setStartupInfo(newExperiment: boolean, experimentId: string): void { public setStartupInfo(newExperiment: boolean, experimentId: string): void {
assert(!this.initialized); assert(!this.initialized);
...@@ -48,6 +49,17 @@ class ExperimentStartupInfo { ...@@ -48,6 +49,17 @@ class ExperimentStartupInfo {
return this.newExperiment; return this.newExperiment;
} }
/**
 * Stores the sequence id that trial numbering should start from.
 * Asserts that the `initialized` flag is already set.
 *
 * @param initSequenceId value to record as the initial trial sequence id
 */
public setInitTrialSequenceId(initSequenceId: number): void {
    assert(this.initialized);

    this.initTrialSequenceID = initSequenceId;
}
/**
 * Reads back the stored initial trial sequence id.
 * Asserts that the `initialized` flag is already set.
 *
 * @returns the current value of `initTrialSequenceID`
 */
public getInitTrialSequenceId(): number {
    assert(this.initialized);
    const firstSequenceId: number = this.initTrialSequenceID;

    return firstSequenceId;
}
} }
function getExperimentId(): string { function getExperimentId(): string {
...@@ -58,8 +70,17 @@ function isNewExperiment(): boolean { ...@@ -58,8 +70,17 @@ function isNewExperiment(): boolean {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isNewExperiment(); return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isNewExperiment();
} }
/**
 * Module-level wrapper: looks up the ExperimentStartupInfo component and
 * forwards the given id to its setInitTrialSequenceId method.
 *
 * @param initSequenceId value to record as the initial trial sequence id
 */
function setInitTrialSequenceId(initSequenceId: number): void {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);
    startupInfo.setInitTrialSequenceId(initSequenceId);
}
/**
 * Module-level wrapper: looks up the ExperimentStartupInfo component and
 * returns the result of its getInitTrialSequenceId method.
 *
 * @returns the initial trial sequence id held by the component
 */
function getInitTrialSequenceId(): number {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);

    return startupInfo.getInitTrialSequenceId();
}
/**
 * Module-level wrapper: looks up the ExperimentStartupInfo component and
 * forwards both arguments to its setStartupInfo method.
 *
 * NOTE(review): this listing fuses the old and new columns of a side-by-side
 * diff, so each unchanged line appears twice on the same line.
 *
 * @param newExperiment passed through unchanged to setStartupInfo
 * @param experimentId passed through unchanged to setStartupInfo
 */
function setExperimentStartupInfo(newExperiment: boolean, experimentId: string): void { function setExperimentStartupInfo(newExperiment: boolean, experimentId: string): void {
component.get<ExperimentStartupInfo>(ExperimentStartupInfo).setStartupInfo(newExperiment, experimentId); component.get<ExperimentStartupInfo>(ExperimentStartupInfo).setStartupInfo(newExperiment, experimentId);
} }
export { ExperimentStartupInfo, getExperimentId, isNewExperiment, setExperimentStartupInfo }; export { ExperimentStartupInfo, getExperimentId, isNewExperiment,
setExperimentStartupInfo, setInitTrialSequenceId, getInitTrialSequenceId };
...@@ -65,6 +65,7 @@ interface ExperimentProfile { ...@@ -65,6 +65,7 @@ interface ExperimentProfile {
logDir?: string; logDir?: string;
startTime?: number; startTime?: number;
endTime?: number; endTime?: number;
maxSequenceId: number;
revision: number; revision: number;
} }
......
...@@ -26,7 +26,7 @@ import { Deferred } from 'ts-deferred'; ...@@ -26,7 +26,7 @@ import { Deferred } from 'ts-deferred';
import * as component from '../common/component'; import * as component from '../common/component';
import { DataStore, MetricDataRecord, MetricType, TrialJobInfo } from '../common/datastore'; import { DataStore, MetricDataRecord, MetricType, TrialJobInfo } from '../common/datastore';
import { NNIError } from '../common/errors'; import { NNIError } from '../common/errors';
import { getExperimentId } from '../common/experimentStartupInfo'; import { getExperimentId, setInitTrialSequenceId } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log'; import { getLogger, Logger } from '../common/log';
import { import {
ExperimentParams, ExperimentProfile, Manager, ExperimentParams, ExperimentProfile, Manager,
...@@ -152,6 +152,8 @@ class NNIManager implements Manager { ...@@ -152,6 +152,8 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId); this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
const expParams: ExperimentParams = this.experimentProfile.params; const expParams: ExperimentParams = this.experimentProfile.params;
setInitTrialSequenceId(this.experimentProfile.maxSequenceId + 1);
// Set up multiphase config // Set up multiphase config
if (expParams.multiPhase && this.trainingService.isMultiPhaseJobSupported) { if (expParams.multiPhase && this.trainingService.isMultiPhaseJobSupported) {
this.trainingService.setClusterMetadata('multiPhase', expParams.multiPhase.toString()); this.trainingService.setClusterMetadata('multiPhase', expParams.multiPhase.toString());
...@@ -462,6 +464,7 @@ class NNIManager implements Manager { ...@@ -462,6 +464,7 @@ class NNIManager implements Manager {
} }
}; };
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
await this.storeMaxSequenceId(trialJobDetail.sequenceId);
this.trialJobs.set(trialJobDetail.id, Object.assign({}, trialJobDetail)); this.trialJobs.set(trialJobDetail.id, Object.assign({}, trialJobDetail));
const trialJobDetailSnapshot: TrialJobDetail | undefined = this.trialJobs.get(trialJobDetail.id); const trialJobDetailSnapshot: TrialJobDetail | undefined = this.trialJobs.get(trialJobDetail.id);
if (trialJobDetailSnapshot != undefined) { if (trialJobDetailSnapshot != undefined) {
...@@ -593,6 +596,7 @@ class NNIManager implements Manager { ...@@ -593,6 +596,7 @@ class NNIManager implements Manager {
revision: 0, revision: 0,
execDuration: 0, execDuration: 0,
logDir: getLogDir(), logDir: getLogDir(),
maxSequenceId: 0,
params: { params: {
authorName: '', authorName: '',
experimentName: '', experimentName: '',
...@@ -609,6 +613,13 @@ class NNIManager implements Manager { ...@@ -609,6 +613,13 @@ class NNIManager implements Manager {
} }
}; };
} }
/**
 * Raises the profile's recorded maximum sequence id when the given id is
 * larger, then stores the experiment profile. Does nothing otherwise.
 *
 * @param sequenceId sequence id to compare against the recorded maximum
 */
private async storeMaxSequenceId(sequenceId: number): Promise<void> {
    const exceedsRecordedMax: boolean = sequenceId > this.experimentProfile.maxSequenceId;
    // Kept as a negated ">" (not "<=") so non-numeric maxima compare exactly as before.
    if (!exceedsRecordedMax) {
        return;
    }

    this.experimentProfile.maxSequenceId = sequenceId;
    await this.storeExperimentProfile();
}
} }
export { NNIManager }; export { NNIManager };
...@@ -53,6 +53,7 @@ create table ExperimentProfile ( ...@@ -53,6 +53,7 @@ create table ExperimentProfile (
startTime integer, startTime integer,
endTime integer, endTime integer,
logDir text, logDir text,
maxSequenceId integer,
revision integer); revision integer);
create index ExperimentProfile_id on ExperimentProfile(id); create index ExperimentProfile_id on ExperimentProfile(id);
`; `;
...@@ -65,6 +66,7 @@ function loadExperimentProfile(row: any): ExperimentProfile { ...@@ -65,6 +66,7 @@ function loadExperimentProfile(row: any): ExperimentProfile {
startTime: row.startTime === null ? undefined : row.startTime, startTime: row.startTime === null ? undefined : row.startTime,
endTime: row.endTime === null ? undefined : row.endTime, endTime: row.endTime === null ? undefined : row.endTime,
logDir: row.logDir === null ? undefined : row.logDir, logDir: row.logDir === null ? undefined : row.logDir,
maxSequenceId: row.maxSequenceId,
revision: row.revision revision: row.revision
}; };
} }
...@@ -131,7 +133,7 @@ class SqlDB implements Database { ...@@ -131,7 +133,7 @@ class SqlDB implements Database {
} }
public storeExperimentProfile(exp: ExperimentProfile): Promise<void> { public storeExperimentProfile(exp: ExperimentProfile): Promise<void> {
const sql: string = 'insert into ExperimentProfile values (?,?,?,?,?,?,?)'; const sql: string = 'insert into ExperimentProfile values (?,?,?,?,?,?,?,?)';
const args: any[] = [ const args: any[] = [
JSON.stringify(exp.params), JSON.stringify(exp.params),
exp.id, exp.id,
...@@ -139,6 +141,7 @@ class SqlDB implements Database { ...@@ -139,6 +141,7 @@ class SqlDB implements Database {
exp.startTime === undefined ? null : exp.startTime, exp.startTime === undefined ? null : exp.startTime,
exp.endTime === undefined ? null : exp.endTime, exp.endTime === undefined ? null : exp.endTime,
exp.logDir === undefined ? null : exp.logDir, exp.logDir === undefined ? null : exp.logDir,
exp.maxSequenceId,
exp.revision exp.revision
]; ];
......
...@@ -79,6 +79,7 @@ describe('Unit test for dataStore', () => { ...@@ -79,6 +79,7 @@ describe('Unit test for dataStore', () => {
execDuration: 0, execDuration: 0,
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now(), endTime: Date.now(),
maxSequenceId: 0,
revision: 0 revision: 0
} }
const id: string = profile.id; const id: string = profile.id;
......
...@@ -64,10 +64,10 @@ const expParams2: ExperimentParams = { ...@@ -64,10 +64,10 @@ const expParams2: ExperimentParams = {
}; };
const profiles: ExperimentProfile[] = [ const profiles: ExperimentProfile[] = [
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: undefined, revision: 1 }, { params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: undefined, revision: 1, maxSequenceId: 0 },
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2 }, { params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2, maxSequenceId: 0 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2 }, { params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2, maxSequenceId: 0 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 3 } { params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 3, maxSequenceId: 0 }
]; ];
const events: TrialJobEventRecord[] = [ const events: TrialJobEventRecord[] = [
......
...@@ -147,6 +147,7 @@ export class MockedNNIManager extends Manager { ...@@ -147,6 +147,7 @@ export class MockedNNIManager extends Manager {
execDuration: 0, execDuration: 0,
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now(), endTime: Date.now(),
maxSequenceId: 0,
revision: 0 revision: 0
}; };
......
...@@ -30,12 +30,12 @@ import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common ...@@ -30,12 +30,12 @@ import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { TrialConfig } from '../common/trialConfig'; import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import { import {
HostJobApplicationForm, JobApplicationForm, HyperParameters, TrainingService, TrialJobApplicationForm, HostJobApplicationForm, JobApplicationForm, HyperParameters, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric, TrialJobStatus TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../../common/trainingService'; } from '../../common/trainingService';
import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from '../../common/utils'; import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from '../../common/utils';
import { file } from 'tmp';
const tkill = require('tree-kill'); const tkill = require('tree-kill');
...@@ -111,7 +111,7 @@ class LocalTrainingService implements TrainingService { ...@@ -111,7 +111,7 @@ class LocalTrainingService implements TrainingService {
this.initialized = false; this.initialized = false;
this.stopping = false; this.stopping = false;
this.log = getLogger(); this.log = getLogger();
this.trialSequenceId = 0; this.trialSequenceId = -1;
} }
public async run(): Promise<void> { public async run(): Promise<void> {
...@@ -432,6 +432,10 @@ class LocalTrainingService implements TrainingService { ...@@ -432,6 +432,10 @@ class LocalTrainingService implements TrainingService {
} }
/**
 * Returns the current trial sequence id and then increments the counter.
 *
 * NOTE(review): this listing fuses the old and new columns of a side-by-side
 * diff, so each unchanged line appears twice on the same line.
 *
 * @returns the value of trialSequenceId before the increment
 */
private generateSequenceId(): number { private generateSequenceId(): number {
// -1 marks the counter as not yet seeded; on first use take the starting value from getInitTrialSequenceId().
if (this.trialSequenceId === -1) {
this.trialSequenceId = getInitTrialSequenceId();
}
return this.trialSequenceId++; return this.trialSequenceId++;
} }
......
...@@ -29,7 +29,7 @@ import * as request from 'request'; ...@@ -29,7 +29,7 @@ import * as request from 'request';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { EventEmitter } from 'events'; import { EventEmitter } from 'events';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId, getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import { HDFSClientUtility } from './hdfsClientUtility' import { HDFSClientUtility } from './hdfsClientUtility'
import { MethodNotImplementedError } from '../../common/errors'; import { MethodNotImplementedError } from '../../common/errors';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
...@@ -78,7 +78,7 @@ class PAITrainingService implements TrainingService { ...@@ -78,7 +78,7 @@ class PAITrainingService implements TrainingService {
this.experimentId = getExperimentId(); this.experimentId = getExperimentId();
this.paiJobCollector = new PAIJobInfoCollector(this.trialJobsMap); this.paiJobCollector = new PAIJobInfoCollector(this.trialJobsMap);
this.hdfsDirPattern = 'hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?'; this.hdfsDirPattern = 'hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?';
this.trialSequenceId = 0; this.trialSequenceId = -1;
} }
public async run(): Promise<void> { public async run(): Promise<void> {
...@@ -454,6 +454,10 @@ class PAITrainingService implements TrainingService { ...@@ -454,6 +454,10 @@ class PAITrainingService implements TrainingService {
} }
/**
 * Returns the current trial sequence id and then increments the counter.
 *
 * NOTE(review): this listing fuses the old and new columns of a side-by-side
 * diff, so each unchanged line appears twice on the same line.
 *
 * @returns the value of trialSequenceId before the increment
 */
private generateSequenceId(): number { private generateSequenceId(): number {
// -1 marks the counter as not yet seeded; on first use take the starting value from getInitTrialSequenceId().
if (this.trialSequenceId === -1) {
this.trialSequenceId = getInitTrialSequenceId();
}
return this.trialSequenceId++; return this.trialSequenceId++;
} }
} }
......
...@@ -30,7 +30,7 @@ import { Deferred } from 'ts-deferred'; ...@@ -30,7 +30,7 @@ import { Deferred } from 'ts-deferred';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors'; import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId, getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { ObservableTimer } from '../../common/observableTimer'; import { ObservableTimer } from '../../common/observableTimer';
import { import {
...@@ -77,7 +77,7 @@ class RemoteMachineTrainingService implements TrainingService { ...@@ -77,7 +77,7 @@ class RemoteMachineTrainingService implements TrainingService {
this.remoteExpRootDir = this.getRemoteExperimentRootDir(); this.remoteExpRootDir = this.getRemoteExperimentRootDir();
this.timer = timer; this.timer = timer;
this.log = getLogger(); this.log = getLogger();
this.trialSequenceId = 0; this.trialSequenceId = -1;
} }
/** /**
...@@ -607,6 +607,10 @@ class RemoteMachineTrainingService implements TrainingService { ...@@ -607,6 +607,10 @@ class RemoteMachineTrainingService implements TrainingService {
} }
/**
 * Returns the current trial sequence id and then increments the counter.
 *
 * NOTE(review): this listing fuses the old and new columns of a side-by-side
 * diff, so each unchanged line appears twice on the same line.
 *
 * @returns the value of trialSequenceId before the increment
 */
private generateSequenceId(): number { private generateSequenceId(): number {
// -1 marks the counter as not yet seeded; on first use take the starting value from getInitTrialSequenceId().
if (this.trialSequenceId === -1) {
this.trialSequenceId = getInitTrialSequenceId();
}
return this.trialSequenceId++; return this.trialSequenceId++;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment