"git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "eb416ae1b4d8ccf93a0b116fea7797470944e975"
Commit d1008bb3 authored by Zejun Lin, committed by QuanluZhang
Browse files

Modify status EXPERIMENT_RUNNING to RUNNING (#476)

* modify status experiment_running to running

* fix hyperband doc

* fix hyperband doc

* fix hyperband doc
parent b794076e
...@@ -85,7 +85,7 @@ interface TrialJobStatistics { ...@@ -85,7 +85,7 @@ interface TrialJobStatistics {
} }
interface NNIManagerStatus { interface NNIManagerStatus {
status: 'INITIALIZED' | 'EXPERIMENT_RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL'; status: 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL';
errors: string[]; errors: string[];
} }
......
...@@ -138,7 +138,7 @@ class NNIManager implements Manager { ...@@ -138,7 +138,7 @@ class NNIManager implements Manager {
checkpointDir); checkpointDir);
this.experimentProfile.startTime = Date.now(); this.experimentProfile.startTime = Date.now();
this.status.status = 'EXPERIMENT_RUNNING'; this.status.status = 'RUNNING';
await this.storeExperimentProfile(); await this.storeExperimentProfile();
this.run().catch((err: Error) => { this.run().catch((err: Error) => {
this.criticalError(err); this.criticalError(err);
...@@ -185,7 +185,7 @@ class NNIManager implements Manager { ...@@ -185,7 +185,7 @@ class NNIManager implements Manager {
this.experimentProfile.endTime) { this.experimentProfile.endTime) {
delete this.experimentProfile.endTime; delete this.experimentProfile.endTime;
} }
this.status.status = 'EXPERIMENT_RUNNING'; this.status.status = 'RUNNING';
// TO DO: update database record for resume event // TO DO: update database record for resume event
this.run().catch((err: Error) => { this.run().catch((err: Error) => {
...@@ -350,7 +350,7 @@ class NNIManager implements Manager { ...@@ -350,7 +350,7 @@ class NNIManager implements Manager {
let count: number = 1; let count: number = 1;
while (this.status.status !== 'STOPPING' && this.status.status !== 'STOPPED') { while (this.status.status !== 'STOPPING' && this.status.status !== 'STOPPED') {
await delay(1000 * 1); // 1 seconds await delay(1000 * 1); // 1 seconds
if (this.status.status === 'EXPERIMENT_RUNNING') { if (this.status.status === 'RUNNING') {
this.experimentProfile.execDuration += 1; this.experimentProfile.execDuration += 1;
if (count % 10 === 0) { if (count % 10 === 0) {
await this.storeExperimentProfile(); await this.storeExperimentProfile();
...@@ -460,15 +460,15 @@ class NNIManager implements Manager { ...@@ -460,15 +460,15 @@ class NNIManager implements Manager {
} }
// check maxtrialnum and maxduration here // check maxtrialnum and maxduration here
// NO_MORE_TRIAL is more like a subset of EXPERIMENT_RUNNING, because during EXPERIMENT_RUNNING tuner // NO_MORE_TRIAL is more like a subset of RUNNING, because during RUNNING tuner
// might tell nnimanager that there are no more trials. In NO_MORE_TRIAL state, the experiment is viewed // might tell nnimanager that there are no more trials. In NO_MORE_TRIAL state, the experiment is viewed
// as still running. DONE could be transferred from EXPERIMENT_RUNNING or NO_MORE_TRIAL. // as still running. DONE could be transferred from RUNNING or NO_MORE_TRIAL.
assert(this.status.status === 'EXPERIMENT_RUNNING' || assert(this.status.status === 'RUNNING' ||
this.status.status === 'DONE' || this.status.status === 'DONE' ||
this.status.status === 'NO_MORE_TRIAL'); this.status.status === 'NO_MORE_TRIAL');
if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration || if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration ||
this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) { this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) {
if (this.status.status === 'EXPERIMENT_RUNNING' || if (this.status.status === 'RUNNING' ||
this.status.status === 'NO_MORE_TRIAL') { this.status.status === 'NO_MORE_TRIAL') {
this.experimentProfile.endTime = Date.now(); this.experimentProfile.endTime = Date.now();
await this.storeExperimentProfile(); await this.storeExperimentProfile();
...@@ -480,7 +480,7 @@ class NNIManager implements Manager { ...@@ -480,7 +480,7 @@ class NNIManager implements Manager {
await this.storeExperimentProfile(); await this.storeExperimentProfile();
} }
if (this.status.status !== 'NO_MORE_TRIAL') { if (this.status.status !== 'NO_MORE_TRIAL') {
this.status.status = 'EXPERIMENT_RUNNING'; this.status.status = 'RUNNING';
} }
for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) { for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) {
if (this.waitingTrials.length === 0 || if (this.waitingTrials.length === 0 ||
...@@ -602,7 +602,7 @@ class NNIManager implements Manager { ...@@ -602,7 +602,7 @@ class NNIManager implements Manager {
case NEW_TRIAL_JOB: case NEW_TRIAL_JOB:
if (this.status.status === 'NO_MORE_TRIAL') { if (this.status.status === 'NO_MORE_TRIAL') {
this.log.warning('It is not supposed to receive more trials after NO_MORE_TRIAL is set'); this.log.warning('It is not supposed to receive more trials after NO_MORE_TRIAL is set');
this.status.status = 'EXPERIMENT_RUNNING'; this.status.status = 'RUNNING';
} }
this.waitingTrials.push(content); this.waitingTrials.push(content);
break; break;
......
...@@ -39,7 +39,7 @@ export const testManagerProvider: Provider = { ...@@ -39,7 +39,7 @@ export const testManagerProvider: Provider = {
export class MockedNNIManager extends Manager { export class MockedNNIManager extends Manager {
public getStatus(): NNIManagerStatus { public getStatus(): NNIManagerStatus {
return { return {
status: 'EXPERIMENT_RUNNING', status: 'RUNNING',
errors: [] errors: []
} }
} }
......
...@@ -33,6 +33,7 @@ If you use Hyperband, among the hyperparameters (i.e., key-value pairs) received ...@@ -33,6 +33,7 @@ If you use Hyperband, among the hyperparameters (i.e., key-value pairs) received
`eta` means `n/eta` configurations from `n` configurations will survive and rerun using more STEPS. `eta` means `n/eta` configurations from `n` configurations will survive and rerun using more STEPS.
Here is a concrete example of `R=81` and `eta=3`: Here is a concrete example of `R=81` and `eta=3`:
| | s=4 | s=3 | s=2 | s=1 | s=0 | | | s=4 | s=3 | s=2 | s=1 | s=0 |
|------|-----|-----|-----|-----|-----| |------|-----|-----|-----|-----|-----|
|i | n r | n r | n r | n r | n r | |i | n r | n r | n r | n r | n r |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment