Unverified Commit 6568eaee authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #247 from microsoft/master

merge master
parents d90433da 1e2a2e29
import axios from 'axios'; import axios from 'axios';
import { MANAGER_IP } from '../const'; import { MANAGER_IP } from '../const';
import { ExperimentProfile, NNIManagerStatus } from '../interface'; import { ExperimentProfile, NNIManagerStatus } from '../interface';
import { requestAxios } from '../function';
function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProfile): boolean { function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProfile): boolean {
if (!profile1 || !profile2) { if (!profile1 || !profile2) {
...@@ -14,32 +15,87 @@ function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProf ...@@ -14,32 +15,87 @@ function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProf
class Experiment { class Experiment {
private profileField?: ExperimentProfile = undefined; private profileField?: ExperimentProfile = undefined;
private statusField?: NNIManagerStatus = undefined; private statusField?: NNIManagerStatus = undefined;
// Set to true by update() when the [/experiment] request fails.
private isexperimentError: boolean = false;
// Message of the error caught from the failed [/experiment] request.
private experimentErrorMessage: string = '';
// Set to true by update() when the [/check-status] request fails.
private isStatusError: boolean = false;
// Message of the error caught from the failed [/check-status] request.
private statusErrorMessage: string = '';
// Keeps calling update() until both the profile and the status are loaded.
// Gives up (leaving the missing field unset) as soon as either request has
// been flagged as failed, so a broken endpoint cannot make this loop forever.
public async init(): Promise<void> {
    const loaded = (): boolean => Boolean(this.profileField && this.statusField);
    const failed = (): boolean => this.isexperimentError || this.isStatusError;

    while (!loaded() && !failed()) {
        await this.update();
    }
}
// Returns true once the [/experiment] request issued by update() has failed.
public experimentError(): boolean {
return this.isexperimentError;
}
// Returns true once the [/check-status] request issued by update() has failed.
public statusError(): boolean {
return this.isStatusError;
}
// Returns the message recorded for the failed [/experiment] request;
// an empty string while no failure has been recorded.
public getExperimentMessage(): string {
return this.experimentErrorMessage;
}
// Returns the message recorded for the failed [/check-status] request;
// an empty string while no failure has been recorded.
public getStatusMessage(): string {
return this.statusErrorMessage;
}
// Refreshes the cached experiment profile [/experiment] and manager status
// [/check-status].
//
// The two requests are independent, so they are issued concurrently. A failed
// request does not reject: it sets the matching error flag and message, and is
// reported as an update so callers can react to the error.
//
// Returns true when either request reports a change or fails.
public async update(): Promise<boolean> {
    const describe = (error: unknown): string =>
        error instanceof Error ? error.message : String(error);

    const refreshProfile = async (): Promise<boolean> => {
        try {
            const data = await requestAxios(`${MANAGER_IP}/experiment`);
            // NOTE(review): the result of compareProfiles() is used as the
            // "updated" flag exactly as before; its body is not visible here, so
            // confirm that it returns true for *different* profiles.
            const changed = compareProfiles(this.profileField, data);
            this.profileField = data;
            return changed;
        } catch (error) {
            this.isexperimentError = true;
            this.experimentErrorMessage = describe(error);
            return true;
        }
    };

    const refreshStatus = async (): Promise<boolean> => {
        try {
            const data = await requestAxios(`${MANAGER_IP}/check-status`);
            // The status is updated when the serialized payload differs from the
            // cached one (the previous `===` reported the opposite).
            const changed = JSON.stringify(this.statusField) !== JSON.stringify(data);
            this.statusField = data;
            return changed;
        } catch (error) {
            this.isStatusError = true;
            this.statusErrorMessage = describe(error);
            return true;
        }
    };

    // Combine both results instead of letting the status result overwrite the
    // profile result.
    const [profileUpdated, statusUpdated] = await Promise.all([refreshProfile(), refreshStatus()]);
    return profileUpdated || statusUpdated;
}
get profile(): ExperimentProfile { get profile(): ExperimentProfile {
if (!this.profileField) { if (!this.profileField) {
throw Error('Experiment profile not initialized'); // throw Error('Experiment profile not initialized');
// set initProfile to prevent page broken
const initProfile = {
data: {
"id": "", "revision": 0, "execDuration": 0,
"logDir": "", "nextSequenceId": 0,
"params": {
"authorName": "", "experimentName": "", "trialConcurrency": 0, "maxExecDuration": 0, "maxTrialNum": 0, "searchSpace": "null",
"trainingServicePlatform": "", "tuner": {
"builtinTunerName": "TPE",
"classArgs": { "optimize_mode": "" }, "checkpointDir": ""
},
"versionCheck": true, "clusterMetaData": [{ "key": "", "value": "" },
{ "key": "", "value": "" }]
}, "startTime": 0, "endTime": 0
}
};
this.profileField = initProfile.data as any;
} }
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return this.profileField!; return this.profileField!;
...@@ -68,7 +124,9 @@ class Experiment { ...@@ -68,7 +124,9 @@ class Experiment {
get status(): string { get status(): string {
if (!this.statusField) { if (!this.statusField) {
throw Error('Experiment status not initialized'); // throw Error('Experiment status not initialized');
// this.statusField.status = '';
return '';
} }
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return this.statusField!.status; return this.statusField!.status;
......
...@@ -2,6 +2,7 @@ import axios from 'axios'; ...@@ -2,6 +2,7 @@ import axios from 'axios';
import { MANAGER_IP, METRIC_GROUP_UPDATE_THRESHOLD, METRIC_GROUP_UPDATE_SIZE } from '../const'; import { MANAGER_IP, METRIC_GROUP_UPDATE_THRESHOLD, METRIC_GROUP_UPDATE_SIZE } from '../const';
import { MetricDataRecord, TableRecord, TrialJobInfo } from '../interface'; import { MetricDataRecord, TableRecord, TrialJobInfo } from '../interface';
import { Trial } from './trial'; import { Trial } from './trial';
import { requestAxios } from '../function';
function groupMetricsByTrial(metrics: MetricDataRecord[]): Map<string, MetricDataRecord[]> { function groupMetricsByTrial(metrics: MetricDataRecord[]): Map<string, MetricDataRecord[]> {
const ret = new Map<string, MetricDataRecord[]>(); const ret = new Map<string, MetricDataRecord[]>();
...@@ -39,9 +40,20 @@ class TrialManager { ...@@ -39,9 +40,20 @@ class TrialManager {
private maxSequenceId: number = 0; private maxSequenceId: number = 0;
private doingBatchUpdate: boolean = false; private doingBatchUpdate: boolean = false;
private batchUpdatedAfterReading: boolean = false; private batchUpdatedAfterReading: boolean = false;
private isJobListError: boolean = false; // set when the [/trial-jobs] request fails
private jobErrorMessage: string = ''; // [/trial-jobs] error message
private isMetricdataError: boolean = false; // set when the [/metric-data] request fails
private MetricdataErrorMessage: string = ''; // [/metric-data] error message
private isLatestMetricdataError: boolean = false; // set when the [/metric-data-latest] request fails
private latestMetricdataErrorMessage: string = ''; // [/metric-data-latest] error message
private isMetricdataRangeError: boolean = false; // set when a [/metric-data-range] request fails
private metricdataRangeErrorMessage: string = ''; // [/metric-data-range] error message
// Keeps calling update() until both the trial info and the metrics are
// initialized, or stops early once the [/metric-data] request has been
// flagged as failed.
public async init(): Promise<void> {
    const pending = (): boolean => !(this.infoInitialized && this.metricInitialized);

    while (pending() && !this.isMetricdataError) {
        await this.update();
    }
}
...@@ -156,24 +168,70 @@ class TrialManager { ...@@ -156,24 +168,70 @@ class TrialManager {
return trials; return trials;
} }
// [/trial-jobs] Returns true once the trial job list request made by
// updateInfo() has failed; callers use it to decide whether to show the
// trial error message.
public jobListError(): boolean {
return this.isJobListError;
}
// [/trial-jobs] Returns the message recorded for the failed trial job list
// request; an empty string while no failure has been recorded.
public getJobErrorMessage(): string {
return this.jobErrorMessage;
}
// [/metric-data] Returns true once the request made by updateAllMetrics()
// has failed.
public MetricDataError(): boolean {
return this.isMetricdataError;
}
// [/metric-data] Returns the message recorded for the failed request;
// an empty string while no failure has been recorded.
public getMetricDataErrorMessage(): string {
return this.MetricdataErrorMessage;
}
// [/metric-data-latest] Returns true once the request made by
// updateLatestMetrics() has failed.
public latestMetricDataError(): boolean {
return this.isLatestMetricdataError;
}
// [/metric-data-latest] Returns the message recorded for the failed request;
// an empty string while no failure has been recorded.
public getLatestMetricDataErrorMessage(): string {
return this.latestMetricdataErrorMessage;
}
// [/metric-data-range] Returns true once a ranged metric request has failed.
public metricDataRangeError(): boolean {
return this.isMetricdataRangeError;
}
// [/metric-data-range] Returns the message recorded for the failed ranged
// metric request; an empty string while no failure has been recorded.
public metricDataRangeErrorMessage(): string {
return this.metricdataRangeErrorMessage;
}
// [/trial-jobs] Fetches the trial job list and merges it into `this.trials`.
//
// Known trials are updated in place, unknown ones are added, and
// `maxSequenceId` is raised to the largest sequence id seen.
// The request is awaited so that the returned flag reflects the response;
// previously the method returned before the promise settled and always
// reported `false`.
//
// On failure the error flag and message are recorded instead of rejecting.
// `infoInitialized` is set in both cases so init() does not wait forever on a
// broken endpoint.
//
// Returns true when any trial was added or changed, or when the request failed.
private async updateInfo(): Promise<boolean> {
    let updated = false;
    try {
        const data = await requestAxios(`${MANAGER_IP}/trial-jobs`);
        const newTrials = TrialManager.expandJobsToTrials(data as any);
        for (const trialInfo of newTrials as TrialJobInfo[]) {
            const known = this.trials.get(trialInfo.id);
            if (known !== undefined) {
                updated = known.updateTrialJobInfo(trialInfo) || updated;
            } else {
                this.trials.set(trialInfo.id, new Trial(trialInfo, undefined));
                updated = true;
            }
            this.maxSequenceId = Math.max(this.maxSequenceId, trialInfo.sequenceId);
        }
    } catch (error) {
        this.isJobListError = true;
        this.jobErrorMessage = error instanceof Error ? error.message : String(error);
        updated = true;
    }
    this.infoInitialized = true;
    return updated;
}
private async updateMetrics(lastTime?: boolean): Promise<boolean> { private async updateMetrics(lastTime?: boolean): Promise<boolean> {
...@@ -188,13 +246,25 @@ class TrialManager { ...@@ -188,13 +246,25 @@ class TrialManager {
} }
// [/metric-data] Requests the metric records and hands them to
// doUpdateMetrics() with `false` as the second argument.
// If the request (or the update itself) fails, the error flag and message are
// recorded, doUpdateMetrics() is run with an empty list, and true is returned.
private async updateAllMetrics(): Promise<boolean> {
    const endpoint = `${MANAGER_IP}/metric-data`;
    const applied = requestAxios(endpoint).then(records => this.doUpdateMetrics(records as any, false));

    return applied.catch(failure => {
        this.isMetricdataError = true;
        this.MetricdataErrorMessage = `${failure.message}`;
        this.doUpdateMetrics([], false);
        return true;
    });
}
// [/metric-data-latest] Requests the latest metric records and hands them to
// doUpdateMetrics() with `true` as the second argument.
// If the request (or the update itself) fails, the error flag and message are
// recorded, doUpdateMetrics() is run with an empty list, and true is returned.
private async updateLatestMetrics(): Promise<boolean> {
    const endpoint = `${MANAGER_IP}/metric-data-latest`;
    const applied = requestAxios(endpoint).then(records => this.doUpdateMetrics(records as any, true));

    return applied.catch(failure => {
        this.isLatestMetricdataError = true;
        this.latestMetricdataErrorMessage = `${failure.message}`;
        this.doUpdateMetrics([], true);
        return true;
    });
}
private async updateManyMetrics(): Promise<void> { private async updateManyMetrics(): Promise<void> {
...@@ -202,12 +272,16 @@ class TrialManager { ...@@ -202,12 +272,16 @@ class TrialManager {
return; return;
} }
this.doingBatchUpdate = true; this.doingBatchUpdate = true;
for (let i = 0; i < this.maxSequenceId; i += METRIC_GROUP_UPDATE_SIZE) { for (let i = 0; i < this.maxSequenceId && this.isMetricdataRangeError === false; i += METRIC_GROUP_UPDATE_SIZE) {
const response = await axios.get(`${MANAGER_IP}/metric-data-range/${i}/${i + METRIC_GROUP_UPDATE_SIZE}`); requestAxios(`${MANAGER_IP}/metric-data-range/${i}/${i + METRIC_GROUP_UPDATE_SIZE}`)
if (response.status === 200) { .then(data => {
const updated = this.doUpdateMetrics(response.data as MetricDataRecord[], false); const updated = this.doUpdateMetrics(data as any, false);
this.batchUpdatedAfterReading = this.batchUpdatedAfterReading || updated; this.batchUpdatedAfterReading = this.batchUpdatedAfterReading || updated;
} })
.catch(error => {
this.isMetricdataRangeError = true;
this.metricdataRangeErrorMessage = `${error.message}`;
});
} }
this.doingBatchUpdate = false; this.doingBatchUpdate = false;
} }
......
...@@ -46,6 +46,9 @@ ...@@ -46,6 +46,9 @@
.description {
    /* Keep the description on a single line and truncate overflowing text with an ellipsis. */
    margin-left: 6px;
    line-height: 34px;
    overflow: hidden;
    white-space: nowrap;
    text-overflow: ellipsis;
}
} }
......
# Experiment configuration for a single-trial local run.
# Nesting under `tuner:` and `trial:` is restored here; with every key at
# column 0 both sections were empty and their settings became top-level keys.
authorName: nni
experimentName: default_test
maxExecDuration: 3m
maxTrialNum: 1
trialConcurrency: 1
searchSpacePath: ./search_space.json
tuner:
  builtinTunerName: Random
trial:
  codeDir: .
  # NOTE(review): the script name suggests it is intentionally missing so the
  # trial fails — confirm against the test that consumes this config.
  command: python3 not_exist.py
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
...@@ -705,6 +705,9 @@ def export_trials_data(args): ...@@ -705,6 +705,9 @@ def export_trials_data(args):
else: else:
formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}} formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}}
trial_records.append(formated_record) trial_records.append(formated_record)
if not trial_records:
print_error('No trial results collected! Please check your trial log...')
exit(0)
with open(args.path, 'w', newline='') as file: with open(args.path, 'w', newline='') as file:
writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records])) writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
writer.writeheader() writer.writeheader()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment