Unverified Commit 6568eaee authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #247 from microsoft/master

merge master
parents d90433da 1e2a2e29
import axios from 'axios';
import { MANAGER_IP } from '../const';
import { ExperimentProfile, NNIManagerStatus } from '../interface';
import { requestAxios } from '../function';
function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProfile): boolean {
if (!profile1 || !profile2) {
......@@ -14,32 +15,87 @@ function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProf
class Experiment {
// Last profile received from the `/experiment` endpoint; undefined until a request succeeds.
private profileField?: ExperimentProfile = undefined;
// Last status received from the `/check-status` endpoint; undefined until a request succeeds.
private statusField?: NNIManagerStatus = undefined;
// Set to true by update() when the `/experiment` request is rejected.
private isexperimentError: boolean = false;
// Message of the error that rejected the `/experiment` request.
private experimentErrorMessage: string = '';
// Set to true by update() when the `/check-status` request is rejected.
private isStatusError: boolean = false;
// Message of the error that rejected the `/check-status` request.
private statusErrorMessage: string = '';
/**
 * Keeps calling update() until both the profile and the status are loaded.
 * Stops early, leaving the fields unset, as soon as either request has been
 * flagged as failed.
 */
public async init(): Promise<void> {
    while (!(this.profileField && this.statusField)) {
        if (this.isexperimentError || this.isStatusError) {
            return;
        }
        await this.update();
    }
}
/** @returns true once update() has seen the `/experiment` request fail. */
public experimentError(): boolean {
    return this.isexperimentError;
}
/** @returns true once update() has seen the `/check-status` request fail. */
public statusError(): boolean {
    return this.isStatusError;
}
/** @returns the message recorded for the failed `/experiment` request ('' if none). */
public getExperimentMessage(): string {
    return this.experimentErrorMessage;
}
/** @returns the message recorded for the failed `/check-status` request ('' if none). */
public getStatusMessage(): string {
    return this.statusErrorMessage;
}
/**
 * Refreshes the cached profile and status from the manager.
 *
 * Each endpoint is requested exactly once through `requestAxios`. A rejected
 * request does not throw: it sets the matching error flag and message and
 * makes this method report `true` so the caller can react to the failure.
 *
 * @returns the accumulated `updated` flag for this refresh.
 */
public async update(): Promise<boolean> {
    let updated = false;

    await requestAxios(`${MANAGER_IP}/experiment`)
        .then(data => {
            // NOTE(review): `updated` becomes true when compareProfiles() returns true;
            // confirm whether that helper reports "equal" or "different".
            updated = updated || compareProfiles(this.profileField, data);
            this.profileField = data;
        })
        .catch(error => {
            this.isexperimentError = true;
            this.experimentErrorMessage = `${error.message}`;
            updated = true;
        });

    await requestAxios(`${MANAGER_IP}/check-status`)
        .then(data => {
            // Accumulate instead of overwriting so a failure (or change) recorded
            // by the `/experiment` request above is not lost.
            // NOTE(review): `===` marks the state as updated when the status is
            // UNCHANGED, which looks inverted — verify against the callers.
            updated = updated || JSON.stringify(this.statusField) === JSON.stringify(data);
            this.statusField = data;
        })
        .catch(error => {
            this.isStatusError = true;
            this.statusErrorMessage = `${error.message}`;
            updated = true;
        });

    return updated;
}
get profile(): ExperimentProfile {
if (!this.profileField) {
throw Error('Experiment profile not initialized');
// throw Error('Experiment profile not initialized');
// set initProfile to prevent page broken
const initProfile = {
data: {
"id": "", "revision": 0, "execDuration": 0,
"logDir": "", "nextSequenceId": 0,
"params": {
"authorName": "", "experimentName": "", "trialConcurrency": 0, "maxExecDuration": 0, "maxTrialNum": 0, "searchSpace": "null",
"trainingServicePlatform": "", "tuner": {
"builtinTunerName": "TPE",
"classArgs": { "optimize_mode": "" }, "checkpointDir": ""
},
"versionCheck": true, "clusterMetaData": [{ "key": "", "value": "" },
{ "key": "", "value": "" }]
}, "startTime": 0, "endTime": 0
}
};
this.profileField = initProfile.data as any;
}
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return this.profileField!;
......@@ -68,7 +124,9 @@ class Experiment {
get status(): string {
if (!this.statusField) {
throw Error('Experiment status not initialized');
// throw Error('Experiment status not initialized');
// this.statusField.status = '';
return '';
}
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return this.statusField!.status;
......
......@@ -2,6 +2,7 @@ import axios from 'axios';
import { MANAGER_IP, METRIC_GROUP_UPDATE_THRESHOLD, METRIC_GROUP_UPDATE_SIZE } from '../const';
import { MetricDataRecord, TableRecord, TrialJobInfo } from '../interface';
import { Trial } from './trial';
import { requestAxios } from '../function';
function groupMetricsByTrial(metrics: MetricDataRecord[]): Map<string, MetricDataRecord[]> {
const ret = new Map<string, MetricDataRecord[]>();
......@@ -39,9 +40,20 @@ class TrialManager {
// Largest trial sequenceId seen so far; raised in updateInfo().
private maxSequenceId: number = 0;
// Set at the start of updateManyMetrics() and cleared at its end.
private doingBatchUpdate: boolean = false;
// Accumulates whether any batch metric request reported a change.
private batchUpdatedAfterReading: boolean = false;
private isJobListError: boolean = false; // trial-jobs api error flag
private jobErrorMessage: string = ''; // trial-jobs error message
private isMetricdataError: boolean = false; // metric-data api error flag
private MetricdataErrorMessage: string = ''; // metric-data error message
private isLatestMetricdataError: boolean = false; // metric-data-latest api error flag
private latestMetricdataErrorMessage: string = ''; // metric-data-latest error message
private isMetricdataRangeError: boolean = false; // metric-data-range api error flag
private metricdataRangeErrorMessage: string = ''; // metric-data-range error message
/**
 * Keeps calling update() until both the trial info and the metrics are
 * initialized. Stops early once the `/metric-data` request has been flagged
 * as failed.
 */
public async init(): Promise<void> {
    while (!(this.infoInitialized && this.metricInitialized)) {
        if (this.isMetricdataError) {
            return;
        }
        await this.update();
    }
}
......@@ -156,24 +168,70 @@ class TrialManager {
return trials;
}
// if this.jobListError = true, show trial error message [/trial-jobs]
/** @returns true once the `/trial-jobs` request has been rejected. */
public jobListError(): boolean {
    return this.isJobListError;
}
// trial error message's content [/trial-jobs]
/** @returns the message recorded for the failed `/trial-jobs` request ('' if none). */
public getJobErrorMessage(): string {
    return this.jobErrorMessage;
}
// [/metric-data]
/** @returns true once the `/metric-data` request has been rejected. */
public MetricDataError(): boolean {
    return this.isMetricdataError;
}
// [/metric-data]
/** @returns the message recorded for the failed `/metric-data` request ('' if none). */
public getMetricDataErrorMessage(): string {
    return this.MetricdataErrorMessage;
}
// [/metric-data-latest]
/** @returns true once the `/metric-data-latest` request has been rejected. */
public latestMetricDataError(): boolean {
    return this.isLatestMetricdataError;
}
// [/metric-data-latest]
/** @returns the message recorded for the failed `/metric-data-latest` request ('' if none). */
public getLatestMetricDataErrorMessage(): string {
    return this.latestMetricdataErrorMessage;
}
/** @returns true once a `/metric-data-range` request has been rejected. */
public metricDataRangeError(): boolean {
    return this.isMetricdataRangeError;
}
/** @returns the message recorded for the failed `/metric-data-range` request ('' if none). */
public metricDataRangeErrorMessage(): string {
    return this.metricdataRangeErrorMessage;
}
/**
 * Fetches `/trial-jobs` and merges the result into `this.trials`.
 *
 * Known trials are updated in place, unknown ones are added, and
 * `maxSequenceId` is raised to the largest sequenceId seen. A rejected
 * request does not throw: it records the error flag and message instead.
 * In both cases `infoInitialized` is set so init() can finish.
 *
 * @returns true when a trial was added or changed, or when the request failed.
 */
private async updateInfo(): Promise<boolean> {
    let updated = false;
    // The request must be awaited; otherwise `updated` is returned before the
    // handlers below have had a chance to run and the result is always false.
    await requestAxios(`${MANAGER_IP}/trial-jobs`)
        .then(data => {
            const newTrials = TrialManager.expandJobsToTrials(data as any);
            for (const trialInfo of newTrials as TrialJobInfo[]) {
                const existing = this.trials.get(trialInfo.id);
                if (existing !== undefined) {
                    // Call first, then OR, so the trial is always refreshed.
                    updated = existing.updateTrialJobInfo(trialInfo) || updated;
                } else {
                    this.trials.set(trialInfo.id, new Trial(trialInfo, undefined));
                    updated = true;
                }
                this.maxSequenceId = Math.max(this.maxSequenceId, trialInfo.sequenceId);
            }
            this.infoInitialized = true;
        })
        .catch(error => {
            this.isJobListError = true;
            this.jobErrorMessage = error.message;
            // Mark as initialized even on failure so init() does not loop forever.
            this.infoInitialized = true;
            updated = true;
        });
    return updated;
}
private async updateMetrics(lastTime?: boolean): Promise<boolean> {
......@@ -188,13 +246,25 @@ class TrialManager {
}
/**
 * Fetches every metric record from `/metric-data` and feeds it to
 * doUpdateMetrics().
 *
 * On failure the error flag and message are recorded, doUpdateMetrics() is
 * still invoked with an empty list, and `true` is returned so the caller
 * treats the failure as a change.
 *
 * @returns the result of doUpdateMetrics(), or true when the request failed.
 */
private async updateAllMetrics(): Promise<boolean> {
    return requestAxios(`${MANAGER_IP}/metric-data`)
        .then(data => this.doUpdateMetrics(data as any, false))
        .catch(error => {
            this.isMetricdataError = true;
            this.MetricdataErrorMessage = `${error.message}`;
            this.doUpdateMetrics([], false);
            return true;
        });
}
/**
 * Fetches the latest metric records from `/metric-data-latest` and feeds
 * them to doUpdateMetrics().
 *
 * On failure the error flag and message are recorded, doUpdateMetrics() is
 * still invoked with an empty list, and `true` is returned so the caller
 * treats the failure as a change.
 *
 * @returns the result of doUpdateMetrics(), or true when the request failed.
 */
private async updateLatestMetrics(): Promise<boolean> {
    return requestAxios(`${MANAGER_IP}/metric-data-latest`)
        .then(data => this.doUpdateMetrics(data as any, true))
        .catch(error => {
            this.isLatestMetricdataError = true;
            this.latestMetricdataErrorMessage = `${error.message}`;
            this.doUpdateMetrics([], true);
            return true;
        });
}
private async updateManyMetrics(): Promise<void> {
......@@ -202,12 +272,16 @@ class TrialManager {
return;
}
this.doingBatchUpdate = true;
// Fetch metrics in windows of METRIC_GROUP_UPDATE_SIZE sequence ids.
// Each request is awaited so that (a) the error flag checked in the loop
// condition can actually stop further requests, and (b) the statements after
// the loop only run once every window has been processed.
for (let i = 0; i < this.maxSequenceId && this.isMetricdataRangeError === false; i += METRIC_GROUP_UPDATE_SIZE) {
    await requestAxios(`${MANAGER_IP}/metric-data-range/${i}/${i + METRIC_GROUP_UPDATE_SIZE}`)
        .then(data => {
            const updated = this.doUpdateMetrics(data as any, false);
            this.batchUpdatedAfterReading = this.batchUpdatedAfterReading || updated;
        })
        .catch(error => {
            this.isMetricdataRangeError = true;
            this.metricdataRangeErrorMessage = `${error.message}`;
        });
}
this.doingBatchUpdate = false;
}
......
......@@ -46,6 +46,9 @@
.description{
line-height: 34px;
margin-left: 6px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
}
......
# Experiment config for a single local trial using the Random tuner.
# NOTE(review): the trial command points at `not_exist.py`; presumably this is
# intentional, to exercise the path where a trial fails to start — confirm.
authorName: nni
experimentName: default_test
maxExecDuration: 3m
maxTrialNum: 1
trialConcurrency: 1
searchSpacePath: ./search_space.json
tuner:
builtinTunerName: Random
trial:
codeDir: .
command: python3 not_exist.py
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
......@@ -705,6 +705,9 @@ def export_trials_data(args):
else:
formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}}
trial_records.append(formated_record)
if not trial_records:
print_error('No trial results collected! Please check your trial log...')
exit(0)
with open(args.path, 'w', newline='') as file:
writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
writer.writeheader()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment