"git@developer.sourcefind.cn:OpenDAS/tilelang.git" did not exist on "4eba852ac928ba061b3696ca123aea180387d92f"
Unverified Commit 050ee2bb authored by J-shang, committed by GitHub
Browse files

unify trial job id name (#3053)


Co-authored-by: Ning Shang <nishang@microsoft.com>
parent be652aa4
...@@ -99,10 +99,7 @@ class TrialResult: ...@@ -99,10 +99,7 @@ class TrialResult:
self.value = None self.value = None
self.trialJobId = None self.trialJobId = None
for key in json_obj.keys(): for key in json_obj.keys():
if key == 'id': setattr(self, key, json_obj[key])
setattr(self, 'trialJobId', json_obj[key])
elif hasattr(self, key):
setattr(self, key, json_obj[key])
self.value = json.loads(self.value) self.value = json.loads(self.value)
def __repr__(self): def __repr__(self):
...@@ -219,10 +216,7 @@ class TrialJob: ...@@ -219,10 +216,7 @@ class TrialJob:
self.finalMetricData = None self.finalMetricData = None
self.stderrPath = None self.stderrPath = None
for key in json_obj.keys(): for key in json_obj.keys():
if key == 'id': setattr(self, key, json_obj[key])
setattr(self, 'trialJobId', json_obj[key])
elif hasattr(self, key):
setattr(self, key, json_obj[key])
if self.hyperParameters: if self.hyperParameters:
self.hyperParameters = [TrialHyperParameters(json.loads(e)) for e in self.hyperParameters] self.hyperParameters = [TrialHyperParameters(json.loads(e)) for e in self.hyperParameters]
if self.finalMetricData: if self.finalMetricData:
......
...@@ -388,9 +388,9 @@ def log_trial(args): ...@@ -388,9 +388,9 @@ def log_trial(args):
if response and check_response(response): if response and check_response(response):
content = json.loads(response.text) content = json.loads(response.text)
for trial in content: for trial in content:
trial_id_list.append(trial.get('id')) trial_id_list.append(trial.get('trialJobId'))
if trial.get('logPath'): if trial.get('logPath'):
trial_id_path_dict[trial.get('id')] = trial['logPath'] trial_id_path_dict[trial.get('trialJobId')] = trial['logPath']
else: else:
print_error('Restful server is not running...') print_error('Restful server is not running...')
exit(1) exit(1)
...@@ -674,7 +674,7 @@ def show_experiment_info(): ...@@ -674,7 +674,7 @@ def show_experiment_info():
content = json.loads(response.text) content = json.loads(response.text)
for index, value in enumerate(content): for index, value in enumerate(content):
content[index] = convert_time_stamp_to_date(value) content[index] = convert_time_stamp_to_date(value)
print(TRIAL_MONITOR_CONTENT % (content[index].get('id'), content[index].get('startTime'), \ print(TRIAL_MONITOR_CONTENT % (content[index].get('trialJobId'), content[index].get('startTime'), \
content[index].get('endTime'), content[index].get('status'))) content[index].get('endTime'), content[index].get('status')))
print(TRIAL_MONITOR_TAIL) print(TRIAL_MONITOR_TAIL)
...@@ -747,7 +747,7 @@ def export_trials_data(args): ...@@ -747,7 +747,7 @@ def export_trials_data(args):
return return
intermediate_results = groupby_trial_id(json.loads(intermediate_results_response.text)) intermediate_results = groupby_trial_id(json.loads(intermediate_results_response.text))
for record in content: for record in content:
record['intermediate'] = intermediate_results[record['id']] record['intermediate'] = intermediate_results[record['trialJobId']]
if args.type == 'json': if args.type == 'json':
with open(args.path, 'w') as file: with open(args.path, 'w') as file:
file.write(json.dumps(content)) file.write(json.dumps(content))
...@@ -759,9 +759,9 @@ def export_trials_data(args): ...@@ -759,9 +759,9 @@ def export_trials_data(args):
formated_record['intermediate'] = '[' + ','.join(record['intermediate']) + ']' formated_record['intermediate'] = '[' + ','.join(record['intermediate']) + ']'
record_value = json.loads(record['value']) record_value = json.loads(record['value'])
if not isinstance(record_value, (float, int)): if not isinstance(record_value, (float, int)):
formated_record.update({**record['parameter'], **record_value, **{'id': record['id']}}) formated_record.update({**record['parameter'], **record_value, **{'trialJobId': record['trialJobId']}})
else: else:
formated_record.update({**record['parameter'], **{'reward': record_value, 'id': record['id']}}) formated_record.update({**record['parameter'], **{'reward': record_value, 'trialJobId': record['trialJobId']}})
trial_records.append(formated_record) trial_records.append(formated_record)
if not trial_records: if not trial_records:
print_error('No trial results collected! Please check your trial log...') print_error('No trial results collected! Please check your trial log...')
......
...@@ -19,7 +19,7 @@ def parse_log_path(args, trial_content): ...@@ -19,7 +19,7 @@ def parse_log_path(args, trial_content):
path_list = [] path_list = []
host_list = [] host_list = []
for trial in trial_content: for trial in trial_content:
if args.trial_id and args.trial_id != 'all' and trial.get('id') != args.trial_id: if args.trial_id and args.trial_id != 'all' and trial.get('trialJobId') != args.trial_id:
continue continue
pattern = r'(?P<head>.+)://(?P<host>.+):(?P<path>.*)' pattern = r'(?P<head>.+)://(?P<host>.+):(?P<path>.*)'
match = re.search(pattern, trial['logPath']) match = re.search(pattern, trial['logPath'])
...@@ -40,7 +40,7 @@ def copy_data_from_remote(args, nni_config, trial_content, path_list, host_list, ...@@ -40,7 +40,7 @@ def copy_data_from_remote(args, nni_config, trial_content, path_list, host_list,
machine_dict[machine['ip']] = {'port': machine['port'], 'passwd': machine['passwd'], 'username': machine['username'], machine_dict[machine['ip']] = {'port': machine['port'], 'passwd': machine['passwd'], 'username': machine['username'],
'sshKeyPath': machine.get('sshKeyPath'), 'passphrase': machine.get('passphrase')} 'sshKeyPath': machine.get('sshKeyPath'), 'passphrase': machine.get('passphrase')}
for index, host in enumerate(host_list): for index, host in enumerate(host_list):
local_path = os.path.join(temp_nni_path, trial_content[index].get('id')) local_path = os.path.join(temp_nni_path, trial_content[index].get('trialJobId'))
local_path_list.append(local_path) local_path_list.append(local_path)
print_normal('Copying log data from %s to %s' % (host + ':' + path_list[index], local_path)) print_normal('Copying log data from %s to %s' % (host + ':' + path_list[index], local_path))
sftp = create_ssh_sftp_client(host, machine_dict[host]['port'], machine_dict[host]['username'], machine_dict[host]['passwd'], sftp = create_ssh_sftp_client(host, machine_dict[host]['port'], machine_dict[host]['username'], machine_dict[host]['passwd'],
......
...@@ -124,7 +124,7 @@ def print_file_content(filepath): ...@@ -124,7 +124,7 @@ def print_file_content(filepath):
def print_trial_job_log(training_service, trial_jobs_url): def print_trial_job_log(training_service, trial_jobs_url):
trial_jobs = get_trial_jobs(trial_jobs_url) trial_jobs = get_trial_jobs(trial_jobs_url)
for trial_job in trial_jobs: for trial_job in trial_jobs:
trial_log_dir = os.path.join(get_experiment_dir(EXPERIMENT_URL), 'trials', trial_job['id']) trial_log_dir = os.path.join(get_experiment_dir(EXPERIMENT_URL), 'trials', trial_job['trialJobId'])
log_files = ['stderr', 'trial.log'] if training_service == 'local' else ['stdout_log_collection.log'] log_files = ['stderr', 'trial.log'] if training_service == 'local' else ['stdout_log_collection.log']
for log_file in log_files: for log_file in log_files:
print_file_content(os.path.join(trial_log_dir, log_file)) print_file_content(os.path.join(trial_log_dir, log_file))
......
...@@ -43,7 +43,7 @@ interface MetricDataRecord { ...@@ -43,7 +43,7 @@ interface MetricDataRecord {
} }
interface TrialJobInfo { interface TrialJobInfo {
id: string; trialJobId: string;
sequenceId?: number; sequenceId?: number;
status: TrialJobStatus; status: TrialJobStatus;
startTime?: number; startTime?: number;
...@@ -63,7 +63,7 @@ interface HyperParameterFormat { ...@@ -63,7 +63,7 @@ interface HyperParameterFormat {
interface ExportedDataFormat { interface ExportedDataFormat {
parameter: Record<string, any>; parameter: Record<string, any>;
value: Record<string, any>; value: Record<string, any>;
id: string; trialJobId: string;
} }
abstract class DataStore { abstract class DataStore {
......
...@@ -168,7 +168,7 @@ class NNIDataStore implements DataStore { ...@@ -168,7 +168,7 @@ class NNIDataStore implements DataStore {
const oneEntry: ExportedDataFormat = { const oneEntry: ExportedDataFormat = {
parameter: parameters.parameters, parameter: parameters.parameters,
value: JSON.parse(job.finalMetricData[0].data), value: JSON.parse(job.finalMetricData[0].data),
id: job.id trialJobId: job.trialJobId
}; };
exportedData.push(oneEntry); exportedData.push(oneEntry);
} else { } else {
...@@ -188,7 +188,7 @@ class NNIDataStore implements DataStore { ...@@ -188,7 +188,7 @@ class NNIDataStore implements DataStore {
const oneEntry: ExportedDataFormat = { const oneEntry: ExportedDataFormat = {
parameter: value, parameter: value,
value: metricValue, value: metricValue,
id: job.id trialJobId: job.trialJobId
}; };
exportedData.push(oneEntry); exportedData.push(oneEntry);
} }
...@@ -229,7 +229,7 @@ class NNIDataStore implements DataStore { ...@@ -229,7 +229,7 @@ class NNIDataStore implements DataStore {
} }
if (!(status !== undefined && jobInfo.status !== status)) { if (!(status !== undefined && jobInfo.status !== status)) {
if (jobInfo.status === 'SUCCEEDED') { if (jobInfo.status === 'SUCCEEDED') {
jobInfo.finalMetricData = finalMetricsMap.get(jobInfo.id); jobInfo.finalMetricData = finalMetricsMap.get(jobInfo.trialJobId);
} }
result.push(jobInfo); result.push(jobInfo);
} }
...@@ -320,7 +320,7 @@ class NNIDataStore implements DataStore { ...@@ -320,7 +320,7 @@ class NNIDataStore implements DataStore {
jobInfo = map.get(record.trialJobId); jobInfo = map.get(record.trialJobId);
} else { } else {
jobInfo = { jobInfo = {
id: record.trialJobId, trialJobId: record.trialJobId,
status: this.getJobStatusByLatestEvent('UNKNOWN', record.event), status: this.getJobStatusByLatestEvent('UNKNOWN', record.event),
hyperParameters: [] hyperParameters: []
}; };
...@@ -364,14 +364,14 @@ class NNIDataStore implements DataStore { ...@@ -364,14 +364,14 @@ class NNIDataStore implements DataStore {
const newHParam: any = this.parseHyperParameter(record.data); const newHParam: any = this.parseHyperParameter(record.data);
if (newHParam !== undefined) { if (newHParam !== undefined) {
if (jobInfo.hyperParameters !== undefined) { if (jobInfo.hyperParameters !== undefined) {
let hParamIds: Set<number> | undefined = hParamIdMap.get(jobInfo.id); let hParamIds: Set<number> | undefined = hParamIdMap.get(jobInfo.trialJobId);
if (hParamIds === undefined) { if (hParamIds === undefined) {
hParamIds = new Set(); hParamIds = new Set();
} }
if (!hParamIds.has(newHParam.parameter_index)) { if (!hParamIds.has(newHParam.parameter_index)) {
jobInfo.hyperParameters.push(JSON.stringify(newHParam)); jobInfo.hyperParameters.push(JSON.stringify(newHParam));
hParamIds.add(newHParam.parameter_index); hParamIds.add(newHParam.parameter_index);
hParamIdMap.set(jobInfo.id, hParamIds); hParamIdMap.set(jobInfo.trialJobId, hParamIds);
} }
} else { } else {
assert(false, 'jobInfo.hyperParameters is undefined'); assert(false, 'jobInfo.hyperParameters is undefined');
......
...@@ -231,7 +231,7 @@ class NNIManager implements Manager { ...@@ -231,7 +231,7 @@ class NNIManager implements Manager {
// Check the final status for WAITING and RUNNING jobs // Check the final status for WAITING and RUNNING jobs
await Promise.all(allTrialJobs await Promise.all(allTrialJobs
.filter((job: TrialJobInfo) => job.status === 'WAITING' || job.status === 'RUNNING') .filter((job: TrialJobInfo) => job.status === 'WAITING' || job.status === 'RUNNING')
.map((job: TrialJobInfo) => this.dataStore.storeTrialJobEvent('FAILED', job.id))); .map((job: TrialJobInfo) => this.dataStore.storeTrialJobEvent('FAILED', job.trialJobId)));
// Collect generated trials and imported trials // Collect generated trials and imported trials
const finishedTrialData: string = await this.exportData(); const finishedTrialData: string = await this.exportData();
...@@ -304,7 +304,7 @@ class NNIManager implements Manager { ...@@ -304,7 +304,7 @@ class NNIManager implements Manager {
// FIXME: can this be undefined? // FIXME: can this be undefined?
trial.sequenceId !== undefined && minSeqId <= trial.sequenceId && trial.sequenceId <= maxSeqId trial.sequenceId !== undefined && minSeqId <= trial.sequenceId && trial.sequenceId <= maxSeqId
)); ));
const targetTrialIds = new Set(targetTrials.map(trial => trial.id)); const targetTrialIds = new Set(targetTrials.map(trial => trial.trialJobId));
const allMetrics = await this.dataStore.getMetricData(); const allMetrics = await this.dataStore.getMetricData();
return allMetrics.filter(metric => targetTrialIds.has(metric.trialJobId)); return allMetrics.filter(metric => targetTrialIds.has(metric.trialJobId));
......
...@@ -161,7 +161,7 @@ class MockedDataStore implements DataStore { ...@@ -161,7 +161,7 @@ class MockedDataStore implements DataStore {
} }
if (!(status && jobInfo.status !== status)) { if (!(status && jobInfo.status !== status)) {
if (jobInfo.status === 'SUCCEEDED') { if (jobInfo.status === 'SUCCEEDED') {
jobInfo.finalMetricData = await this.getFinalMetricData(jobInfo.id); jobInfo.finalMetricData = await this.getFinalMetricData(jobInfo.trialJobId);
} }
result.push(jobInfo); result.push(jobInfo);
} }
...@@ -206,7 +206,7 @@ class MockedDataStore implements DataStore { ...@@ -206,7 +206,7 @@ class MockedDataStore implements DataStore {
public getTrialJob(trialJobId: string): Promise<TrialJobInfo> { public getTrialJob(trialJobId: string): Promise<TrialJobInfo> {
return Promise.resolve({ return Promise.resolve({
id: '1234', trialJobId: '1234',
status: 'SUCCEEDED', status: 'SUCCEEDED',
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now() endTime: Date.now()
...@@ -242,7 +242,7 @@ class MockedDataStore implements DataStore { ...@@ -242,7 +242,7 @@ class MockedDataStore implements DataStore {
jobInfo = map.get(record.trialJobId); jobInfo = map.get(record.trialJobId);
} else { } else {
jobInfo = { jobInfo = {
id: record.trialJobId, trialJobId: record.trialJobId,
status: this.getJobStatusByLatestEvent(record.event), status: this.getJobStatusByLatestEvent(record.event),
}; };
} }
......
...@@ -122,7 +122,7 @@ describe('Unit test for nnimanager', function () { ...@@ -122,7 +122,7 @@ describe('Unit test for nnimanager', function () {
it('test getTrialJob valid', () => { it('test getTrialJob valid', () => {
//query a exist id //query a exist id
return nniManager.getTrialJob('1234').then(function (trialJobDetail) { return nniManager.getTrialJob('1234').then(function (trialJobDetail) {
expect(trialJobDetail.id).to.be.equal('1234'); expect(trialJobDetail.trialJobId).to.be.equal('1234');
}).catch((error) => { }).catch((error) => {
assert.fail(error); assert.fail(error);
}) })
......
...@@ -101,7 +101,7 @@ export class MockedNNIManager extends Manager { ...@@ -101,7 +101,7 @@ export class MockedNNIManager extends Manager {
public getTrialJob(trialJobId: string): Promise<TrialJobInfo> { public getTrialJob(trialJobId: string): Promise<TrialJobInfo> {
const deferred: Deferred<TrialJobInfo> = new Deferred<TrialJobInfo>(); const deferred: Deferred<TrialJobInfo> = new Deferred<TrialJobInfo>();
const jobInfo: TrialJobInfo = { const jobInfo: TrialJobInfo = {
id: '1234', trialJobId: '1234',
status: 'SUCCEEDED', status: 'SUCCEEDED',
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now() endTime: Date.now()
...@@ -152,7 +152,7 @@ export class MockedNNIManager extends Manager { ...@@ -152,7 +152,7 @@ export class MockedNNIManager extends Manager {
} }
public listTrialJobs(status?: TrialJobStatus): Promise<TrialJobInfo[]> { public listTrialJobs(status?: TrialJobStatus): Promise<TrialJobInfo[]> {
const job1: TrialJobInfo = { const job1: TrialJobInfo = {
id: '1234', trialJobId: '1234',
status: 'SUCCEEDED', status: 'SUCCEEDED',
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now(), endTime: Date.now(),
...@@ -166,7 +166,7 @@ export class MockedNNIManager extends Manager { ...@@ -166,7 +166,7 @@ export class MockedNNIManager extends Manager {
}] }]
}; };
const job2: TrialJobInfo = { const job2: TrialJobInfo = {
id: '3456', trialJobId: '3456',
status: 'FAILED', status: 'FAILED',
startTime: Date.now(), startTime: Date.now(),
endTime: Date.now(), endTime: Date.now(),
......
...@@ -57,7 +57,7 @@ describe('Unit test for rest server', () => { ...@@ -57,7 +57,7 @@ describe('Unit test for rest server', () => {
assert.fail(err.message); assert.fail(err.message);
} else { } else {
expect(res.statusCode).to.equal(200); expect(res.statusCode).to.equal(200);
expect(JSON.parse(body).id).to.equal('1234'); expect(JSON.parse(body).trialJobId).to.equal('1234');
} }
done(); done();
}); });
......
...@@ -167,7 +167,7 @@ class Overview extends React.Component<{}, OverviewState> { ...@@ -167,7 +167,7 @@ class Overview extends React.Component<{}, OverviewState> {
</Stack> </Stack>
</div> </div>
</Stack> </Stack>
<SuccessTable trialIds={bestTrials.map(trial => trial.info.id)} /> <SuccessTable trialIds={bestTrials.map(trial => trial.info.trialJobId)} />
</div> </div>
<div className='overviewCommand1'> <div className='overviewCommand1'>
<Command1 /> <Command1 />
......
...@@ -131,7 +131,7 @@ interface MetricDataRecord { ...@@ -131,7 +131,7 @@ interface MetricDataRecord {
} }
interface TrialJobInfo { interface TrialJobInfo {
id: string; trialJobId: string;
sequenceId: number; sequenceId: number;
status: string; status: string;
startTime?: number; startTime?: number;
......
...@@ -144,9 +144,9 @@ class Trial implements TableObj { ...@@ -144,9 +144,9 @@ class Trial implements TableObj {
} }
return { return {
key: this.info.id, key: this.info.trialJobId,
sequenceId: this.info.sequenceId, sequenceId: this.info.sequenceId,
id: this.info.id, id: this.info.trialJobId,
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
startTime: this.info.startTime!, startTime: this.info.startTime!,
endTime: this.info.endTime, endTime: this.info.endTime,
...@@ -169,7 +169,7 @@ class Trial implements TableObj { ...@@ -169,7 +169,7 @@ class Trial implements TableObj {
} }
get id(): string { get id(): string {
return this.info.id; return this.info.trialJobId;
} }
get duration(): number { get duration(): number {
......
...@@ -172,11 +172,11 @@ class TrialManager { ...@@ -172,11 +172,11 @@ class TrialManager {
requestAxios(`${MANAGER_IP}/trial-jobs`) requestAxios(`${MANAGER_IP}/trial-jobs`)
.then(data => { .then(data => {
for (const trialInfo of data as TrialJobInfo[]) { for (const trialInfo of data as TrialJobInfo[]) {
if (this.trials.has(trialInfo.id)) { if (this.trials.has(trialInfo.trialJobId)) {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
updated = this.trials.get(trialInfo.id)!.updateTrialJobInfo(trialInfo) || updated; updated = this.trials.get(trialInfo.trialJobId)!.updateTrialJobInfo(trialInfo) || updated;
} else { } else {
this.trials.set(trialInfo.id, new Trial(trialInfo, undefined)); this.trials.set(trialInfo.trialJobId, new Trial(trialInfo, undefined));
updated = true; updated = true;
} }
this.maxSequenceId = Math.max(this.maxSequenceId, trialInfo.sequenceId); this.maxSequenceId = Math.max(this.maxSequenceId, trialInfo.sequenceId);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment