Unverified Commit 10c177c2 authored by Junwei Sun's avatar Junwei Sun Committed by GitHub
Browse files

support display trial log on local mode (#2718)

parent e2a86899
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
'use strict'; 'use strict';
import { MetricDataRecord, MetricType, TrialJobInfo } from './datastore'; import { MetricDataRecord, MetricType, TrialJobInfo } from './datastore';
import { TrialJobStatus } from './trainingService'; import { TrialJobStatus, LogType } from './trainingService';
type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM'; type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL'; type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
...@@ -101,6 +101,8 @@ abstract class Manager { ...@@ -101,6 +101,8 @@ abstract class Manager {
public abstract getMetricDataByRange(minSeqId: number, maxSeqId: number): Promise<MetricDataRecord[]>; public abstract getMetricDataByRange(minSeqId: number, maxSeqId: number): Promise<MetricDataRecord[]>;
public abstract getLatestMetricData(): Promise<MetricDataRecord[]>; public abstract getLatestMetricData(): Promise<MetricDataRecord[]>;
/**
 * Retrieve a log of one trial job.
 * @param trialJobId ID of the trial job whose log is requested
 * @param logType which log to retrieve; one of the `LogType` values
 * @returns a promise resolving to the log as a string
 */
public abstract getTrialLog(trialJobId: string, logType: LogType): Promise<string>;
public abstract getTrialJobStatistics(): Promise<TrialJobStatistics[]>; public abstract getTrialJobStatistics(): Promise<TrialJobStatistics[]>;
public abstract getStatus(): NNIManagerStatus; public abstract getStatus(): NNIManagerStatus;
} }
......
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
*/ */
type TrialJobStatus = 'UNKNOWN' | 'WAITING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED' | 'USER_CANCELED' | 'SYS_CANCELED' | 'EARLY_STOPPED'; type TrialJobStatus = 'UNKNOWN' | 'WAITING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED' | 'USER_CANCELED' | 'SYS_CANCELED' | 'EARLY_STOPPED';
/**
 * Kinds of per-trial log that can be requested through `getTrialLog`.
 */
type LogType = 'TRIAL_LOG' | 'TRIAL_ERROR';
interface TrainingServiceMetadata { interface TrainingServiceMetadata {
readonly key: string; readonly key: string;
readonly value: string; readonly value: string;
...@@ -79,6 +81,7 @@ abstract class TrainingService { ...@@ -79,6 +81,7 @@ abstract class TrainingService {
public abstract updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail>; public abstract updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail>;
public abstract get isMultiPhaseJobSupported(): boolean; public abstract get isMultiPhaseJobSupported(): boolean;
public abstract cancelTrialJob(trialJobId: string, isEarlyStopped?: boolean): Promise<void>; public abstract cancelTrialJob(trialJobId: string, isEarlyStopped?: boolean): Promise<void>;
/**
 * Retrieve a log of one trial job from this training service.
 * @param trialJobId ID of the trial job whose log is requested
 * @param logType which log to retrieve; one of the `LogType` values
 * @returns a promise resolving to the log as a string
 */
public abstract getTrialLog(trialJobId: string, logType: LogType): Promise<string>;
public abstract setClusterMetadata(key: string, value: string): Promise<void>; public abstract setClusterMetadata(key: string, value: string): Promise<void>;
public abstract getClusterMetadata(key: string): Promise<string>; public abstract getClusterMetadata(key: string): Promise<string>;
public abstract cleanUp(): Promise<void>; public abstract cleanUp(): Promise<void>;
...@@ -98,5 +101,5 @@ class NNIManagerIpConfig { ...@@ -98,5 +101,5 @@ class NNIManagerIpConfig {
export { export {
TrainingService, TrainingServiceError, TrialJobStatus, TrialJobApplicationForm, TrainingService, TrainingServiceError, TrialJobStatus, TrialJobApplicationForm,
TrainingServiceMetadata, TrialJobDetail, TrialJobMetric, HyperParameters, TrainingServiceMetadata, TrialJobDetail, TrialJobMetric, HyperParameters,
NNIManagerIpConfig NNIManagerIpConfig, LogType
}; };
...@@ -16,7 +16,7 @@ import { ...@@ -16,7 +16,7 @@ import {
NNIManagerStatus, ProfileUpdateType, TrialJobStatistics NNIManagerStatus, ProfileUpdateType, TrialJobStatistics
} from '../common/manager'; } from '../common/manager';
import { import {
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus, LogType
} from '../common/trainingService'; } from '../common/trainingService';
import { delay, getCheckpointDir, getExperimentRootDir, getLogDir, getMsgDispatcherCommand, mkDirP, getTunerProc, getLogLevel, isAlive, killPid } from '../common/utils'; import { delay, getCheckpointDir, getExperimentRootDir, getLogDir, getMsgDispatcherCommand, mkDirP, getTunerProc, getLogLevel, isAlive, killPid } from '../common/utils';
import { import {
...@@ -325,6 +325,10 @@ class NNIManager implements Manager { ...@@ -325,6 +325,10 @@ class NNIManager implements Manager {
// FIXME: unit test // FIXME: unit test
} }
/**
 * Fetch a trial's log by delegating to the configured training service.
 * @param trialJobId ID of the trial job whose log is requested
 * @param logType which log to retrieve
 * @returns the log text produced by the training service
 */
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> {
    const trialLog: string = await this.trainingService.getTrialLog(trialJobId, logType);

    return trialLog;
}
public getExperimentProfile(): Promise<ExperimentProfile> { public getExperimentProfile(): Promise<ExperimentProfile> {
// TO DO: using Promise.resolve() // TO DO: using Promise.resolve()
const deferred: Deferred<ExperimentProfile> = new Deferred<ExperimentProfile>(); const deferred: Deferred<ExperimentProfile> = new Deferred<ExperimentProfile>();
......
...@@ -7,7 +7,7 @@ import { Deferred } from 'ts-deferred'; ...@@ -7,7 +7,7 @@ import { Deferred } from 'ts-deferred';
import { Provider } from 'typescript-ioc'; import { Provider } from 'typescript-ioc';
import { MethodNotImplementedError } from '../../common/errors'; import { MethodNotImplementedError } from '../../common/errors';
import { TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric } from '../../common/trainingService'; import { TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, LogType } from '../../common/trainingService';
const testTrainingServiceProvider: Provider = { const testTrainingServiceProvider: Provider = {
get: () => { return new MockedTrainingService(); } get: () => { return new MockedTrainingService(); }
...@@ -63,6 +63,10 @@ class MockedTrainingService extends TrainingService { ...@@ -63,6 +63,10 @@ class MockedTrainingService extends TrainingService {
return deferred.promise; return deferred.promise;
} }
/**
 * Trial log retrieval is not implemented by this mock.
 *
 * Declared `async` so the failure is delivered as a rejected promise (as the
 * real training services do) rather than as a synchronous throw from a
 * method whose signature promises a `Promise`.
 * @param trialJobId ID of the trial job (unused)
 * @param logType requested log type (unused)
 * @throws MethodNotImplementedError always, as a promise rejection
 */
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> {
    throw new MethodNotImplementedError();
}
async run(): Promise<void> { async run(): Promise<void> {
} }
......
...@@ -57,6 +57,7 @@ class NNIRestHandler { ...@@ -57,6 +57,7 @@ class NNIRestHandler {
this.getMetricData(router); this.getMetricData(router);
this.getMetricDataByRange(router); this.getMetricDataByRange(router);
this.getLatestMetricData(router); this.getLatestMetricData(router);
this.getTrialLog(router);
this.exportData(router); this.exportData(router);
// Express-joi-validator configuration // Express-joi-validator configuration
...@@ -268,6 +269,19 @@ class NNIRestHandler { ...@@ -268,6 +269,19 @@ class NNIRestHandler {
}); });
} }
/**
 * Register `GET /trial-log/:id/:type`, which responds with a trial's log.
 *
 * `:id` is the trial job ID and `:type` must be `TRIAL_LOG` or `TRIAL_ERROR`.
 * An empty log is replaced by a short placeholder message. Any failure,
 * including an unknown `:type` or a synchronous throw from the manager, is
 * routed through `handleError`, so the request always gets a response.
 * @param router router on which the route is registered
 */
private getTrialLog(router: Router): void {
    router.get('/trial-log/:id/:type', async (req: Request, res: Response) => {
        try {
            // Validate the URL segment before treating it as a LogType.
            const logType = req.params.type;
            if (logType !== 'TRIAL_LOG' && logType !== 'TRIAL_ERROR') {
                throw new Error('unexpected log type');
            }

            const log: string = await this.nniManager.getTrialLog(req.params.id, logType);

            // Logs are raw text: do not let the browser interpret them as HTML.
            res.type('text/plain');
            res.send(log === '' ? 'No logs available.' : log);
        } catch (err) {
            this.handleError(err instanceof Error ? err : new Error(`${err}`), res);
        }
    });
}
private exportData(router: Router): void { private exportData(router: Router): void {
router.get('/export-data', (req: Request, res: Response) => { router.get('/export-data', (req: Request, res: Response) => {
this.nniManager.exportData().then((exportedData: string) => { this.nniManager.exportData().then((exportedData: string) => {
......
...@@ -13,7 +13,7 @@ import { ...@@ -13,7 +13,7 @@ import {
TrialJobStatistics, NNIManagerStatus TrialJobStatistics, NNIManagerStatus
} from '../../common/manager'; } from '../../common/manager';
import { import {
TrialJobApplicationForm, TrialJobDetail, TrialJobStatus TrialJobApplicationForm, TrialJobDetail, TrialJobStatus, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
export const testManagerProvider: Provider = { export const testManagerProvider: Provider = {
...@@ -118,6 +118,9 @@ export class MockedNNIManager extends Manager { ...@@ -118,6 +118,9 @@ export class MockedNNIManager extends Manager {
public getLatestMetricData(): Promise<MetricDataRecord[]> { public getLatestMetricData(): Promise<MetricDataRecord[]> {
throw new MethodNotImplementedError(); throw new MethodNotImplementedError();
} }
/**
 * Trial log retrieval is not implemented by this mock.
 *
 * Declared `async` so the failure is delivered as a rejected promise. A
 * caller that chains `.then(...).catch(...)` on the result then reaches its
 * error handler instead of being bypassed by a synchronous throw.
 * @param trialJobId ID of the trial job (unused)
 * @param logType requested log type (unused)
 * @throws MethodNotImplementedError always, as a promise rejection
 */
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> {
    throw new MethodNotImplementedError();
}
public getExperimentProfile(): Promise<ExperimentProfile> { public getExperimentProfile(): Promise<ExperimentProfile> {
const profile: ExperimentProfile = { const profile: ExperimentProfile = {
params: { params: {
......
...@@ -12,9 +12,10 @@ import { EventEmitter } from 'events'; ...@@ -12,9 +12,10 @@ import { EventEmitter } from 'events';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { MethodNotImplementedError } from '../../common/errors';
import { import {
NNIManagerIpConfig, TrainingService, NNIManagerIpConfig, TrainingService,
TrialJobApplicationForm, TrialJobDetail, TrialJobMetric TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
import { DLTS_TRIAL_COMMAND_FORMAT } from './dltsData'; import { DLTS_TRIAL_COMMAND_FORMAT } from './dltsData';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData'; import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
...@@ -246,6 +247,10 @@ class DLTSTrainingService implements TrainingService { ...@@ -246,6 +247,10 @@ class DLTSTrainingService implements TrainingService {
return trialJob return trialJob
} }
/**
 * Trial log retrieval is not implemented for the DLTS training service.
 * @param _trialJobId ID of the trial job (unused)
 * @param _logType requested log type (unused)
 * @throws MethodNotImplementedError always; because the method is async, callers observe a rejected promise
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void { public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
this.metricsEmitter.on('metric', listener); this.metricsEmitter.on('metric', listener);
} }
......
...@@ -12,8 +12,9 @@ import { Base64 } from 'js-base64'; ...@@ -12,8 +12,9 @@ import { Base64 } from 'js-base64';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { MethodNotImplementedError } from '../../common/errors';
import { import {
NNIManagerIpConfig, TrialJobDetail, TrialJobMetric NNIManagerIpConfig, TrialJobDetail, TrialJobMetric, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
import { delay, getExperimentRootDir, getIPV4Address, getJobCancelStatus, getVersion, uniqueString } from '../../common/utils'; import { delay, getExperimentRootDir, getIPV4Address, getJobCancelStatus, getVersion, uniqueString } from '../../common/utils';
import { AzureStorageClientUtility } from './azureStorageClientUtils'; import { AzureStorageClientUtility } from './azureStorageClientUtils';
...@@ -98,6 +99,10 @@ abstract class KubernetesTrainingService { ...@@ -98,6 +99,10 @@ abstract class KubernetesTrainingService {
return Promise.resolve(kubernetesTrialJob); return Promise.resolve(kubernetesTrialJob);
} }
/**
 * Trial log retrieval is not implemented for Kubernetes-based training services.
 * @param _trialJobId ID of the trial job (unused)
 * @param _logType requested log type (unused)
 * @throws MethodNotImplementedError always; because the method is async, callers observe a rejected promise
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void { public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
this.metricsEmitter.on('metric', listener); this.metricsEmitter.on('metric', listener);
} }
......
...@@ -14,7 +14,7 @@ import { getExperimentId } from '../../common/experimentStartupInfo'; ...@@ -14,7 +14,7 @@ import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { import {
HyperParameters, TrainingService, TrialJobApplicationForm, HyperParameters, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric, TrialJobStatus TrialJobDetail, TrialJobMetric, TrialJobStatus, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
import { import {
delay, generateParamFileName, getExperimentRootDir, getJobCancelStatus, getNewLine, isAlive, uniqueString delay, generateParamFileName, getExperimentRootDir, getJobCancelStatus, getNewLine, isAlive, uniqueString
...@@ -184,6 +184,18 @@ class LocalTrainingService implements TrainingService { ...@@ -184,6 +184,18 @@ class LocalTrainingService implements TrainingService {
return trialJob; return trialJob;
} }
/**
 * Read a trial's log file from `<rootDir>/trials/<trialJobId>/`.
 *
 * `TRIAL_LOG` maps to the file `trial.log` and `TRIAL_ERROR` to `stderr`.
 * @param trialJobId ID of the trial job; must be a single path segment
 * @param logType which log file to read
 * @returns the file content decoded as UTF-8
 * @throws Error if `logType` is not a known log type, or if `trialJobId`
 *         is empty or would resolve outside the trial's own directory
 */
public async getTrialLog(trialJobId: string, logType: LogType): Promise<string> {
    let logFileName: string;
    switch (logType) {
        case 'TRIAL_LOG':
            logFileName = 'trial.log';
            break;
        case 'TRIAL_ERROR':
            logFileName = 'stderr';
            break;
        default:
            throw new Error('unexpected log type');
    }

    // The ID can originate from a request URL, so refuse anything that is not
    // a plain directory name ('', '.', '..', or a value containing a separator).
    const isSingleSegment: boolean = trialJobId !== ''
        && trialJobId !== '.'
        && trialJobId !== '..'
        && path.basename(trialJobId) === trialJobId;
    if (!isSingleSegment) {
        throw new Error('invalid trial job id');
    }

    const logPath: string = path.join(this.rootDir, 'trials', trialJobId, logFileName);

    return fs.promises.readFile(logPath, 'utf8');
}
public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void { public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
this.eventEmitter.on('metric', listener); this.eventEmitter.on('metric', listener);
} }
...@@ -450,8 +462,8 @@ class LocalTrainingService implements TrainingService { ...@@ -450,8 +462,8 @@ class LocalTrainingService implements TrainingService {
while (!this.stopping) { while (!this.stopping) {
while (!this.stopping && this.jobQueue.length !== 0) { while (!this.stopping && this.jobQueue.length !== 0) {
const trialJobId: string = this.jobQueue[0]; const trialJobId: string = this.jobQueue[0];
const trialJobDeatil: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId); const trialJobDetail: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId);
if (trialJobDeatil !== undefined && trialJobDeatil.status === 'WAITING') { if (trialJobDetail !== undefined && trialJobDetail.status === 'WAITING') {
const [success, resource] = this.tryGetAvailableResource(); const [success, resource] = this.tryGetAvailableResource();
if (!success) { if (!success) {
break; break;
......
...@@ -11,9 +11,10 @@ import { EventEmitter } from 'events'; ...@@ -11,9 +11,10 @@ import { EventEmitter } from 'events';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { MethodNotImplementedError } from '../../common/errors';
import { import {
NNIManagerIpConfig, TrainingService, NNIManagerIpConfig, TrainingService,
TrialJobApplicationForm, TrialJobDetail, TrialJobMetric TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
import { delay } from '../../common/utils'; import { delay } from '../../common/utils';
import { PAIJobInfoCollector } from './paiJobInfoCollector'; import { PAIJobInfoCollector } from './paiJobInfoCollector';
...@@ -117,6 +118,10 @@ abstract class PAITrainingService implements TrainingService { ...@@ -117,6 +118,10 @@ abstract class PAITrainingService implements TrainingService {
return jobs; return jobs;
} }
/**
 * Trial log retrieval is not implemented for PAI training services.
 * @param _trialJobId ID of the trial job (unused)
 * @param _logType requested log type (unused)
 * @throws MethodNotImplementedError always; because the method is async, callers observe a rejected promise
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
public async getTrialJob(trialJobId: string): Promise<TrialJobDetail> { public async getTrialJob(trialJobId: string): Promise<TrialJobDetail> {
if (this.paiClusterConfig === undefined) { if (this.paiClusterConfig === undefined) {
throw new Error('PAI Cluster config is not initialized'); throw new Error('PAI Cluster config is not initialized');
......
...@@ -10,13 +10,13 @@ import * as path from 'path'; ...@@ -10,13 +10,13 @@ import * as path from 'path';
import { ShellExecutor } from 'training_service/remote_machine/shellExecutor'; import { ShellExecutor } from 'training_service/remote_machine/shellExecutor';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { NNIError, NNIErrorNames } from '../../common/errors'; import { NNIError, NNIErrorNames, MethodNotImplementedError } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { ObservableTimer } from '../../common/observableTimer'; import { ObservableTimer } from '../../common/observableTimer';
import { import {
HyperParameters, NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, HyperParameters, NNIManagerIpConfig, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric TrialJobDetail, TrialJobMetric, LogType
} from '../../common/trainingService'; } from '../../common/trainingService';
import { import {
delay, generateParamFileName, getExperimentRootDir, getIPV4Address, getJobCancelStatus, delay, generateParamFileName, getExperimentRootDir, getIPV4Address, getJobCancelStatus,
...@@ -180,6 +180,15 @@ class RemoteMachineTrainingService implements TrainingService { ...@@ -180,6 +180,15 @@ class RemoteMachineTrainingService implements TrainingService {
} }
} }
/**
 * Get trial job log.
 * Not implemented for the remote machine training service: the call always
 * fails with MethodNotImplementedError (as a rejected promise, since the
 * method is async).
 * @param _trialJobId ID of trial job (unused)
 * @param _logType 'TRIAL_LOG' | 'TRIAL_ERROR' (unused)
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
/** /**
* Add job metrics listener * Add job metrics listener
* @param listener callback listener * @param listener callback listener
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
import { Container, Scope } from 'typescript-ioc'; import { Container, Scope } from 'typescript-ioc';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric } from '../../common/trainingService'; import { MethodNotImplementedError } from '../../common/errors'
import { TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, LogType } from '../../common/trainingService';
import { delay } from '../../common/utils'; import { delay } from '../../common/utils';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { PAIClusterConfig } from '../pai/paiConfig'; import { PAIClusterConfig } from '../pai/paiConfig';
...@@ -47,6 +48,10 @@ class RouterTrainingService implements TrainingService { ...@@ -47,6 +48,10 @@ class RouterTrainingService implements TrainingService {
return await this.internalTrainingService.getTrialJob(trialJobId); return await this.internalTrainingService.getTrialJob(trialJobId);
} }
/**
 * Trial log retrieval is not implemented by the router training service;
 * the request is not forwarded to the internal training service.
 * @param _trialJobId ID of the trial job (unused)
 * @param _logType requested log type (unused)
 * @throws MethodNotImplementedError always; because the method is async, callers observe a rejected promise
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void { public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
if (this.internalTrainingService === undefined) { if (this.internalTrainingService === undefined) {
throw new Error("TrainingService is not assigned!"); throw new Error("TrainingService is not assigned!");
......
...@@ -9,10 +9,10 @@ import * as path from 'path'; ...@@ -9,10 +9,10 @@ import * as path from 'path';
import { Writable } from 'stream'; import { Writable } from 'stream';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { NNIError, NNIErrorNames } from '../../common/errors'; import { NNIError, NNIErrorNames, MethodNotImplementedError } from '../../common/errors';
import { getBasePort, getExperimentId, getPlatform } from '../../common/experimentStartupInfo'; import { getBasePort, getExperimentId, getPlatform } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, TrialJobMetric, TrialJobStatus } from '../../common/trainingService'; import { NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, TrialJobMetric, TrialJobStatus, LogType } from '../../common/trainingService';
import { delay, getExperimentRootDir, getIPV4Address, getLogLevel, getVersion, mkDirPSync, uniqueString } from '../../common/utils'; import { delay, getExperimentRootDir, getIPV4Address, getLogLevel, getVersion, mkDirPSync, uniqueString } from '../../common/utils';
import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands'; import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands';
import { ScheduleResultType } from '../../training_service/common/gpuData'; import { ScheduleResultType } from '../../training_service/common/gpuData';
...@@ -111,6 +111,10 @@ class TrialDispatcher implements TrainingService { ...@@ -111,6 +111,10 @@ class TrialDispatcher implements TrainingService {
return trial; return trial;
} }
/**
 * Trial log retrieval is not implemented by the trial dispatcher.
 * @param _trialJobId ID of the trial job (unused)
 * @param _logType requested log type (unused)
 * @throws MethodNotImplementedError always; because the method is async, callers observe a rejected promise
 */
public async getTrialLog(_trialJobId: string, _logType: LogType): Promise<string> {
throw new MethodNotImplementedError();
}
public async submitTrialJob(form: TrialJobApplicationForm): Promise<TrialDetail> { public async submitTrialJob(form: TrialJobApplicationForm): Promise<TrialDetail> {
if (this.trialConfig === undefined) { if (this.trialConfig === undefined) {
throw new Error(`trialConfig not initialized!`); throw new Error(`trialConfig not initialized!`);
......
...@@ -3,14 +3,14 @@ ...@@ -3,14 +3,14 @@
'use strict'; 'use strict';
import * as assert from 'assert';
import * as chai from 'chai'; import * as chai from 'chai';
import * as chaiAsPromised from 'chai-as-promised'; import * as chaiAsPromised from 'chai-as-promised';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path';
import * as tmp from 'tmp'; import * as tmp from 'tmp';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { TrialJobApplicationForm, TrialJobDetail, TrainingService } from '../../common/trainingService'; import { TrialJobApplicationForm, TrialJobDetail} from '../../common/trainingService';
import { cleanupUnitTest, delay, prepareUnitTest } from '../../common/utils'; import { cleanupUnitTest, delay, prepareUnitTest, getExperimentRootDir } from '../../common/utils';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { LocalTrainingService } from '../local/localTrainingService'; import { LocalTrainingService } from '../local/localTrainingService';
...@@ -72,6 +72,36 @@ describe('Unit Test for LocalTrainingService', () => { ...@@ -72,6 +72,36 @@ describe('Unit Test for LocalTrainingService', () => {
chai.expect(jobDetail.status).to.be.equals('USER_CANCELED'); chai.expect(jobDetail.status).to.be.equals('USER_CANCELED');
}).timeout(20000); }).timeout(20000);
// Verifies that getTrialLog returns the content of the trial's `trial.log`
// and `stderr` files. The files are created by the test itself, and removed
// again even when an assertion fails, so later tests start from a clean state.
it('Get trial log', async () => {
    await localTrainingService.setClusterMetadata(TrialConfigMetadataKey.TRIAL_CONFIG, trialConfig);

    // submit job
    const form: TrialJobApplicationForm = {
        sequenceId: 0,
        hyperParameters: {
            value: 'mock hyperparameters',
            index: 0
        }
    };
    const jobDetail: TrialJobDetail = await localTrainingService.submitTrialJob(form);

    const trialsDir: string = path.join(getExperimentRootDir(), 'trials');
    const trialLogPath: string = path.join(jobDetail.workingDirectory, 'trial.log');
    const stderrPath: string = path.join(jobDetail.workingDirectory, 'stderr');

    try {
        // get trial log
        fs.mkdirSync(trialsDir);
        fs.mkdirSync(jobDetail.workingDirectory);
        fs.writeFileSync(trialLogPath, 'trial log');
        fs.writeFileSync(stderrPath, 'trial stderr');
        chai.expect(await localTrainingService.getTrialLog(jobDetail.id, 'TRIAL_LOG')).to.be.equals('trial log');
        chai.expect(await localTrainingService.getTrialLog(jobDetail.id, 'TRIAL_ERROR')).to.be.equals('trial stderr');
    } finally {
        try {
            // Remove only what exists, so a failure during setup is not
            // masked by a secondary ENOENT raised from the cleanup.
            for (const file of [trialLogPath, stderrPath]) {
                if (fs.existsSync(file)) {
                    fs.unlinkSync(file);
                }
            }
            for (const dir of [jobDetail.workingDirectory, trialsDir]) {
                if (fs.existsSync(dir)) {
                    fs.rmdirSync(dir);
                }
            }
        } finally {
            // Always stop the submitted trial, whatever happened above.
            await localTrainingService.cancelTrialJob(jobDetail.id);
        }
    }
}).timeout(20000);
it('Read metrics, Add listener, and remove listener', async () => { it('Read metrics, Add listener, and remove listener', async () => {
// set meta data // set meta data
const trialConfig: string = `{\"command\":\"python3 mockedTrial.py\", \"codeDir\":\"${localCodeDir}\",\"gpuNum\":0}` const trialConfig: string = `{\"command\":\"python3 mockedTrial.py\", \"codeDir\":\"${localCodeDir}\",\"gpuNum\":0}`
......
...@@ -2,6 +2,7 @@ import * as React from 'react'; ...@@ -2,6 +2,7 @@ import * as React from 'react';
import * as copy from 'copy-to-clipboard'; import * as copy from 'copy-to-clipboard';
import { Stack, PrimaryButton, Pivot, PivotItem } from 'office-ui-fabric-react'; import { Stack, PrimaryButton, Pivot, PivotItem } from 'office-ui-fabric-react';
import { Trial } from '../../static/model/trial'; import { Trial } from '../../static/model/trial';
import { MANAGER_IP } from '../../static/const';
import { EXPERIMENT, TRIALS } from '../../static/datamodel'; import { EXPERIMENT, TRIALS } from '../../static/datamodel';
import JSONTree from 'react-json-tree'; import JSONTree from 'react-json-tree';
import PaiTrialLog from '../public-child/PaiTrialLog'; import PaiTrialLog from '../public-child/PaiTrialLog';
...@@ -9,6 +10,7 @@ import TrialLog from '../public-child/TrialLog'; ...@@ -9,6 +10,7 @@ import TrialLog from '../public-child/TrialLog';
import MessageInfo from '../Modals/MessageInfo'; import MessageInfo from '../Modals/MessageInfo';
import '../../static/style/overview.scss'; import '../../static/style/overview.scss';
import '../../static/style/copyParameter.scss'; import '../../static/style/copyParameter.scss';
import '../../static/style/openRow.scss';
interface OpenRowProps { interface OpenRowProps {
trialId: string; trialId: string;
...@@ -55,6 +57,10 @@ class OpenRow extends React.Component<OpenRowProps, OpenRowState> { ...@@ -55,6 +57,10 @@ class OpenRow extends React.Component<OpenRowProps, OpenRowState> {
} }
} }
/**
 * Open the REST endpoint that serves this trial's log in a new browser window.
 * @param type log type segment appended to the URL (e.g. 'TRIAL_LOG' or 'TRIAL_ERROR')
 */
openTrialLog = (type: string): void => {
    const { trialId } = this.props;
    const logUrl = `${MANAGER_IP}/trial-log/${trialId}/${type}`;

    window.open(logUrl);
}
render(): React.ReactNode { render(): React.ReactNode {
const { isHidenInfo, typeInfo, info } = this.state; const { isHidenInfo, typeInfo, info } = this.state;
const trialId = this.props.trialId; const trialId = this.props.trialId;
...@@ -105,7 +111,23 @@ class OpenRow extends React.Component<OpenRowProps, OpenRowState> { ...@@ -105,7 +111,23 @@ class OpenRow extends React.Component<OpenRowProps, OpenRowState> {
logCollection={EXPERIMENT.logCollectionEnabled} logCollection={EXPERIMENT.logCollectionEnabled}
/> />
: :
<TrialLog logStr={logPathRow} id={trialId} /> <div>
<TrialLog logStr={logPathRow} id={trialId} />
{/* view each trial log in drawer*/}
<div id="trialog">
<div className="copy" style={{ marginTop: 15 }}>
<PrimaryButton
onClick={this.openTrialLog.bind(this, 'TRIAL_LOG')}
text="View trial log"
/>
<PrimaryButton
onClick={this.openTrialLog.bind(this, 'TRIAL_ERROR')}
text="View trial error"
styles={{ root: { marginLeft: 15 } }}
/>
</div>
</div>
</div>
} }
</PivotItem> </PivotItem>
</Pivot> </Pivot>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment