// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import { getLogger, Logger } from '../../common/log';
import { MethodNotImplementedError } from '../../common/errors';
import { ExperimentConfig, RemoteConfig, OpenpaiConfig } from '../../common/experimentConfig';
import { TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric } from '../../common/trainingService';
import { delay } from '../../common/utils';
import { PAITrainingService } from '../pai/paiTrainingService';
import { RemoteMachineTrainingService } from '../remote_machine/remoteMachineTrainingService';
import { TrialDispatcher } from './trialDispatcher';

/**
 * An intermediate implementation that supports reusable training services.
 * The final goal is to support reusable training jobs at a higher level than the training service.
 *
 * The router owns exactly one inner {@link TrainingService}, selected in {@link construct},
 * and forwards almost every call to it unchanged.
 *
 * NOTE(review): the explicit `Promise<...>` type arguments below are expected to match the
 * `TrainingService` interface declared in '../../common/trainingService' - confirm against it.
 */
class RouterTrainingService implements TrainingService {
    // Assigned in construct(); not otherwise read inside this class.
    private log!: Logger;
    // The concrete service every call is delegated to; assigned in construct().
    private internalTrainingService!: TrainingService;

    /**
     * Creates a router and selects the inner training service from the experiment config.
     *
     * Selection rules:
     *  - an array of training service configs is treated as platform 'hybrid';
     *  - platform 'remote' without reuse mode  -> RemoteMachineTrainingService;
     *  - platform 'openpai' without reuse mode -> PAITrainingService;
     *  - everything else (including 'hybrid')  -> TrialDispatcher.
     *
     * @param config experiment configuration to route on
     * @returns a router whose inner training service is already assigned
     */
    public static async construct(config: ExperimentConfig): Promise<RouterTrainingService> {
        const instance = new RouterTrainingService();
        instance.log = getLogger('RouterTrainingService');

        const platform = Array.isArray(config.trainingService) ? 'hybrid' : config.trainingService.platform;

        // The platform check is the runtime discriminator; the assertions only tell the
        // compiler which config shape carries `reuseMode` in each branch.
        if (platform === 'remote' && !(config.trainingService as RemoteConfig).reuseMode) {
            instance.internalTrainingService = new RemoteMachineTrainingService(config);
        } else if (platform === 'openpai' && !(config.trainingService as OpenpaiConfig).reuseMode) {
            instance.internalTrainingService = new PAITrainingService(config);
        } else {
            instance.internalTrainingService = await TrialDispatcher.construct(config);
        }

        return instance;
    }

    // Instances must be created through construct(), which also assigns the inner service.
    // eslint-disable-next-line @typescript-eslint/no-empty-function
    private constructor() { }

    /**
     * Returns the inner training service, failing fast when it has not been assigned.
     *
     * @throws Error with message "TrainingService is not assigned!" when no inner service is set
     */
    private requireService(): TrainingService {
        if (this.internalTrainingService === undefined) {
            throw new Error("TrainingService is not assigned!");
        }
        return this.internalTrainingService;
    }

    /**
     * Forwards to the inner service's listTrialJobs().
     *
     * @throws Error when the inner service is not assigned
     */
    public async listTrialJobs(): Promise<TrialJobDetail[]> {
        return await this.requireService().listTrialJobs();
    }

    /**
     * Forwards to the inner service's getTrialJob().
     *
     * @param trialJobId id of the trial job to look up
     * @throws Error when the inner service is not assigned
     */
    public async getTrialJob(trialJobId: string): Promise<TrialJobDetail> {
        return await this.requireService().getTrialJob(trialJobId);
    }

    /**
     * Not supported by the router: always rejects.
     *
     * @throws MethodNotImplementedError unconditionally
     */
    public async getTrialFile(_trialJobId: string, _fileName: string): Promise<never> {
        throw new MethodNotImplementedError();
    }

    /**
     * Registers a metric listener on the inner service.
     *
     * @param listener callback receiving each trial job metric
     * @throws Error (synchronously) when the inner service is not assigned
     */
    public addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
        this.requireService().addTrialJobMetricListener(listener);
    }

    /**
     * Unregisters a metric listener from the inner service.
     *
     * @param listener callback previously passed to addTrialJobMetricListener
     * @throws Error (synchronously) when the inner service is not assigned
     */
    public removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void {
        this.requireService().removeTrialJobMetricListener(listener);
    }

    /**
     * Forwards to the inner service's submitTrialJob().
     *
     * @param form application form describing the trial to submit
     * @throws Error when the inner service is not assigned
     */
    public async submitTrialJob(form: TrialJobApplicationForm): Promise<TrialJobDetail> {
        return await this.requireService().submitTrialJob(form);
    }

    /**
     * Forwards to the inner service's updateTrialJob().
     *
     * @param trialJobId id of the trial job to update
     * @param form application form carrying the update
     * @throws Error when the inner service is not assigned
     */
    public async updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail> {
        return await this.requireService().updateTrialJob(trialJobId, form);
    }

    /**
     * Forwards to the inner service's cancelTrialJob().
     *
     * @param trialJobId id of the trial job to cancel
     * @param isEarlyStopped passed through unchanged to the inner service
     * @throws Error when the inner service is not assigned
     */
    public async cancelTrialJob(trialJobId: string, isEarlyStopped?: boolean | undefined): Promise<void> {
        await this.requireService().cancelTrialJob(trialJobId, isEarlyStopped);
    }

    /** No-op: the router ignores cluster metadata and does not forward it. */
    public async setClusterMetadata(_key: string, _value: string): Promise<void> {
        return;
    }

    /** Always resolves to an empty string; nothing is read from the inner service. */
    public async getClusterMetadata(_key: string): Promise<string> {
        return '';
    }

    /**
     * Forwards to the inner service's cleanUp().
     *
     * @throws Error when the inner service is not assigned
     */
    public async cleanUp(): Promise<void> {
        await this.requireService().cleanUp();
    }

    /**
     * Runs the inner training service.
     *
     * Unlike the other methods this one does not throw when the inner service is missing;
     * it polls every 100 ms until one is assigned. construct() assigns the service before
     * returning the instance, so the loop is a safeguard kept for backward compatibility.
     */
    public async run(): Promise<void> {
        while (this.internalTrainingService === undefined) {
            await delay(100);
        }
        return await this.internalTrainingService.run();
    }

    /**
     * Forwards to the inner service's getTrialOutputLocalPath().
     *
     * @param trialJobId id of the trial job whose output path is requested
     * @throws Error when the inner service is not assigned
     */
    public async getTrialOutputLocalPath(trialJobId: string): Promise<string> {
        return this.requireService().getTrialOutputLocalPath(trialJobId);
    }

    /**
     * Forwards to the inner service's fetchTrialOutput().
     *
     * @param trialJobId id of the trial job whose output is fetched
     * @param subpath passed through unchanged to the inner service
     * @throws Error when the inner service is not assigned
     */
    public async fetchTrialOutput(trialJobId: string, subpath: string): Promise<void> {
        return this.requireService().fetchTrialOutput(trialJobId, subpath);
    }
}

export { RouterTrainingService };