nnimanager.ts 21.8 KB
Newer Older
Deshui Yu's avatar
Deshui Yu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/**
 * Copyright (c) Microsoft Corporation
 * All rights reserved.
 *
 * MIT License
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

'use strict';

import * as assert from 'assert';
import * as cpp from 'child-process-promise';
import { ChildProcess, spawn } from 'child_process';
import { Deferred } from 'ts-deferred';
import * as component from '../common/component';
import { DataStore, MetricDataRecord, MetricType, TrialJobInfo } from '../common/datastore';
import { getExperimentId } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log';
import {
    ExperimentParams, ExperimentProfile, Manager,
32
    NNIManagerStatus, ProfileUpdateType, TrialJobStatistics
Deshui Yu's avatar
Deshui Yu committed
33
34
35
36
} from '../common/manager';
import {
    TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../common/trainingService';
37
import { delay , getLogDir, getMsgDispatcherCommand} from '../common/utils';
Deshui Yu's avatar
Deshui Yu committed
38
39
import {
    ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA,
chicm-ms's avatar
chicm-ms committed
40
    REQUEST_TRIAL_JOBS, SEND_TRIAL_JOB_PARAMETER, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
Deshui Yu's avatar
Deshui Yu committed
41
} from './commands';
42
import { createDispatcherInterface, IpcInterface } from './ipcInterface';
Deshui Yu's avatar
Deshui Yu committed
43
44
45
46
47
48
49
import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';

/**
 * NNIManager
 */
class NNIManager implements Manager {
    private trainingService: TrainingService;
50
    private dispatcher: IpcInterface | undefined;
Deshui Yu's avatar
Deshui Yu committed
51
52
53
54
55
56
57
    private trialJobsMaintainer: TrialJobs | undefined;
    private currSubmittedTrialNum: number; // need to be recovered
    private trialConcurrencyReduction: number;
    private customizedTrials: string[]; // need to be recovered
    private log: Logger;
    private dataStore: DataStore;
    private experimentProfile: ExperimentProfile;
58
    private dispatcherPid: number;
59
    private status: NNIManagerStatus;
Deshui Yu's avatar
Deshui Yu committed
60
61
62
63
64
65
66

    /**
     * Builds a manager in its pre-experiment state: counters zeroed, no dispatcher process,
     * an empty experiment profile, and status 'INITIALIZED'. The training service and the
     * data store are looked up through the component container.
     */
    constructor() {
        // Plain bookkeeping state; dispatcherPid stays 0 until setupTuner spawns the dispatcher.
        this.customizedTrials = [];
        this.currSubmittedTrialNum = 0;
        this.trialConcurrencyReduction = 0;
        this.dispatcherPid = 0;

        this.trainingService = component.get(TrainingService);
        assert(this.trainingService);

        this.log = getLogger();
        this.dataStore = component.get(DataStore);

        this.experimentProfile = this.createEmptyExperimentProfile();
        this.status = { status: 'INITIALIZED', errors: [] };
    }

    /**
     * Applies one field of `experimentProfile.params` to the running experiment and then
     * persists the updated profile.
     *
     * The method is `async` so that every failure is reported through the returned promise:
     * an unrecognized `updateType`, or an error thrown by one of the private update helpers
     * (for example when the dispatcher has not been set up), becomes a rejection instead of
     * a synchronous throw that a `.catch()` on the result would miss.
     *
     * @param experimentProfile profile whose `params` carries the new value
     * @param updateType selects which field of `params` is applied
     * @returns promise settled once the profile has been stored
     */
    public async updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType): Promise<void> {
        switch (updateType) {
            case 'TRIAL_CONCURRENCY':
                this.updateTrialConcurrency(experimentProfile.params.trialConcurrency);
                break;
            case 'MAX_EXEC_DURATION':
                this.updateMaxExecDuration(experimentProfile.params.maxExecDuration);
                break;
            case 'SEARCH_SPACE':
                this.updateSearchSpace(experimentProfile.params.searchSpace);
                break;
            default:
                throw new Error('Error: unrecognized updateType');
        }

        await this.storeExperimentProfile();
    }

    /**
     * Queues user-supplied hyper-parameters for a later trial and records the request.
     *
     * @param hyperParams hyper-parameter payload to queue
     * @returns promise from the data store; rejected with 'reach maxTrialNum' when the
     *          number of submitted trials has already reached `maxTrialNum`
     */
    public addCustomizedTrialJob(hyperParams: string): Promise<void> {
        const limitReached: boolean = this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum;
        if (limitReached) {
            return Promise.reject(new Error('reach maxTrialNum'));
        }

        this.customizedTrials.push(hyperParams);

        // No trial id exists yet, so the event is stored with an empty id.
        return this.dataStore.storeTrialJobEvent('ADD_CUSTOMIZED', '', hyperParams);
    }

    /**
     * Cancels a trial job through the training service and, once that has succeeded,
     * records a 'USER_TO_CANCEL' event for it.
     *
     * @param trialJobId id of the trial job to cancel
     */
    public async cancelTrialJobByUser(trialJobId: string): Promise<void> {
        const { trainingService, dataStore } = this;

        await trainingService.cancelTrialJob(trialJobId);
        await dataStore.storeTrialJobEvent('USER_TO_CANCEL', trialJobId, '');
    }

    /**
     * Starts a new experiment: stores the given parameters, spawns the dispatcher in
     * 'start' mode, marks the experiment as running and kicks off the main loop.
     *
     * The main loop (`run`) is deliberately not awaited; a failure there is routed to
     * `criticalError`.
     *
     * @param expParams parameters of the experiment to start
     * @returns the experiment id
     */
    public async startExperiment(expParams: ExperimentParams): Promise<string> {
        this.log.debug(`Starting experiment: ${this.experimentProfile.id}`);
        this.experimentProfile.params = expParams;
        await this.storeExperimentProfile();
        this.log.debug('Setup tuner...');

        const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor, expParams.multiPhase);
        // Goes through the class logger (not console.log) so it lands with the other log lines.
        this.log.debug(`dispatcher command: ${dispatcherCommand}`);
        this.setupTuner(
            dispatcherCommand,
            undefined,
            'start',
            expParams.tuner.checkpointDir);

        this.experimentProfile.startTime = Date.now();
        this.status.status = 'EXPERIMENT_RUNNING';
        await this.storeExperimentProfile();
        this.run().catch((err: Error) => {
            this.criticalError(err);
        });

        return this.experimentProfile.id;
    }

    /**
     * Resumes the experiment identified by `getExperimentId()`: reloads its profile from the
     * data store, spawns the dispatcher in 'resume' mode, restores the submitted-trial
     * counter, marks jobs that were still WAITING/RUNNING as FAILED, and restarts the main loop.
     *
     * The main loop (`run`) is deliberately not awaited; a failure there is routed to
     * `criticalError`, the same as in `startExperiment`, so that it is logged and reflected
     * in the manager status.
     */
    public async resumeExperiment(): Promise<void> {
        //Fetch back the experiment profile
        const experimentId: string = getExperimentId();
        this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
        const expParams: ExperimentParams = this.experimentProfile.params;

        const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor, expParams.multiPhase);
        // Goes through the class logger (not console.log) so it lands with the other log lines.
        this.log.debug(`dispatcher command: ${dispatcherCommand}`);
        this.setupTuner(
            dispatcherCommand,
            undefined,
            'resume',
            expParams.tuner.checkpointDir);

        const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs();

        // Resume currSubmittedTrialNum
        this.currSubmittedTrialNum = allTrialJobs.length;

        // Check the final status for WAITING and RUNNING jobs
        await Promise.all(allTrialJobs
            .filter((job: TrialJobInfo) => job.status === 'WAITING' || job.status === 'RUNNING')
            .map((job: TrialJobInfo) => this.dataStore.storeTrialJobEvent('FAILED', job.id)));

        this.status.status = 'EXPERIMENT_RUNNING';

        // TO DO: update database record for resume event
        this.run().catch((err: Error) => {
            this.criticalError(err);
        });
    }

    /**
     * Looks up a trial job in the training service.
     *
     * @param trialJobId id of the trial job
     * @returns promise for whatever the training service returns for that id
     */
    public getTrialJob(trialJobId: string): Promise<TrialJobDetail> {
        const lookup = this.trainingService.getTrialJob(trialJobId);

        return Promise.resolve(lookup);
    }

    /**
     * Forwards a cluster metadata key/value pair to the training service, failing if the
     * training service has not answered within 10 seconds.
     *
     * The timeout timer is cleared in a `finally` block that also covers the call into the
     * training service, so a synchronous throw from that call cannot leave the timer armed
     * (which would later surface as an unhandled rejection).
     *
     * @param key metadata key
     * @param value metadata value
     * @throws Error on timeout, or whatever the training service rejects/throws with
     */
    public async setClusterMetadata(key: string, value: string): Promise<void> {
        let timeoutId: NodeJS.Timer | undefined;
        // TO DO: move timeout value to constants file
        const timeout: Promise<never> = new Promise<never>((_resolve: (value: never) => void, reject: (reason: Error) => void): void => {
            timeoutId = setTimeout(
                () => { reject(new Error('TrainingService setClusterMetadata timeout. Please check your config file.')); },
                10000);
        });

        try {
            await Promise.race([timeout, this.trainingService.setClusterMetadata(key, value)]);
        } finally {
            if (timeoutId !== undefined) {
                clearTimeout(timeoutId);
            }
        }
    }

    /**
     * Reads a cluster metadata value from the training service.
     *
     * @param key metadata key
     * @returns promise for the value the training service reports for `key`
     */
    public getClusterMetadata(key: string): Promise<string> {
        const lookup = this.trainingService.getClusterMetadata(key);

        return Promise.resolve(lookup);
    }

    /**
     * Returns the trial job statistics kept by the data store.
     */
    public async getTrialJobStatistics(): Promise<TrialJobStatistics[]> {
        const statistics = this.dataStore.getTrialJobStatistics();

        return statistics;
    }

    /**
     * Marks the manager as 'STOPPING' and asks the trial job maintainer to stop its loop.
     *
     * @returns resolved promise once the stop request has been issued; rejected when no
     *          trial job maintainer exists (the status is set to 'STOPPING' in either case)
     */
    public stopExperiment(): Promise<void> {
        this.status.status = 'STOPPING';

        const maintainer: TrialJobs | undefined = this.trialJobsMaintainer;
        if (maintainer === undefined) {
            return Promise.reject(new Error('Error: undefined trialJobsMaintainer'));
        }
        maintainer.setStopLoop();

        return Promise.resolve();
    }

    /**
     * Fetches metric records from the data store.
     *
     * @param trialJobId id of the trial job, passed through to the data store
     * @param metricType metric type, passed through to the data store
     */
    public async getMetricData(trialJobId: string, metricType: MetricType): Promise<MetricDataRecord[]> {
        const records = this.dataStore.getMetricData(trialJobId, metricType);

        return records;
    }

    /**
     * Returns the in-memory experiment profile (the live object, not a copy) wrapped in an
     * already-resolved promise.
     */
    public getExperimentProfile(): Promise<ExperimentProfile> {
        return Promise.resolve(this.experimentProfile);
    }

219
220
221
222
    /**
     * Returns the manager's status object. This is the live instance held by the manager,
     * not a copy.
     */
    public getStatus(): NNIManagerStatus {
        return this.status;
    }

Deshui Yu's avatar
Deshui Yu committed
223
224
225
226
    /**
     * Lists trial jobs recorded in the data store.
     *
     * @param status optional status, passed through to the data store unchanged
     */
    public async listTrialJobs(status?: TrialJobStatus): Promise<TrialJobInfo[]> {
        const jobs = this.dataStore.listTrialJobs(status);

        return jobs;
    }

227
228
    /**
     * Spawns the dispatcher process and wires it to this manager. Does nothing when a
     * dispatcher already exists.
     *
     * @param command shell command line used to launch the dispatcher
     * @param cwd working directory; the log directory is used when undefined or empty
     * @param mode exported to the child as NNI_MODE
     * @param dataDirectory exported to the child as NNI_CHECKPOINT_DIRECTORY
     */
    private setupTuner(command: string, cwd: string | undefined, mode: 'start' | 'resume', dataDirectory: string): void {
        if (this.dispatcher !== undefined) {
            return;
        }

        // stdin is ignored, stdout/stderr are shared with this process, and two extra pipes are
        // opened (presumably the channel createDispatcherInterface talks over — confirm in ipcInterface).
        const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
        const workingDir: string = (cwd === undefined || cwd === '') ? getLogDir() : cwd;

        // TO DO: add CUDA_VISIBLE_DEVICES
        const childEnv = Object.assign({}, process.env, {
            NNI_MODE: mode,
            NNI_CHECKPOINT_DIRECTORY: dataDirectory,
            NNI_LOG_DIRECTORY: getLogDir()
        });

        const dispatcherProc: ChildProcess = spawn(command, [], {
            stdio,
            cwd: workingDir,
            env: childEnv,
            shell: true
        });
        this.dispatcherPid = dispatcherProc.pid;
        this.dispatcher = createDispatcherInterface(dispatcherProc);
    }

    /**
     * Changes the trial concurrency of the running experiment.
     *
     * An increase immediately requests the extra trial jobs from the dispatcher. Otherwise
     * the difference is added to `trialConcurrencyReduction`, which `onTrialJobEvent`
     * consumes as trial jobs finish.
     *
     * @param trialConcurrency new concurrency value
     * @throws Error if an increase is requested before the dispatcher exists
     */
    private updateTrialConcurrency(trialConcurrency: number): void {
        // TO DO: this method can only be called after startExperiment/resumeExperiment
        const previous: number = this.experimentProfile.params.trialConcurrency;

        if (trialConcurrency > previous) {
            if (this.dispatcher === undefined) {
                throw new Error('Error: tuner has to be initialized');
            }
            const extraJobs: number = trialConcurrency - previous;
            this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, String(extraJobs));
        } else {
            // we assume trialConcurrency >= 0, which is checked by restserver
            this.trialConcurrencyReduction += (previous - trialConcurrency);
        }

        this.experimentProfile.params.trialConcurrency = trialConcurrency;
    }

    /**
     * Stores a new maximum execution duration in the profile and, when a trial job
     * maintainer exists, forwards it there first.
     *
     * @param duration new maximum execution duration
     */
    private updateMaxExecDuration(duration: number): void {
        const maintainer: TrialJobs | undefined = this.trialJobsMaintainer;
        if (maintainer !== undefined) {
            maintainer.updateMaxExecDuration(duration);
        }

        this.experimentProfile.params.maxExecDuration = duration;
    }

    /**
     * Sends a new search space to the dispatcher and stores it in the profile.
     *
     * @param searchSpace search space payload
     * @throws Error if the dispatcher has not been set up
     */
    private updateSearchSpace(searchSpace: string): void {
        const dispatcher: IpcInterface | undefined = this.dispatcher;
        if (dispatcher === undefined) {
            throw new Error('Error: tuner has not been setup');
        }

        dispatcher.sendCommand(UPDATE_SEARCH_SPACE, searchSpace);
        this.experimentProfile.params.searchSpace = searchSpace;
    }

    /**
     * Shuts the experiment down: sends TERMINATE to the dispatcher, waits up to 30 seconds
     * for the dispatcher process to exit, kills it if it is still alive after that, cancels
     * every RUNNING/WAITING trial job, cleans up the training service, and finally stores
     * the end time and sets the status to 'STOPPED'.
     *
     * @throws Error if the dispatcher has not been set up
     */
    private async experimentDoneCleanUp(): Promise<void> {
        if (this.dispatcher === undefined) {
            throw new Error('Error: tuner has not been setup');
        }
        this.dispatcher.sendCommand(TERMINATE);

        // gracefully terminate tuner and assessor here, wait at most 30 seconds.
        let tunerAlive: boolean = true;
        for (let i: number = 0; i < 30; i++) {
            try {
                // Signal 0 delivers nothing; a failure here is taken to mean the dispatcher has exited.
                await cpp.exec(`kill -0 ${this.dispatcherPid}`);
            } catch (error) {
                tunerAlive = false;
                break; // no reason to sleep another second once the process is gone
            }
            await delay(1000);
        }

        // Only signal a process that was still observed alive: once the dispatcher has exited,
        // its pid may already belong to an unrelated process.
        if (tunerAlive) {
            try {
                await cpp.exec(`kill ${this.dispatcherPid}`);
            } catch (error) {
                // this.dispatcherPid does not exist any more, do nothing here
            }
        }

        const trialJobList: TrialJobDetail[] = await this.trainingService.listTrialJobs();
        // TO DO: to promise all
        for (const trialJob of trialJobList) {
            if (trialJob.status === 'RUNNING' ||
                trialJob.status === 'WAITING') {
                try {
                    await this.trainingService.cancelTrialJob(trialJob.id);
                } catch (error) {
                    // pid does not exist, do nothing here
                }
            }
        }
        await this.trainingService.cleanUp();
        this.experimentProfile.endTime = Date.now();
        await this.storeExperimentProfile();
        this.status.status = 'STOPPED';
    }

    /**
     * Every 10 minutes, sets the profile's `execDuration` to its value at the time this
     * method was called plus the seconds elapsed since then, and stores the profile.
     * The loop has no exit condition, so the returned promise only settles by rejection.
     */
    private async periodicallyUpdateExecDuration(): Promise<void> {
        const loopStart: number = Date.now();
        const baseDuration: number = this.experimentProfile.execDuration;
        const flushIntervalMs: number = 1000 * 60 * 10; // 10 minutes

        while (true) {
            await delay(flushIntervalMs);
            const elapsedSeconds: number = (Date.now() - loopStart) / 1000;
            this.experimentProfile.execDuration = baseDuration + elapsedSeconds;
            await this.storeExperimentProfile();
        }
    }

    /**
     * Bumps the profile revision by one and writes the profile to the data store.
     *
     * @returns the data store's promise for the write
     */
    private storeExperimentProfile(): Promise<void> {
        const profile: ExperimentProfile = this.experimentProfile;
        profile.revision += 1;

        return this.dataStore.storeExperimentProfile(profile);
    }

348
    /**
     * Main loop of the experiment: creates the trial job maintainer, hooks up the event
     * listeners, sends the initial commands to the dispatcher, and then waits on the
     * exec-duration updater, the training service loop and the maintainer loop together.
     */
    private async run(): Promise<void> {
        const profile: ExperimentProfile = this.experimentProfile;
        const maintainer: TrialJobs = new TrialJobs(
            this.trainingService,
            profile.execDuration,
            profile.params.maxExecDuration);
        this.trialJobsMaintainer = maintainer;

        assert(this.dispatcher !== undefined && this.trialJobsMaintainer !== undefined);

        this.addEventListeners();
        this.sendInitTunerCommands();

        await Promise.all([
            this.periodicallyUpdateExecDuration(),
            this.trainingService.run(),
            maintainer.run()
        ]);
    }

     private addEventListeners(): void {
        // Registers the three callbacks that drive the experiment: trial metrics from the
        // training service, job events from the maintainer, and commands from the dispatcher.
        // TO DO: cannot run this method more than once in one NNIManager instance
        if (this.dispatcher === undefined || this.trialJobsMaintainer === undefined) {
            throw new Error('Error: tuner or job maintainer have not been setup');
        }

        // The handlers are not awaited by their emitters, so a rejection from any of them is
        // funnelled into criticalError here.
        const reportFailure = (err: Error): void => {
            this.criticalError(err);
        };

        this.trainingService.addTrialJobMetricListener((metric: TrialJobMetric) => {
            this.onTrialJobMetrics(metric).catch(reportFailure);
        });

        this.trialJobsMaintainer.on(async (event: TrialJobMaintainerEvent, trialJobDetail: TrialJobDetail) => {
            this.onTrialJobEvent(event, trialJobDetail).catch(reportFailure);
        });

        this.dispatcher.onCommand((commandType: string, content: string) => {
            this.onTunerCommand(commandType, content).catch(reportFailure);
        });
    }

    /**
     * Sends the two start-up commands to the dispatcher: the current search space, then a
     * request for `trialConcurrency` trial jobs.
     *
     * @throws Error if the dispatcher has not been set up, or (after the search space has
     *         already been sent) if a concurrency reduction is pending
     */
    private sendInitTunerCommands(): void {
        const dispatcher: IpcInterface | undefined = this.dispatcher;
        if (dispatcher === undefined) {
            throw new Error('Error: tuner has not been setup');
        }
        const params: ExperimentParams = this.experimentProfile.params;

        // TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
        this.log.debug(`Send tuner command: update search space: ${params.searchSpace}`);
        dispatcher.sendCommand(UPDATE_SEARCH_SPACE, params.searchSpace);

        if (this.trialConcurrencyReduction !== 0) {
            throw new Error('Error: cannot modify trialConcurrency before startExperiment');
        }

        this.log.debug(`Send tuner command: ${params.trialConcurrency}`);
        dispatcher.sendCommand(REQUEST_TRIAL_JOBS, String(params.trialConcurrency));
    }

    /**
     * Handles a metric reported by a trial job: stores it, then forwards it to the dispatcher.
     * The metric is stored before the dispatcher check, so it is persisted even when the
     * method then throws.
     *
     * @param metric metric reported by the training service
     * @throws Error if the dispatcher has not been set up
     */
    private async onTrialJobMetrics(metric: TrialJobMetric): Promise<void> {
        const { id, data } = metric;
        await this.dataStore.storeMetricData(id, data);

        if (this.dispatcher === undefined) {
            throw new Error('Error: tuner has not been setup');
        }
        this.dispatcher.sendCommand(REPORT_METRIC_DATA, data);
    }

    /**
     * Reacts to an event from the trial job maintainer.
     *
     * - Finished jobs (SUCCEEDED / FAILED / USER_CANCELED / SYS_CANCELED): either consume one
     *   unit of pending concurrency reduction, or — while fewer than `maxTrialNum` trials have
     *   been submitted — ask the dispatcher for a replacement (a queued customized trial if
     *   there is one, otherwise one new trial). Then TRIAL_END is sent and the event is stored.
     * - RUNNING: the event is stored.
     * - EXPERIMENT_DONE: the experiment is cleaned up.
     *
     * @param event event emitted by the maintainer
     * @param trialJobDetail job the event refers to; may be undefined at runtime (it is
     *        checked before logging)
     * @throws Error if the dispatcher has not been set up or the event is not recognized
     */
    private async onTrialJobEvent(event: TrialJobMaintainerEvent, trialJobDetail: TrialJobDetail): Promise<void> {
        this.log.debug(
            trialJobDetail !== undefined
                ? `Job event: ${event}, id: ${trialJobDetail.id}`
                : `Job event: ${event}`);

        if (this.dispatcher === undefined) {
            throw new Error('Error: tuner has not been setup');
        }

        switch (event) {
            case 'SUCCEEDED':
            case 'FAILED':
            case 'USER_CANCELED':
            case 'SYS_CANCELED':
                if (this.trialConcurrencyReduction > 0) {
                    // The finished job is not replaced; it pays off one unit of the reduction.
                    this.trialConcurrencyReduction--;
                } else if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
                    if (this.customizedTrials.length > 0) {
                        const queuedParams: string | undefined = this.customizedTrials.shift();
                        this.dispatcher.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, queuedParams);
                    } else {
                        this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, '1');
                    }
                }
                this.dispatcher.sendCommand(TRIAL_END, JSON.stringify({ trial_job_id: trialJobDetail.id, event }));
                await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
                break;
            case 'RUNNING':
                await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
                break;
            case 'EXPERIMENT_DONE':
                this.log.info('Experiment done, cleaning up...');
                await this.experimentDoneCleanUp();
                this.log.info('Experiment done.');
                break;
            default:
                throw new Error('Error: unrecognized event from trialJobsMaintainer');
        }
    }
Deshui Yu's avatar
Deshui Yu committed
453

454
455
456
457
458
459
460
461
462
463
464
    /**
     * Handles a command received from the dispatcher.
     *
     * - NEW_TRIAL_JOB: while under `maxTrialNum`, submits a trial with `content` as its
     *   hyper-parameters, registers it with the maintainer and stores its initial event;
     *   when the limit is reached by this submission, tells the maintainer no more trials follow.
     * - SEND_TRIAL_JOB_PARAMETER: parses `content` as JSON and updates the named trial job
     *   with the parameter at `parameter_index`.
     * - NO_MORE_TRIAL_JOBS: tells the maintainer no more trials follow.
     * - KILL_TRIAL_JOB: cancels the trial job whose id is the JSON-decoded `content`.
     *
     * @param commandType command identifier
     * @param content command payload
     * @throws Error if the maintainer is not initialized or the command type is unsupported
     */
    private async onTunerCommand(commandType: string, content: string): Promise<void> {
        this.log.info(`Command from tuner: ${commandType}, ${content}`);
        if (this.trialJobsMaintainer === undefined) {
            throw new Error('Error: trialJobsMaintainer not initialized');
        }

        switch (commandType) {
            case NEW_TRIAL_JOB: {
                if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
                    // The slot is counted before the submission is attempted.
                    this.currSubmittedTrialNum++;
                    const submission: TrialJobApplicationForm = {
                        jobType: 'TRIAL',
                        hyperParameters: { value: content, index: 0 }
                    };
                    const submitted: TrialJobDetail = await this.trainingService.submitTrialJob(submission);
                    this.trialJobsMaintainer.setTrialJob(submitted.id, Object.assign({}, submitted));
                    assert(submitted.status === 'WAITING');
                    await this.dataStore.storeTrialJobEvent(submitted.status, submitted.id, content, submitted.url);
                    if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) {
                        this.trialJobsMaintainer.setNoMoreTrials();
                    }
                }
                break;
            }
            case SEND_TRIAL_JOB_PARAMETER: {
                const parsed: any = JSON.parse(content);
                assert(parsed.parameter_index >= 0);
                assert(parsed.trial_job_id !== undefined);

                const parameterUpdate: TrialJobApplicationForm = {
                    jobType: 'TRIAL',
                    hyperParameters: { value: content, index: parsed.parameter_index }
                };
                await this.trainingService.updateTrialJob(parsed.trial_job_id, parameterUpdate);
                await this.dataStore.storeTrialJobEvent('ADD_HYPERPARAMETER', parsed.trial_job_id, content, undefined);
                break;
            }
            case NO_MORE_TRIAL_JOBS: {
                this.trialJobsMaintainer.setNoMoreTrials();
                break;
            }
            case KILL_TRIAL_JOB: {
                await this.trainingService.cancelTrialJob(JSON.parse(content));
                break;
            }
            default:
                throw new Error('Error: unsupported command type from tuner');
        }
    }

507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
    /**
     * Records `err` through logError (which also switches the manager status to 'ERROR')
     * and then echoes it with console.error.
     *
     * @param err error to record
     */
    private criticalError(err: Error): void {
        this.logError(err);
        console.error(err);
    }

    /**
     * Writes `err` to the log, appends its message to the status error list and switches
     * the manager status to 'ERROR'.
     *
     * When `err` carries no stack trace, its string form is logged instead, so the error
     * is never dropped from the log.
     *
     * @param err error to record
     */
    private logError(err: Error): void {
        this.log.error(err.stack !== undefined ? err.stack : String(err));
        this.status.errors.push(err.message);
        this.status.status = 'ERROR';
    }

    /**
     * Builds the placeholder profile used before an experiment is started or resumed:
     * the current experiment id, revision 0, no execution time, and blank parameters.
     */
    private createEmptyExperimentProfile(): ExperimentProfile {
        const blankParams: ExperimentParams = {
            authorName: '',
            experimentName: '',
            trialConcurrency: 0,
            maxExecDuration: 0, // unit: second
            maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs
            searchSpace: '',
            tuner: { className: '', classArgs: {}, checkpointDir: '' }
        };

        return {
            id: getExperimentId(),
            revision: 0,
            execDuration: 0,
            params: blankParams
        };
    }
}

export { NNIManager };