"vscode:/vscode.git/clone" did not exist on "ff877d8f91a4605fe3d0f9d8828244ca40617978"
restHandler.ts 16.7 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4

import { Request, Response, Router } from 'express';
5
import path from 'path';
Deshui Yu's avatar
Deshui Yu committed
6
7
8
9

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
10
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
11
import globals from 'common/globals';
Deshui Yu's avatar
Deshui Yu committed
12
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
13
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
14
import { getExperimentsManager } from 'extensions/experiments_manager';
J-shang's avatar
J-shang committed
15
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
16
import { ValidationSchemas } from './restValidationSchemas';
17
import { getVersion } from '../common/utils';
18
19
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
Yuge Zhang's avatar
Yuge Zhang committed
20
import { TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
21

22
23
// TODO: fix expressJoi
//const expressJoi = require('express-joi-validator');
24

Deshui Yu's avatar
Deshui Yu committed
25
class NNIRestHandler {
26
    private nniManager: Manager;
J-shang's avatar
J-shang committed
27
    private tensorboardManager: TensorboardManager;
Deshui Yu's avatar
Deshui Yu committed
28
29
    private log: Logger;

30
    /**
     * Looks up the `Manager` and `TensorboardManager` instances through
     * `component.get` and creates a logger named 'NNIRestHandler'.
     */
    constructor() {
        const manager = component.get(Manager);
        this.nniManager = manager;

        const tensorboard = component.get(TensorboardManager);
        this.tensorboardManager = tensorboard;

        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Builds the Express router that exposes every REST endpoint of this handler.
     *
     * The router is assembled in three stages:
     *  1. a middleware that logs each request, sets the CORS allow-headers /
     *     allow-methods headers and defaults the response Content-Type to JSON;
     *  2. the individual route registrations (order is preserved from the
     *     original implementation);
     *  3. an error middleware that turns validation ("Boom") errors into a
     *     response and forwards every other error to the next error handler.
     *
     * @returns the configured router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        // (the four-argument signature is what marks this as an error middleware)
        router.use((err: any, _req: Request, res: Response, next: any): any => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a validation error: hand it on instead of silently dropping it,
            // otherwise the request would never receive a response.
            return next(err);
        });

        return router;
    }

chicm-ms's avatar
chicm-ms committed
88
    /**
     * Sends an error response and logs the error.
     *
     * @param err       the error to report; its `message` becomes the `error` field of the response body
     * @param res       the response to write to
     * @param isFatal   when true the error is logged as critical and the process exits with code 1
     * @param errorCode HTTP status used unless `err` is an `NNIError` named NOT_FOUND, which always maps to 404
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        const isNotFound = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
        res.status(isNotFound ? 404 : errorCode);
        res.send({ error: err.message });

        if (!isFatal) {
            this.log.error(err);
            return;
        }

        // If it's a fatal error, exit process
        this.log.critical(err);
        process.exit(1);
    }

Gems Guo's avatar
Gems Guo committed
107
    /**
     * Registers `GET /version`, which responds with the value resolved by `getVersion()`.
     *
     * A failure of `getVersion()` is reported through `handleError`; without the
     * try/catch the rejection of this async handler would go unhandled and the
     * client would get no response.
     */
    private version(router: Router): void {
        router.get('/version', async (_req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Normalize non-Error rejections so handleError can read `.message`.
                const failure = err instanceof Error ? err : new Error(String(err));
                this.handleError(failure, res);
            }
        });
    }

Deshui Yu's avatar
Deshui Yu committed
114
115
    // TODO add validators for request params, query, body
    /**
     * Registers `GET /check-status`.
     *
     * The handler initializes the `DataStore` component and, on success, responds
     * with `nniManager.getStatus()`. If initialization fails, the error is sent to
     * the client via `handleError`, logged, and reported as a critical error to
     * `globals.shutdown`.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (_req: Request, res: Response) => {
            const dataStore: DataStore = component.get<DataStore>(DataStore);

            const onInitFailure = async (reason: Error): Promise<void> => {
                this.handleError(reason, res);
                this.log.error(reason.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                globals.shutdown.criticalError('RestHandler', reason);
            };

            dataStore.init()
                .then(() => {
                    res.send(this.nniManager.getStatus());
                })
                .catch(onInitFailure);
        });
    }

    /**
     * Registers `GET /experiment`, which responds with the profile resolved by
     * `nniManager.getExperimentProfile()`. Failures go through `handleError`.
     */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_req: Request, res: Response) => {
            const reply = (profile: ExperimentProfile): void => {
                res.send(profile);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.getExperimentProfile().then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /experiment`.
     *
     * The request body and the `update_type` query parameter are passed to
     * `nniManager.updateExperimentProfile`; an empty response is sent on success
     * and failures go through `handleError`.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            const updateType = req.query['update_type'] as ProfileUpdateType;
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.updateExperimentProfile(req.body, updateType)
                .then(() => {
                    res.send();
                })
                .catch(fail);
        });
    }
148

149
150
151
152
153
    /**
     * Registers `POST /experiment/import-data`.
     *
     * The request body is re-serialized with `JSON.stringify` and handed to
     * `nniManager.importData`; an empty response is sent on success and failures
     * go through `handleError`.
     */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            const serializedBody = JSON.stringify(req.body);
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.importData(serializedBody)
                .then(() => {
                    res.send();
                })
                .catch(fail);
        });
    }
Deshui Yu's avatar
Deshui Yu committed
158

159
    /**
     * Registers `GET /experiment/imported-data`, which responds with the
     * JSON-serialized array resolved by `nniManager.getImportedData()`.
     * Failures go through `handleError`.
     */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_req: Request, res: Response) => {
            const reply = (records: string[]): void => {
                const serialized = JSON.stringify(records);
                res.send(serialized);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.getImportedData().then(reply).catch(fail);
        });
    }

Deshui Yu's avatar
Deshui Yu committed
169
    /**
     * Registers `POST /experiment`.
     *
     * When `isNewExperiment()` is true the request body is passed to
     * `nniManager.startExperiment` and the resulting id is returned as
     * `{ experiment_id }`; otherwise the experiment is resumed with the current
     * read-only flag and an empty response is sent.
     *
     * Starting / resuming is a step of initialization; failures are reported
     * through `handleError` with its default (non-fatal) settings.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            const reportFailure = (reason: Error): void => {
                this.handleError(reason, res);
            };

            if (!isNewExperiment()) {
                this.nniManager.resumeExperiment(isReadonly())
                    .then(() => {
                        res.send();
                    })
                    .catch(reportFailure);
                return;
            }

            this.nniManager.startExperiment(req.body)
                .then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /job-statistics`, which responds with the statistics resolved
     * by `nniManager.getTrialJobStatistics()`. Failures go through `handleError`.
     */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_req: Request, res: Response) => {
            const reply = (stats: TrialJobStatistics[]): void => {
                res.send(stats);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.getTrialJobStatistics().then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /experiment/cluster-metadata`.
     *
     * Each top-level key of the request body is forwarded, one at a time and in
     * key order, to `nniManager.setClusterMetadata` with its value serialized as
     * JSON. An empty response is sent once all keys are applied.
     *
     * Any error is treated as fatal: it is passed to `handleError` with
     * `isFatal = true`.
     */
    private setClusterMetaData(router: Router): void {
        //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
        router.put('/experiment/cluster-metadata', async (req: Request, res: Response) => {
            const payload: any = req.body;
            const fieldNames: string[] = Object.keys(payload);
            try {
                for (const fieldName of fieldNames) {
                    const serializedValue = JSON.stringify(payload[fieldName]);
                    await this.nniManager.setClusterMetadata(fieldName, serializedValue);
                }
                res.send();
            } catch (err) {
                // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                const wrapped = NNIError.FromError(err);
                this.handleError(wrapped, res, true);
            }
        });
    }

    /**
     * Registers `GET /trial-jobs`.
     *
     * The optional `status` query parameter is passed to
     * `nniManager.listTrialJobs`. Every returned job is run through
     * `setErrorPathForFailedJob` before the list is sent. Failures go through
     * `handleError`.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            const statusFilter = req.query['status'] as TrialJobStatus;
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.listTrialJobs(statusFilter)
                .then((jobs: TrialJobInfo[]) => {
                    for (const job of jobs) {
                        this.setErrorPathForFailedJob(job);
                    }
                    res.send(jobs);
                })
                .catch(fail);
        });
    }

    /**
     * Registers `GET /trial-jobs/:id`, which responds with the job resolved by
     * `nniManager.getTrialJob(id)` after it has been passed through
     * `setErrorPathForFailedJob`. Failures go through `handleError`.
     */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            const trialId = req.params['id'];
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.getTrialJob(trialId)
                .then((detail: TrialJobInfo) => {
                    res.send(this.setErrorPathForFailedJob(detail));
                })
                .catch(fail);
        });
    }

    /**
     * Registers `POST /trial-jobs`.
     *
     * The request body is serialized with `JSON.stringify` and passed to
     * `nniManager.addCustomizedTrialJob`; the resolved sequence id is returned as
     * `{ sequenceId }`. Failures go through `handleError`.
     */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (req: Request, res: Response) => {
            const submission = this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body));
            try {
                const sequenceId: number = await submission;
                res.send({ sequenceId });
            } catch (err) {
                this.handleError(err as Error, res);
            }
        });
    }

    /**
     * Registers `DELETE /trial-jobs/:id`, which calls
     * `nniManager.cancelTrialJobByUser(id)` and sends an empty response on
     * success. Failures go through `handleError`.
     */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
            const cancellation = this.nniManager.cancelTrialJobByUser(req.params['id']);
            try {
                await cancellation;
                res.send();
            } catch (err) {
                this.handleError(err as Error, res);
            }
        });
    }

    /**
     * Registers `GET /metric-data/:job_id*?`.
     *
     * The `job_id` route parameter and the `type` query parameter are passed to
     * `nniManager.getMetricData`; the resolved records are sent as the response.
     * Failures go through `handleError`.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (req: Request, res: Response) => {
            const jobId = req.params['job_id'];
            const metricType = req.query['type'] as MetricType;
            const lookup = this.nniManager.getMetricData(jobId, metricType);
            try {
                const records: MetricDataRecord[] = await lookup;
                res.send(records);
            } catch (err) {
                this.handleError(err as Error, res);
            }
        });
    }

    /**
     * Registers `GET /metric-data-range/:min_seq_id/:max_seq_id`.
     *
     * Both route parameters are converted with `Number(...)` and passed to
     * `nniManager.getMetricDataByRange`; the resolved records are sent as the
     * response. Failures go through `handleError`.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const lowerSeqId = Number(req.params['min_seq_id']);
            const upperSeqId = Number(req.params['max_seq_id']);
            const lookup = this.nniManager.getMetricDataByRange(lowerSeqId, upperSeqId);
            try {
                const records: MetricDataRecord[] = await lookup;
                res.send(records);
            } catch (err) {
                this.handleError(err as Error, res);
            }
        });
    }

    /**
     * Registers `GET /metric-data-latest/`, which responds with the records
     * resolved by `nniManager.getLatestMetricData()`. Failures go through
     * `handleError`.
     */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (_req: Request, res: Response) => {
            const lookup = this.nniManager.getLatestMetricData();
            try {
                const records: MetricDataRecord[] = await lookup;
                res.send(records);
            } catch (err) {
                this.handleError(err as Error, res);
            }
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
295
296
297
    /**
     * Registers `GET /trial-file/:id/:filename`.
     *
     * The file content is obtained from `nniManager.getTrialFile(id, filename)`.
     * The response Content-Type is set to `application/octet-stream` when the
     * content is a `Buffer` and to `text/plain` otherwise. An empty string is
     * replaced with a "<filename> is empty." placeholder. Failures go through
     * `handleError`.
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', async (req: Request, res: Response) => {
            const filename = req.params['filename'];
            try {
                let content: Buffer | string = await this.nniManager.getTrialFile(req.params['id'], filename);
                const contentType = content instanceof Buffer ? 'application/octet-stream' : 'text/plain';
                res.header('Content-Type', contentType);
                if (content === '') {
                    content = `${filename} is empty.`;  // FIXME: this should be handled in front-end
                }
                res.send(content);
            } catch (err) {
                // Normalize non-Error rejections so handleError can read `.message`.
                const failure = err instanceof Error ? err : new Error(String(err));
                this.handleError(failure, res);
            }
        });
    }

315
    /**
     * Registers `GET /export-data`, which responds with the string resolved by
     * `nniManager.exportData()`. Failures go through `handleError`.
     */
    private exportData(router: Router): void {
        router.get('/export-data', (_req: Request, res: Response) => {
            const reply = (exported: string): void => {
                res.send(exported);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.nniManager.exportData().then(reply).catch(fail);
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
325
    /**
     * Registers `GET /experiment-metadata`.
     *
     * Fetches the current experiment profile and the experiments info list in
     * parallel, then responds with the first info entry whose `id` equals the
     * profile's `id`. When no entry matches, a 404 error response is sent so the
     * request does not hang without an answer. Other failures go through
     * `handleError` with its defaults.
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (_req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                getExperimentsManager().getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No entry for the running experiment: answer explicitly instead of leaving the request open.
                this.handleError(new Error(`Metadata of experiment ${profile.id} not found`), res, false, 404);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

343
    /**
     * Registers `GET /experiments-info`, which responds with the JSON-serialized
     * value resolved by `getExperimentsManager().getExperimentsInfo()`.
     * Failures go through `handleError`.
     */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_req: Request, res: Response) => {
            const reply = (allExperiments: JSON): void => {
                const serialized = JSON.stringify(allExperiments);
                res.send(serialized);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            getExperimentsManager().getExperimentsInfo().then(reply).catch(fail);
        });
    }

J-shang's avatar
J-shang committed
353
354
355
356
357
358
359
360
361
362
363
364
365
    /**
     * Registers `POST /tensorboard`.
     *
     * The request body is passed to `tensorboardManager.startTensorboardTask`;
     * the resulting task is logged and a shallow copy of it is sent. Failures are
     * reported through `handleError` with status code 400.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            const reply = (task: TensorboardTaskInfo): void => {
                this.log.info(task);
                res.send(Object.assign({}, task));
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res, false, 400);
            };

            this.tensorboardManager.startTensorboardTask(req.body).then(reply).catch(fail);
        });
    }

    /**
     * Registers `GET /tensorboard/:id`, which responds with a shallow copy of the
     * task resolved by `tensorboardManager.getTensorboardTask(id)`.
     * Failures go through `handleError`.
     */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (task: TensorboardTaskInfo): void => {
                res.send(Object.assign({}, task));
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.tensorboardManager.getTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /tensorboard/:id`, which responds with a shallow copy of the
     * task resolved by `tensorboardManager.updateTensorboardTask(id)`.
     * Failures go through `handleError`.
     */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (task: TensorboardTaskInfo): void => {
                res.send(Object.assign({}, task));
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.tensorboardManager.updateTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `DELETE /tensorboard/:id`, which responds with a shallow copy of
     * the task resolved by `tensorboardManager.stopTensorboardTask(id)`.
     * Failures go through `handleError`.
     */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (task: TensorboardTaskInfo): void => {
                res.send(Object.assign({}, task));
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.tensorboardManager.stopTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `DELETE /tensorboard-tasks`, which calls
     * `tensorboardManager.stopAllTensorboardTask()` and sends an empty response
     * on success. Failures go through `handleError`.
     */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_req: Request, res: Response) => {
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.tensorboardManager.stopAllTensorboardTask()
                .then(() => {
                    res.send();
                })
                .catch(fail);
        });
    }

    /**
     * Registers `GET /tensorboard-tasks`, which responds with the task list
     * resolved by `tensorboardManager.listTensorboardTasks()`.
     * Failures go through `handleError`.
     */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_req: Request, res: Response) => {
            const reply = (tasks: TensorboardTaskInfo[]): void => {
                res.send(tasks);
            };
            const fail = (reason: Error): void => {
                this.handleError(reason, res);
            };

            this.tensorboardManager.listTensorboardTasks().then(reply).catch(fail);
        });
    }

414
    /**
     * Registers `DELETE /experiment`.
     *
     * The handler sends an empty response first and then asks
     * `globals.shutdown` to initiate a shutdown, tagged 'REST request'.
     */
    private stop(router: Router): void {
        const onStopRequested = (_req: Request, res: Response): void => {
            res.send();
            globals.shutdown.initiate('REST request');
        };

        router.delete('/experiment', onStopRequested);
    }

Deshui Yu's avatar
Deshui Yu committed
421
422
423
424
    /**
     * Fills in `stderrPath` for a failed trial job.
     *
     * When the job's status is 'FAILED' and it has a `logPath`, `stderrPath` is
     * set (on the same object) to `<logPath>/stderr`. In every other case the
     * job is left untouched.
     *
     * @param jobInfo the job to inspect; may be `undefined`
     * @returns the same `jobInfo` reference that was passed in
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        const needsStderrPath =
            jobInfo !== undefined && jobInfo.status === 'FAILED' && jobInfo.logPath !== undefined;

        if (needsStderrPath) {
            jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');
        }

        return jobInfo;
    }
}

431
432
/**
 * Creates a fresh `NNIRestHandler` and returns the router it builds.
 */
export function createRestHandler(): Router {
    const handler = new NNIRestHandler();
    return handler.createRestHandler();
}