restHandler.ts 16.8 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4

import { Request, Response, Router } from 'express';
5
import path from 'path';
Deshui Yu's avatar
Deshui Yu committed
6
7
8
9

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
10
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
11
import globals from 'common/globals';
Deshui Yu's avatar
Deshui Yu committed
12
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
13
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
14
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
15
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
16
import { ValidationSchemas } from './restValidationSchemas';
17
import { getVersion } from '../common/utils';
18
19
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
Yuge Zhang's avatar
Yuge Zhang committed
20
import { TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
21

22
23
// TODO: fix expressJoi
//const expressJoi = require('express-joi-validator');
24

Deshui Yu's avatar
Deshui Yu committed
25
class NNIRestHandler {
26
27
    private nniManager: Manager;
    private experimentsManager: ExperimentManager;
J-shang's avatar
J-shang committed
28
    private tensorboardManager: TensorboardManager;
Deshui Yu's avatar
Deshui Yu committed
29
30
    private log: Logger;

31
    /**
     * Resolves the manager instances this handler delegates to through the
     * `component` container and creates the handler's named logger.
     */
    constructor() {
        // Collaborators are looked up from the component container, not constructed here.
        this.nniManager = component.get<Manager>(Manager);
        this.experimentsManager = component.get<ExperimentManager>(ExperimentManager);
        this.tensorboardManager = component.get<TensorboardManager>(TensorboardManager);

        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Builds the Express router exposing every REST endpoint of this handler.
     *
     * The router consists of, in order:
     *  1. a middleware that logs each request and sets the CORS / content-type headers,
     *  2. all route registrations (order is preserved because Express matches routes
     *     in registration order),
     *  3. an error-handling middleware for Boom-style validation errors.
     *
     * @returns the fully configured router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            // Default content type; individual routes may override it.
            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any): any => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // An error middleware must either answer the request or pass the error on.
            // Previously non-Boom errors were dropped here, leaving the request hanging.
            return next(err);
        });

        return router;
    }

chicm-ms's avatar
chicm-ms committed
90
    /**
     * Reports a failed request to the client and to the log.
     *
     * Responds with `{ error: <message> }`. The status is 404 when `err` is an
     * `NNIError` named `NNIErrorNames.NOT_FOUND`, otherwise `errorCode`.
     *
     * @param err the error to report
     * @param res response object of the failed request
     * @param isFatal when true the error is logged as critical and the process exits with code 1
     * @param errorCode HTTP status used for anything that is not a NOT_FOUND error
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        const isNotFound = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
        res.status(isNotFound ? 404 : errorCode);
        res.send({ error: err.message });

        if (!isFatal) {
            this.log.error(err);
            return;
        }

        // If it's a fatal error, exit process
        this.log.critical(err);
        process.exit(1);
    }

Gems Guo's avatar
Gems Guo committed
109
    /**
     * Registers `GET /version`, which responds with the value resolved by `getVersion()`.
     *
     * @param router router to register the route on
     */
    private version(router: Router): void {
        router.get('/version', async (_req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Without this guard a rejected getVersion() would leave the request
                // unanswered and surface as an unhandled promise rejection.
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

Deshui Yu's avatar
Deshui Yu committed
116
117
    // TODO add validators for request params, query, body
    /**
     * Registers `GET /check-status`.
     *
     * Initializes the `DataStore` component and then responds with
     * `nniManager.getStatus()`. Any failure in that chain is reported to the client,
     * logged, and escalated through `globals.shutdown.criticalError`.
     *
     * @param router router to register the route on
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (_req: Request, res: Response) => {
            const dataStore: DataStore = component.get<DataStore>(DataStore);

            const sendStatus = (): void => {
                res.send(this.nniManager.getStatus());
            };
            const abort = async (err: Error): Promise<void> => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error('Datastore initialize failed, stopping rest server...');
                globals.shutdown.criticalError('RestHandler', err);
            };

            dataStore.init().then(sendStatus).catch(abort);
        });
    }

    /**
     * Registers `GET /experiment`, which responds with the experiment profile
     * obtained from the NNI manager.
     *
     * @param router router to register the route on
     */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_req: Request, res: Response) => {
            const reply = (profile: ExperimentProfile): void => {
                res.send(profile);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.getExperimentProfile().then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /experiment`.
     *
     * Passes the request body and the `update_type` query parameter to
     * `nniManager.updateExperimentProfile` and answers with an empty body on success.
     *
     * @param router router to register the route on
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            const updateType = req.query['update_type'] as ProfileUpdateType;
            const done = (): void => {
                res.send();
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.updateExperimentProfile(req.body, updateType).then(done).catch(fail);
        });
    }
150

151
152
153
154
155
    /**
     * Registers `POST /experiment/import-data`.
     *
     * The request body is serialized to a JSON string and handed to
     * `nniManager.importData`; an empty response is sent on success.
     *
     * @param router router to register the route on
     */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            const payload = JSON.stringify(req.body);
            const done = (): void => {
                res.send();
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.importData(payload).then(done).catch(fail);
        });
    }
Deshui Yu's avatar
Deshui Yu committed
160

161
    /**
     * Registers `GET /experiment/imported-data`, which responds with the
     * JSON-serialized list returned by `nniManager.getImportedData()`.
     *
     * @param router router to register the route on
     */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_req: Request, res: Response) => {
            const reply = (importedData: string[]): void => {
                res.send(JSON.stringify(importedData));
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.getImportedData().then(reply).catch(fail);
        });
    }

Deshui Yu's avatar
Deshui Yu committed
171
    /**
     * Registers `POST /experiment`.
     *
     * When `isNewExperiment()` is true the request body is passed to
     * `nniManager.startExperiment` and the response is `{ experiment_id }`.
     * Otherwise the existing experiment is resumed (read-only when `isReadonly()`
     * says so) and an empty response is sent.
     *
     * NOTE(review): earlier comments described start/resume failures as fatal, but
     * the error is reported with the default (non-fatal) `handleError` arguments.
     * Confirm which behaviour is intended.
     *
     * @param router router to register the route on
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            if (!isNewExperiment()) {
                // Resume path: nothing to return besides the success status.
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch(fail);
                return;
            }

            // Start path: hand the new experiment's id back to the caller.
            this.nniManager.startExperiment(req.body).then((eid: string) => {
                res.send({
                    experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                });
            }).catch(fail);
        });
    }

    /**
     * Registers `GET /job-statistics`, which responds with the trial job
     * statistics reported by the NNI manager.
     *
     * @param router router to register the route on
     */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_req: Request, res: Response) => {
            const reply = (statistics: TrialJobStatistics[]): void => {
                res.send(statistics);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.getTrialJobStatistics().then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /experiment/cluster-metadata`.
     *
     * Each top-level key of the request body is forwarded, one at a time and in
     * key order, to `nniManager.setClusterMetadata` with its value serialized as
     * JSON. A failure is reported through `handleError` with `isFatal = true`,
     * which terminates the process.
     *
     * @param router router to register the route on
     */
    private setClusterMetaData(router: Router): void {
        //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
        router.put('/experiment/cluster-metadata', async (req: Request, res: Response) => {
            const body: any = req.body;
            const names: string[] = Object.keys(body);
            try {
                // Sequential on purpose: each entry is awaited before the next is sent.
                for (const name of names) {
                    const serialized = JSON.stringify(body[name]);
                    await this.nniManager.setClusterMetadata(name, serialized);
                }
                res.send();
            } catch (err) {
                // Setting cluster metadata is a step of initialization, so any exception is fatal.
                this.handleError(NNIError.FromError(err), res, true);
            }
        });
    }

    /**
     * Registers `GET /trial-jobs`.
     *
     * Lists trial jobs, passing the `status` query parameter to the manager.
     * Every returned job is run through `setErrorPathForFailedJob` before the
     * list is sent.
     *
     * @param router router to register the route on
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            const statusFilter = req.query['status'] as TrialJobStatus;

            this.nniManager.listTrialJobs(statusFilter).then((jobs: TrialJobInfo[]) => {
                for (const job of jobs) {
                    this.setErrorPathForFailedJob(job);
                }
                res.send(jobs);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * Registers `GET /trial-jobs/:id`, which responds with the requested trial
     * job after it has been passed through `setErrorPathForFailedJob`.
     *
     * @param router router to register the route on
     */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            const trialJobId = req.params['id'];

            this.nniManager.getTrialJob(trialJobId).then((jobDetail: TrialJobInfo) => {
                res.send(this.setErrorPathForFailedJob(jobDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * Registers `POST /trial-jobs`.
     *
     * The request body is serialized to JSON and submitted as a customized trial
     * job; the response is `{ sequenceId }` with the value resolved by the manager.
     *
     * @param router router to register the route on
     */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (req: Request, res: Response) => {
            const hyperParams = JSON.stringify(req.body);
            const reply = (sequenceId: number): void => {
                res.send({sequenceId});
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.addCustomizedTrialJob(hyperParams).then(reply).catch(fail);
        });
    }

    /**
     * Registers `DELETE /trial-jobs/:id`, which asks the manager to cancel the
     * given trial job on behalf of the user and answers with an empty body.
     *
     * @param router router to register the route on
     */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
            const trialJobId = req.params['id'];
            const done = (): void => {
                res.send();
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.cancelTrialJobByUser(trialJobId).then(done).catch(fail);
        });
    }

    /**
     * Registers `GET /metric-data/:job_id*?`.
     *
     * Forwards the optional `job_id` path parameter and the `type` query
     * parameter to `nniManager.getMetricData` and responds with the records.
     *
     * @param router router to register the route on
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (req: Request, res: Response) => {
            const jobId = req.params['job_id'];
            const metricType = req.query['type'] as MetricType;
            const reply = (metricsData: MetricDataRecord[]): void => {
                res.send(metricsData);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.getMetricData(jobId, metricType).then(reply).catch(fail);
        });
    }

    /**
     * Registers `GET /metric-data-range/:min_seq_id/:max_seq_id`.
     *
     * Both path parameters are converted with `Number()`. If either one is not a
     * number the request is rejected with HTTP 400 instead of passing `NaN` on to
     * the manager; otherwise the records returned by
     * `nniManager.getMetricDataByRange` are sent.
     *
     * @param router router to register the route on
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const minSeqId = Number(req.params['min_seq_id']);
            const maxSeqId = Number(req.params['max_seq_id']);

            // Number('abc') is NaN; treat that as a client error rather than querying with it.
            if (Number.isNaN(minSeqId) || Number.isNaN(maxSeqId)) {
                const detail = `${req.params['min_seq_id']}/${req.params['max_seq_id']}`;
                this.handleError(new Error(`Invalid sequence id range: ${detail}`), res, false, 400);
                return;
            }

            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * Registers `GET /metric-data-latest/`, which responds with the records
     * returned by `nniManager.getLatestMetricData()`.
     *
     * @param router router to register the route on
     */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (_req: Request, res: Response) => {
            const reply = (metricsData: MetricDataRecord[]): void => {
                res.send(metricsData);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.getLatestMetricData().then(reply).catch(fail);
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
297
298
299
    /**
     * Registers `GET /trial-file/:id/:filename`.
     *
     * Fetches the named file of the given trial through `nniManager.getTrialFile`.
     * The `Content-Type` is `application/octet-stream` when the manager returns a
     * `Buffer` and `text/plain` when it returns a string. An empty string is
     * replaced by a "<filename> is empty." placeholder message.
     *
     * @param router router to register the route on
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', async(req: Request, res: Response) => {
            const filename = req.params['filename'];
            this.nniManager.getTrialFile(req.params['id'], filename).then((content: Buffer | string) => {
                const contentType = content instanceof Buffer ? 'application/octet-stream' : 'text/plain';
                res.header('Content-Type', contentType);
                // FIXME: the empty-file placeholder should be handled in front-end
                const body: Buffer | string = content === '' ? `${filename} is empty.` : content;
                res.send(body);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

317
    /**
     * Registers `GET /export-data`, which responds with the string resolved by
     * `nniManager.exportData()`.
     *
     * @param router router to register the route on
     */
    private exportData(router: Router): void {
        router.get('/export-data', (_req: Request, res: Response) => {
            const reply = (exportedData: string): void => {
                res.send(exportedData);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.nniManager.exportData().then(reply).catch(fail);
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
327
    /**
     * Registers `GET /experiment-metadata`.
     *
     * Loads the current experiment profile and the list of known experiments in
     * parallel, then responds with the first list entry whose `id` equals the
     * profile's `id`. When no entry matches, the request is answered with HTTP 404
     * (previously no response was sent at all and the request hung).
     *
     * @param router router to register the route on
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (_req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                this.experimentsManager.getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                // getExperimentsInfo() is typed as JSON, hence the cast to iterate it.
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No matching entry: always answer so the client is not left waiting.
                this.handleError(new Error(`Metadata of experiment ${profile.id} not found`), res, false, 404);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

345
    /**
     * Registers `GET /experiments-info`, which responds with the JSON-serialized
     * value resolved by `experimentsManager.getExperimentsInfo()`.
     *
     * @param router router to register the route on
     */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_req: Request, res: Response) => {
            const reply = (experimentInfo: JSON): void => {
                res.send(JSON.stringify(experimentInfo));
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.experimentsManager.getExperimentsInfo().then(reply).catch(fail);
        });
    }

J-shang's avatar
J-shang committed
355
356
357
358
359
360
361
362
363
364
365
366
367
    /**
     * Registers `POST /tensorboard`.
     *
     * Starts a tensorboard task from the request body, logs the resulting task
     * info and responds with a shallow copy of it. Failures are reported with
     * HTTP 400 (unless `handleError` maps them to 404).
     *
     * @param router router to register the route on
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            const reply = (taskDetail: TensorboardTaskInfo): void => {
                this.log.info(taskDetail);
                res.send({ ...taskDetail });
            };
            const fail = (err: Error): void => {
                this.handleError(err, res, false, 400);
            };

            this.tensorboardManager.startTensorboardTask(req.body).then(reply).catch(fail);
        });
    }

    /**
     * Registers `GET /tensorboard/:id`, which responds with a shallow copy of
     * the task info for the given tensorboard task.
     *
     * @param router router to register the route on
     */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (taskDetail: TensorboardTaskInfo): void => {
                res.send({ ...taskDetail });
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.tensorboardManager.getTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `PUT /tensorboard/:id`, which asks the tensorboard manager to
     * update the given task and responds with a shallow copy of the task info.
     *
     * @param router router to register the route on
     */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (taskDetail: TensorboardTaskInfo): void => {
                res.send({ ...taskDetail });
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.tensorboardManager.updateTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `DELETE /tensorboard/:id`, which stops the given tensorboard
     * task and responds with a shallow copy of the task info.
     *
     * @param router router to register the route on
     */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            const taskId = req.params['id'];
            const reply = (taskDetail: TensorboardTaskInfo): void => {
                res.send({ ...taskDetail });
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.tensorboardManager.stopTensorboardTask(taskId).then(reply).catch(fail);
        });
    }

    /**
     * Registers `DELETE /tensorboard-tasks`, which stops all tensorboard tasks
     * and answers with an empty body.
     *
     * @param router router to register the route on
     */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_req: Request, res: Response) => {
            const done = (): void => {
                res.send();
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.tensorboardManager.stopAllTensorboardTask().then(done).catch(fail);
        });
    }

    /**
     * Registers `GET /tensorboard-tasks`, which responds with the list of
     * tensorboard tasks known to the tensorboard manager.
     *
     * @param router router to register the route on
     */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_req: Request, res: Response) => {
            const reply = (taskDetails: TensorboardTaskInfo[]): void => {
                res.send(taskDetails);
            };
            const fail = (err: Error): void => {
                this.handleError(err, res);
            };

            this.tensorboardManager.listTensorboardTasks().then(reply).catch(fail);
        });
    }

416
    /**
     * Registers `DELETE /experiment`.
     *
     * The empty response is sent first, and only then is shutdown requested via
     * `globals.shutdown.initiate`.
     *
     * @param router router to register the route on
     */
    private stop(router: Router): void {
        const onStopRequest = (_req: Request, res: Response): void => {
            // Reply before initiating shutdown.
            res.send();
            globals.shutdown.initiate('REST request');
        };

        router.delete('/experiment', onStopRequest);
    }

Deshui Yu's avatar
Deshui Yu committed
423
424
425
426
    /**
     * Adds `stderrPath` to a failed trial job.
     *
     * Only jobs whose status is `'FAILED'` and that have a `logPath` are touched;
     * for those, `stderrPath` is set to `<logPath>/stderr`. The object is modified
     * in place and the same reference is returned in every case.
     *
     * @param jobInfo trial job to inspect (may be `undefined`)
     * @returns the `jobInfo` argument
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo !== undefined && jobInfo.status === 'FAILED' && jobInfo.logPath !== undefined) {
            jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');
        }

        return jobInfo;
    }
}

433
434
/**
 * Creates a fresh `NNIRestHandler` and returns the Express router it builds.
 *
 * @returns router exposing the REST endpoints
 */
export function createRestHandler(): Router {
    const handler = new NNIRestHandler();

    return handler.createRestHandler();
}