restHandler.ts 16.9 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5
6
7
8
9
10
11

'use strict';

import { Request, Response, Router } from 'express';
import * as path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
12
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
13
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
14
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
15
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
16
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
17
18
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
19
import { getVersion } from '../common/utils';
20
21
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
Yuge Zhang's avatar
Yuge Zhang committed
22
import { TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
23

24
25
const expressJoi = require('express-joi-validator');

Deshui Yu's avatar
Deshui Yu committed
26
/**
 * Registers the NNI REST endpoints on an express router.
 *
 * Each private method below attaches exactly one route to the router it is
 * given; `createRestHandler()` wires all of them together with the shared
 * request and error middleware.
 */
class NNIRestHandler {
    private readonly restServer: NNIRestServer;
    private readonly nniManager: Manager;
    private readonly experimentsManager: ExperimentManager;
    private readonly tensorboardManager: TensorboardManager;
    private readonly log: Logger;

    /**
     * @param rs REST server owning this handler; it is stopped by the
     *           `/check-status` route when the datastore fails to initialize.
     */
    constructor(rs: NNIRestServer) {
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);
        this.restServer = rs;
        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Creates a router with the common middleware, every route of this
     * handler, and the validation error middleware.
     *
     * @returns the fully configured express router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Runs before every route: log the request, set the CORS headers and
        // make JSON the default content type of the response.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any) => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a validation error: forward it so that the next error
            // handler can answer, instead of leaving the request pending.
            return next(err);
        });

        return router;
    }

    /**
     * Reports `err` to the client and to the log.
     *
     * @param err error to report; its message becomes the `error` field of the response body
     * @param res response to write to; left untouched if its headers were already sent
     * @param isFatal when true, the error is logged as fatal and the process exits with code 1
     * @param errorCode HTTP status used unless `err` is an NNIError named NOT_FOUND, which always maps to 404
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        // Writing to a response that was already sent would throw, so only
        // answer when nothing has gone out yet.
        if (!res.headersSent) {
            if (err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND) {
                res.status(404);
            } else {
                res.status(errorCode);
            }
            res.send({
                error: err.message
            });
        }

        // If it's a fatal error, exit process
        if (isFatal) {
            this.log.fatal(err);
            process.exit(1);
        } else {
            this.log.error(err);
        }
    }

    /** GET /version: responds with the value resolved by `getVersion()`. */
    private version(router: Router): void {
        router.get('/version', async (req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Without this the rejection would be unhandled and the
                // request would never be answered.
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

    // TODO add validators for request params, query, body
    /**
     * GET /check-status: initializes the DataStore component and responds
     * with `nniManager.getStatus()`. If initialization fails the error is
     * reported and the REST server is stopped.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (req: Request, res: Response) => {
            const ds: DataStore = component.get<DataStore>(DataStore);
            ds.init().then(() => {
                res.send(this.nniManager.getStatus());
            }).catch(async (err: Error) => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.restServer.stop();
            });
        });
    }

    /** GET /experiment: responds with the experiment profile. */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (req: Request, res: Response) => {
            this.nniManager.getExperimentProfile().then((profile: ExperimentProfile) => {
                res.send(profile);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment: updates the experiment profile from the request body;
     * the kind of update is taken from the `update_type` query parameter.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            this.nniManager.updateExperimentProfile(req.body, req.query.update_type as ProfileUpdateType).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /experiment/import-data: passes the JSON-serialized request body to the manager. */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            this.nniManager.importData(JSON.stringify(req.body)).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiment/imported-data: responds with the imported data serialized as JSON. */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (req: Request, res: Response) => {
            this.nniManager.getImportedData().then((importedData: string[]) => {
                res.send(JSON.stringify(importedData));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /experiment: starts a new experiment from the request body when
     * `isNewExperiment()` is true, otherwise resumes the existing one
     * (read-only when `isReadonly()` is true).
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            if (isNewExperiment()) {
                this.nniManager.startExperiment(req.body).then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                }).catch((err: Error) => {
                    // NOTE(review): starting is an initialization step, yet the error is
                    // reported with the default isFatal=false, so the process keeps
                    // running - confirm this is intended.
                    this.handleError(err, res);
                });
            } else {
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch((err: Error) => {
                    // NOTE(review): resuming is an initialization step, yet the error is
                    // reported with the default isFatal=false, so the process keeps
                    // running - confirm this is intended.
                    this.handleError(err, res);
                });
            }
        });
    }

    /** GET /job-statistics: responds with the trial job statistics. */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (req: Request, res: Response) => {
            this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
                res.send(statistics);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment/cluster-metadata: validates the body against
     * `ValidationSchemas.SETCLUSTERMETADATA`, then stores every top-level key
     * of the body, one after another, as cluster metadata.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (req: Request, res: Response) => {
                const metadata: any = req.body;
                const keys: string[] = Object.keys(metadata);
                try {
                    for (const key of keys) {
                        await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
                    }
                    res.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), res, true);
                }
            });
    }

    /**
     * GET /trial-jobs: lists trial jobs, passing the `status` query parameter
     * to the manager; failed jobs get their stderr path filled in.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.listTrialJobs(req.query.status as TrialJobStatus).then((jobInfos: TrialJobInfo[]) => {
                jobInfos.forEach((trialJob: TrialJobInfo) => {
                    this.setErrorPathForFailedJob(trialJob);
                });
                res.send(jobInfos);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /trial-jobs/:id: responds with one trial job; a failed job gets its stderr path filled in. */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.getTrialJob(req.params.id).then((jobDetail: TrialJobInfo) => {
                const jobInfo: TrialJobInfo = this.setErrorPathForFailedJob(jobDetail);
                res.send(jobInfo);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /trial-jobs: submits the JSON-serialized request body as a
     * customized trial job and responds with `{ sequenceId }`.
     */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (req: Request, res: Response) => {
            this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then((sequenceId: number) => {
                res.send({sequenceId});
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /trial-jobs/:id: cancels the trial job on behalf of the user. */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
            this.nniManager.cancelTrialJobByUser(req.params.id).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data/:job_id*?: responds with metric records, passing the
     * optional `job_id` path parameter and the `type` query parameter to the
     * manager.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (req: Request, res: Response) => {
            this.nniManager.getMetricData(req.params.job_id, req.query.type as MetricType).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data-range/:min_seq_id/:max_seq_id: responds with the metric
     * records for the given sequence id bounds; both path parameters are
     * converted with `Number()` before being passed to the manager.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const minSeqId = Number(req.params.min_seq_id);
            const maxSeqId = Number(req.params.max_seq_id);
            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /metric-data-latest/: responds with the latest metric records. */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (req: Request, res: Response) => {
            this.nniManager.getLatestMetricData().then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /trial-file/:id/:filename: responds with the content of a trial
     * file. Buffers are sent as `application/octet-stream`, strings as
     * `text/plain`; an empty string is replaced by a "<filename> is empty."
     * notice.
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', async(req: Request, res: Response) => {
            const filename = req.params.filename;
            this.nniManager.getTrialFile(req.params.id, filename).then((content: Buffer | string) => {
                if (content instanceof Buffer) {
                    res.header('Content-Type', 'application/octet-stream');
                } else {
                    // The router-wide default is application/json, which is
                    // wrong for raw file text.
                    res.header('Content-Type', 'text/plain');
                    if (content === '') {
                        content = `${filename} is empty.`;
                    }
                }
                res.send(content);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /export-data: responds with the exported experiment data. */
    private exportData(router: Router): void {
        router.get('/export-data', (req: Request, res: Response) => {
            this.nniManager.exportData().then((exportedData: string) => {
                res.send(exportedData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /experiment-metadata: responds with the entry of the experiments
     * info whose `id` equals the id of the current experiment profile, or
     * with 404 when there is no such entry.
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                this.experimentsManager.getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No entry matched: answer explicitly so the request does not hang.
                this.handleError(new Error(`Metadata of experiment ${profile.id} not found`), res, false, 404);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiments-info: responds with the experiments info serialized as JSON. */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (req: Request, res: Response) => {
            this.experimentsManager.getExperimentsInfo().then((experimentInfo: JSON) => {
                res.send(JSON.stringify(experimentInfo));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /tensorboard: starts a tensorboard task from the request body and
     * responds with a shallow copy of its details; failures are reported
     * with status 400.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            this.tensorboardManager.startTensorboardTask(req.body).then((taskDetail: TensorboardTaskInfo) => {
                this.log.info(taskDetail);
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res, false, 400);
            });
        });
    }

    /** GET /tensorboard/:id: responds with a shallow copy of the task details. */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.getTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** PUT /tensorboard/:id: updates the task and responds with a shallow copy of its details. */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.updateTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard/:id: stops the task and responds with a shallow copy of its details. */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.stopTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard-tasks: stops every tensorboard task. */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.stopAllTensorboardTask().then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /tensorboard-tasks: responds with the list of tensorboard tasks. */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.listTensorboardTasks().then((taskDetails: TensorboardTaskInfo[]) => {
                res.send(taskDetails);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * DELETE /experiment: runs the top half of the experiment shutdown,
     * answers the client, then triggers the bottom half.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();
                this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                // A failing top half must still answer the client; if the
                // response already went out, handleError only logs.
                this.handleError(err, res);
            });
        });
    }

    /**
     * Fills in `stderrPath` (the `stderr` entry under `logPath`) for a job
     * whose status is 'FAILED' and whose `logPath` is defined. The job is
     * modified in place.
     *
     * @param jobInfo trial job to inspect; may be undefined
     * @returns the same `jobInfo` object that was passed in
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) {
            return jobInfo;
        }
        jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');

        return jobInfo;
    }
}

438
/**
 * Builds the express router that serves the NNI REST API.
 *
 * @param rs REST server handed to the underlying `NNIRestHandler`
 * @returns the router produced by `NNIRestHandler.createRestHandler()`
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}