restHandler.ts 15.7 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5
6
7
8
9
10
11

'use strict';

import { Request, Response, Router } from 'express';
import * as path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
12
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
13
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
14
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
15
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
16
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
17
18
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
19
import { getVersion } from '../common/utils';
Deshui Yu's avatar
Deshui Yu committed
20

21
22
const expressJoi = require('express-joi-validator');

Deshui Yu's avatar
Deshui Yu committed
23
class NNIRestHandler {
24
    private restServer: NNIRestServer;
25
26
    private nniManager: Manager;
    private experimentsManager: ExperimentManager;
J-shang's avatar
J-shang committed
27
    private tensorboardManager: TensorboardManager;
Deshui Yu's avatar
Deshui Yu committed
28
29
    private log: Logger;

30
    /**
     * Wires the handler to the REST server that owns it and looks up the
     * manager singletons from the component registry.
     *
     * @param rs REST server instance; kept so that a handler can stop it later.
     */
    constructor(rs: NNIRestServer) {
        this.restServer = rs;

        // Managers are resolved through the shared component container.
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);

        this.log = getLogger();
    }

    /**
     * Builds the Express router exposing every REST endpoint of this handler.
     *
     * The router is assembled in three stages:
     *  1. a leading middleware that logs the request and sets CORS and
     *     content-type response headers,
     *  2. registration of each endpoint through the private register methods,
     *  3. a trailing error middleware that turns validation (Boom) errors into
     *     JSON responses.
     *
     * @returns the fully configured router.
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Runs before every route: debug-log the request and attach common headers.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`);
            res.header('Access-Control-Allow-Origin', '*');
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialLog(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any) => {
            if (err && err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a validation error: forward it so that the next error handler
            // (or Express's default one) answers the request. Without this the
            // request would never receive a response.
            return next(err);
        });

        return router;
    }

chicm-ms's avatar
chicm-ms committed
90
    /**
     * Reports a failure to the client and to the log.
     *
     * The response status is 404 when `err` is an NNIError named NOT_FOUND,
     * otherwise `errorCode`. The response body is `{ error: <message> }`.
     *
     * @param err       the error to report.
     * @param res       response object of the failing request.
     * @param isFatal   when true, the error is logged as fatal and the process exits with code 1.
     * @param errorCode status code used for anything that is not a NOT_FOUND error.
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        const notFound = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
        res.status(notFound ? 404 : errorCode);
        res.send({ error: err.message });

        if (!isFatal) {
            this.log.error(err);
            return;
        }

        // Fatal errors terminate the process after being logged.
        this.log.fatal(err);
        process.exit(1);
    }

Gems Guo's avatar
Gems Guo committed
109
110
    /**
     * Registers `GET /version`, which responds with the value resolved by
     * `getVersion()`.
     *
     * A failure of `getVersion()` is reported through `handleError` instead of
     * being left as an unhandled rejection with no response sent.
     *
     * @param router router on which the route is registered.
     */
    private version(router: Router): void {
        router.get('/version', async (req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

Deshui Yu's avatar
Deshui Yu committed
116
117
118
119
120
    // TODO add validators for request params, query, body
    /**
     * Registers `GET /check-status`.
     *
     * The handler initializes the DataStore component and then responds with
     * the manager's status. If that fails, the error is reported to the
     * client, logged, and the REST server is stopped.
     *
     * @param router router on which the route is registered.
     */
    private checkStatus(router: Router): void {
        const onInitFailure = async (failure: Error, response: Response): Promise<void> => {
            this.handleError(failure, response);
            this.log.error(failure.message);
            this.log.error(`Datastore initialize failed, stopping rest server...`);
            await this.restServer.stop();
        };

        router.get('/check-status', (_request: Request, response: Response) => {
            const dataStore: DataStore = component.get<DataStore>(DataStore);
            dataStore.init()
                .then(() => {
                    response.send(this.nniManager.getStatus());
                })
                .catch((failure: Error) => onInitFailure(failure, response));
        });
    }

    /**
     * Registers `GET /experiment`, which responds with the experiment profile
     * returned by the manager.
     *
     * @param router router on which the route is registered.
     */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_request: Request, response: Response) => {
            const sendProfile = (profile: ExperimentProfile): void => {
                response.send(profile);
            };
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getExperimentProfile().then(sendProfile).catch(reportFailure);
        });
    }

    /**
     * Registers `PUT /experiment`. The request is validated against
     * `ValidationSchemas.UPDATEEXPERIMENT`; the body and the `update_type`
     * query parameter are then passed to the manager. Responds with an empty
     * body on success.
     *
     * @param router router on which the route is registered.
     */
    private updateExperimentProfile(router: Router): void {
        const validator = expressJoi(ValidationSchemas.UPDATEEXPERIMENT);

        router.put('/experiment', validator, (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.updateExperimentProfile(request.body, request.query.update_type)
                .then(() => {
                    response.send();
                })
                .catch(reportFailure);
        });
    }
150

151
152
153
154
155
    /**
     * Registers `POST /experiment/import-data`. The request body is serialized
     * to a JSON string and handed to the manager; the response body is empty
     * on success.
     *
     * @param router router on which the route is registered.
     */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };
            const serializedBody = JSON.stringify(request.body);

            this.nniManager.importData(serializedBody)
                .then(() => {
                    response.send();
                })
                .catch(reportFailure);
        });
    }
Deshui Yu's avatar
Deshui Yu committed
160

161
162
163
164
165
166
167
168
169
170
    /**
     * Registers `GET /experiment/imported-data`, which responds with the
     * JSON-serialized list returned by the manager.
     *
     * @param router router on which the route is registered.
     */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getImportedData()
                .then((importedData: string[]) => {
                    const payload = JSON.stringify(importedData);
                    response.send(payload);
                })
                .catch(reportFailure);
        });
    }

Deshui Yu's avatar
Deshui Yu committed
171
    /**
     * Registers `POST /experiment`. The request is validated against
     * `ValidationSchemas.STARTEXPERIMENT`.
     *
     * When `isNewExperiment()` is true the experiment is started from the
     * request body and the response is `{ experiment_id }`; otherwise the
     * existing experiment is resumed (read-only according to `isReadonly()`)
     * and the response body is empty.
     *
     * @param router router on which the route is registered.
     */
    private startExperiment(router: Router): void {
        const validator = expressJoi(ValidationSchemas.STARTEXPERIMENT);

        router.post('/experiment', validator, (request: Request, response: Response) => {
            // Start/resume are initialization steps; their failures are reported
            // through handleError without the fatal flag.
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            if (!isNewExperiment()) {
                this.nniManager.resumeExperiment(isReadonly())
                    .then(() => {
                        response.send();
                    })
                    .catch(reportFailure);
                return;
            }

            this.nniManager.startExperiment(request.body)
                .then((eid: string) => {
                    response.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /job-statistics`, which responds with the trial job
     * statistics returned by the manager.
     *
     * @param router router on which the route is registered.
     */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_request: Request, response: Response) => {
            const sendStatistics = (statistics: TrialJobStatistics[]): void => {
                response.send(statistics);
            };
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getTrialJobStatistics().then(sendStatistics).catch(reportFailure);
        });
    }

    /**
     * Registers `PUT /experiment/cluster-metadata`. The request is validated
     * against `ValidationSchemas.SETCLUSTERMETADATA`; each top-level key of the
     * body is then passed, one after another, to the manager together with the
     * JSON-serialized value. Responds with an empty body once all keys are set.
     *
     * @param router router on which the route is registered.
     */
    private setClusterMetaData(router: Router): void {
        const validator = expressJoi(ValidationSchemas.SETCLUSTERMETADATA);

        router.put('/experiment/cluster-metadata', validator, async (request: Request, response: Response) => {
            const metadata: any = request.body;
            const metadataKeys: string[] = Object.keys(metadata);

            try {
                // Keys are applied sequentially; each call is awaited before the next.
                for (const metadataKey of metadataKeys) {
                    const serializedValue = JSON.stringify(metadata[metadataKey]);
                    await this.nniManager.setClusterMetadata(metadataKey, serializedValue);
                }
                response.send();
            } catch (err) {
                // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                this.handleError(NNIError.FromError(err), response, true);
            }
        });
    }

    /**
     * Registers `GET /trial-jobs`. Jobs are listed through the manager using
     * the `status` query parameter; each job is passed through
     * `setErrorPathForFailedJob` before the list is sent.
     *
     * @param router router on which the route is registered.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.listTrialJobs(request.query.status)
                .then((jobInfos: TrialJobInfo[]) => {
                    for (const trialJob of jobInfos) {
                        this.setErrorPathForFailedJob(trialJob);
                    }
                    response.send(jobInfos);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /trial-jobs/:id`, which responds with the trial job
     * identified by `:id` after passing it through `setErrorPathForFailedJob`.
     *
     * @param router router on which the route is registered.
     */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getTrialJob(request.params.id)
                .then((jobDetail: TrialJobInfo) => {
                    response.send(this.setErrorPathForFailedJob(jobDetail));
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `POST /trial-jobs`. The request body is serialized to a JSON
     * string and submitted as a customized trial job; the response is
     * `{ sequenceId }` with the number resolved by the manager.
     *
     * @param router router on which the route is registered.
     */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };
            const serializedBody = JSON.stringify(request.body);

            this.nniManager.addCustomizedTrialJob(serializedBody)
                .then((sequenceId: number) => {
                    response.send({ sequenceId });
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `DELETE /trial-jobs/:id`, which asks the manager to cancel the
     * trial job identified by `:id` on behalf of the user. Responds with an
     * empty body on success.
     *
     * @param router router on which the route is registered.
     */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.cancelTrialJobByUser(request.params.id)
                .then(() => {
                    response.send();
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /metric-data/:job_id*?`. The `job_id` route parameter and
     * the `type` query parameter are forwarded to the manager, and the
     * resulting metric records are sent back.
     *
     * @param router router on which the route is registered.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getMetricData(request.params.job_id, request.query.type)
                .then((metricsData: MetricDataRecord[]) => {
                    response.send(metricsData);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /metric-data-range/:min_seq_id/:max_seq_id`. Both route
     * parameters are converted with `Number(...)` and forwarded to the
     * manager; the resulting metric records are sent back.
     *
     * @param router router on which the route is registered.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };
            const lowerBound = Number(request.params.min_seq_id);
            const upperBound = Number(request.params.max_seq_id);

            this.nniManager.getMetricDataByRange(lowerBound, upperBound)
                .then((metricsData: MetricDataRecord[]) => {
                    response.send(metricsData);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /metric-data-latest/`, which responds with the metric
     * records returned by the manager's `getLatestMetricData()`.
     *
     * @param router router on which the route is registered.
     */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (_request: Request, response: Response) => {
            const sendMetrics = (metricsData: MetricDataRecord[]): void => {
                response.send(metricsData);
            };
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getLatestMetricData().then(sendMetrics).catch(reportFailure);
        });
    }

297
298
299
300
301
302
303
304
305
306
307
308
309
    /**
     * Registers `GET /trial-log/:id/:type`, which responds with the log text
     * returned by the manager for the given trial id and log type. An empty
     * log is replaced by a placeholder message.
     *
     * @param router router on which the route is registered.
     */
    private getTrialLog(router: Router): void {
        router.get('/trial-log/:id/:type', async (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getTrialLog(request.params.id, request.params.type)
                .then((log: string) => {
                    const text = log === '' ? 'No logs available.' : log;
                    response.send(text);
                })
                .catch(reportFailure);
        });
    }

310
311
312
313
314
    /**
     * Registers `GET /export-data`, which responds with the string returned by
     * the manager's `exportData()`.
     *
     * @param router router on which the route is registered.
     */
    private exportData(router: Router): void {
        router.get('/export-data', (_request: Request, response: Response) => {
            const sendExport = (exportedData: string): void => {
                response.send(exportedData);
            };
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.exportData().then(sendExport).catch(reportFailure);
        });
    }

320
321
322
323
324
325
326
327
328
329
    /**
     * Registers `GET /experiments-info`, which responds with the
     * JSON-serialized value returned by the experiments manager.
     *
     * @param router router on which the route is registered.
     */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.experimentsManager.getExperimentsInfo()
                .then((experimentInfo: JSON) => {
                    const payload = JSON.stringify(experimentInfo);
                    response.send(payload);
                })
                .catch(reportFailure);
        });
    }

J-shang's avatar
J-shang committed
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
    /**
     * Registers `POST /tensorboard`. The request body is passed to the
     * tensorboard manager; the resulting task is logged and a shallow copy of
     * it is sent back. Failures are reported with status 400 (unless they are
     * NOT_FOUND errors, which `handleError` maps to 404).
     *
     * @param router router on which the route is registered.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response, false, 400);
            };

            this.tensorboardManager.startTensorboardTask(request.body)
                .then((taskDetail: TensorboardTaskInfo) => {
                    this.log.info(taskDetail);
                    const copy = Object.assign({}, taskDetail);
                    response.send(copy);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /tensorboard/:id`, which responds with a shallow copy of
     * the tensorboard task identified by `:id`.
     *
     * @param router router on which the route is registered.
     */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.getTensorboardTask(request.params.id)
                .then((taskDetail: TensorboardTaskInfo) => {
                    const copy = Object.assign({}, taskDetail);
                    response.send(copy);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `PUT /tensorboard/:id`, which asks the tensorboard manager to
     * update the task identified by `:id` and responds with a shallow copy of
     * the resulting task.
     *
     * @param router router on which the route is registered.
     */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.updateTensorboardTask(request.params.id)
                .then((taskDetail: TensorboardTaskInfo) => {
                    const copy = Object.assign({}, taskDetail);
                    response.send(copy);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `DELETE /tensorboard/:id`, which asks the tensorboard manager
     * to stop the task identified by `:id` and responds with a shallow copy of
     * the resulting task.
     *
     * @param router router on which the route is registered.
     */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.stopTensorboardTask(request.params.id)
                .then((taskDetail: TensorboardTaskInfo) => {
                    const copy = Object.assign({}, taskDetail);
                    response.send(copy);
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `DELETE /tensorboard-tasks`, which asks the tensorboard
     * manager to stop all tasks. Responds with an empty body on success.
     *
     * @param router router on which the route is registered.
     */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_request: Request, response: Response) => {
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.stopAllTensorboardTask()
                .then(() => {
                    response.send();
                })
                .catch(reportFailure);
        });
    }

    /**
     * Registers `GET /tensorboard-tasks`, which responds with the list of
     * tasks returned by the tensorboard manager.
     *
     * @param router router on which the route is registered.
     */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_request: Request, response: Response) => {
            const sendTasks = (taskDetails: TensorboardTaskInfo[]): void => {
                response.send(taskDetails);
            };
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.listTensorboardTasks().then(sendTasks).catch(reportFailure);
        });
    }

391
392
393
394
395
396
397
398
399
    /**
     * Registers `DELETE /experiment`, which stops the experiment in two steps:
     * the response is sent as soon as `stopExperimentTopHalf()` resolves, and
     * `stopExperimentBottomHalf()` is invoked afterwards.
     *
     * A failure of the top half is reported to the client through
     * `handleError`, so the request is always answered. A failure raised after
     * the response has been sent is only logged, because the response can no
     * longer be modified.
     *
     * @param router router on which the route is registered.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();
                // NOTE(review): if stopExperimentBottomHalf() returns a promise, its
                // rejection is not observed here — confirm against the Manager interface.
                this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                if (res.headersSent) {
                    // The client already received its response; only log the failure.
                    this.log.error(err);
                } else {
                    this.handleError(err, res);
                }
            });
        });
    }

Deshui Yu's avatar
Deshui Yu committed
400
401
402
403
    /**
     * Fills in `stderrPath` for a failed trial job that has a log path.
     *
     * The job object is modified in place and returned. Jobs that are
     * undefined, not in the 'FAILED' status, or without a `logPath` are
     * returned unchanged.
     *
     * @param jobInfo trial job to inspect.
     * @returns the same `jobInfo` reference that was passed in.
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo !== undefined && jobInfo.status === 'FAILED' && jobInfo.logPath !== undefined) {
            // The stderr file sits directly under the job's log directory.
            jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');
        }

        return jobInfo;
    }
}

410
/**
 * Creates the Express router that serves the NNI REST API.
 *
 * @param rs REST server instance handed to the underlying handler.
 * @returns the router produced by a new `NNIRestHandler`.
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}