restHandler.ts 15.6 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5
6
7
8
9
10
11

'use strict';

import { Request, Response, Router } from 'express';
import * as path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
12
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
13
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
14
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
15
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
16
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
17
18
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
19
import { getVersion } from '../common/utils';
Deshui Yu's avatar
Deshui Yu committed
20

21
22
const expressJoi = require('express-joi-validator');

Deshui Yu's avatar
Deshui Yu committed
23
/**
 * Builds the Express router that exposes the NNI manager, the experiments
 * manager and the tensorboard manager over REST.
 *
 * Every private `xxx(router)` method below registers exactly one route on the
 * router passed to it; `createRestHandler()` wires them all together.
 */
class NNIRestHandler {
    private readonly restServer: NNIRestServer;
    private readonly nniManager: Manager;
    private readonly experimentsManager: ExperimentManager;
    private readonly tensorboardManager: TensorboardManager;
    private readonly log: Logger;

    /**
     * @param rs REST server owning this handler; it is stopped by the
     *           `/check-status` route when the datastore fails to initialize.
     */
    constructor(rs: NNIRestServer) {
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);
        this.restServer = rs;
        this.log = getLogger();
    }

    /**
     * Creates a router with the logging/header middleware, all REST routes and
     * the validation error handler installed.
     *
     * @returns the fully configured Express router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Log every request and set the common response headers.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialLog(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration.
        // The handler must keep four parameters: Express recognizes error
        // middleware by its arity.
        router.use((err: any, _req: Request, res: Response, next: any) => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a validation (Boom) error: pass it on so the request is not
            // left hanging without a response.
            return next(err);
        });

        return router;
    }

    /**
     * Sends `{ error: err.message }` to the client and logs the error.
     *
     * @param err       error to report
     * @param res       response to write to
     * @param isFatal   when true, log at fatal level and exit the process with code 1
     * @param errorCode HTTP status used unless `err` is an NNIError named NOT_FOUND,
     *                  in which case 404 is used
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        if (err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND) {
            res.status(404);
        } else {
            res.status(errorCode);
        }
        res.send({
            error: err.message
        });

        // If it's a fatal error, exit process
        if (isFatal) {
            this.log.fatal(err);
            process.exit(1);
        } else {
            this.log.error(err);
        }
    }

    /** GET /version — responds with the value resolved by `getVersion()`. */
    private version(router: Router): void {
        router.get('/version', async (req: Request, res: Response) => {
            const version = await getVersion();
            res.send(version);
        });
    }

    // TODO add validators for request params, query, body
    /**
     * GET /check-status — initializes the DataStore component, then responds
     * with `nniManager.getStatus()`. On failure the error is reported to the
     * client and the REST server is stopped.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (req: Request, res: Response) => {
            const ds: DataStore = component.get<DataStore>(DataStore);
            ds.init().then(() => {
                res.send(this.nniManager.getStatus());
            }).catch(async (err: Error) => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.restServer.stop();
            });
        });
    }

    /** GET /experiment — responds with the experiment profile. */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (req: Request, res: Response) => {
            this.nniManager.getExperimentProfile().then((profile: ExperimentProfile) => {
                res.send(profile);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment — forwards the request body and the `update_type` query
     * parameter to `nniManager.updateExperimentProfile`.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            this.nniManager.updateExperimentProfile(req.body, req.query.update_type).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /experiment/import-data — passes the JSON-serialized body to `nniManager.importData`. */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            this.nniManager.importData(JSON.stringify(req.body)).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiment/imported-data — responds with the JSON-serialized imported data. */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (req: Request, res: Response) => {
            this.nniManager.getImportedData().then((importedData: string[]) => {
                res.send(JSON.stringify(importedData));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /experiment — starts a new experiment (responding with its id) when
     * `isNewExperiment()` is true, otherwise resumes the existing one.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            if (isNewExperiment()) {
                this.nniManager.startExperiment(req.body).then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                }).catch((err: Error) => {
                    // Start experiment is a step of initialization. The failure is reported to
                    // the client and logged, but isFatal is left at its default (false), so the
                    // process keeps running.
                    this.handleError(err, res);
                });
            } else {
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch((err: Error) => {
                    // Resume experiment is a step of initialization. As above, the failure is
                    // reported and logged without terminating the process.
                    this.handleError(err, res);
                });
            }
        });
    }

    /** GET /job-statistics — responds with the trial job statistics. */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (req: Request, res: Response) => {
            this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
                res.send(statistics);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment/cluster-metadata — validates the body against
     * `ValidationSchemas.SETCLUSTERMETADATA`, then sets each top-level key of the
     * body, one after another, via `nniManager.setClusterMetadata`.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (req: Request, res: Response) => {
                const metadata: any = req.body;
                const keys: string[] = Object.keys(metadata);
                try {
                    for (const key of keys) {
                        await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
                    }
                    res.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), res, true);
                }
            });
    }

    /**
     * GET /trial-jobs — lists trial jobs filtered by the `status` query
     * parameter; failed jobs get their `stderrPath` filled in.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.listTrialJobs(req.query.status).then((jobInfos: TrialJobInfo[]) => {
                jobInfos.forEach((trialJob: TrialJobInfo) => {
                    this.setErrorPathForFailedJob(trialJob);
                });
                res.send(jobInfos);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /trial-jobs/:id — responds with a single trial job's info. */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.getTrialJob(req.params.id).then((jobDetail: TrialJobInfo) => {
                const jobInfo: TrialJobInfo = this.setErrorPathForFailedJob(jobDetail);
                res.send(jobInfo);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /trial-jobs — adds a customized trial job and responds with `{ sequenceId }`. */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (req: Request, res: Response) => {
            this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then((sequenceId: number) => {
                res.send({sequenceId});
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /trial-jobs/:id — cancels the given trial job on behalf of the user. */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
            this.nniManager.cancelTrialJobByUser(req.params.id).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data/:job_id*? — responds with metric records for the
     * `job_id` path parameter and the `type` query parameter, both passed
     * through to `nniManager.getMetricData` as received.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (req: Request, res: Response) => {
            this.nniManager.getMetricData(req.params.job_id, req.query.type).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data-range/:min_seq_id/:max_seq_id — converts both path
     * parameters with `Number()` and responds with the matching metric records.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const minSeqId = Number(req.params.min_seq_id);
            const maxSeqId = Number(req.params.max_seq_id);
            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /metric-data-latest/ — responds with the latest metric records. */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (req: Request, res: Response) => {
            this.nniManager.getLatestMetricData().then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /trial-log/:id/:type — responds with the trial log, or the
     * placeholder text 'No logs available.' when the log is an empty string.
     */
    private getTrialLog(router: Router): void {
        router.get('/trial-log/:id/:type', async(req: Request, res: Response) => {
            this.nniManager.getTrialLog(req.params.id, req.params.type).then((log: string) => {
                if (log === '') {
                    log = 'No logs available.'
                }
                res.send(log);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /export-data — responds with the string resolved by `nniManager.exportData()`. */
    private exportData(router: Router): void {
        router.get('/export-data', (req: Request, res: Response) => {
            this.nniManager.exportData().then((exportedData: string) => {
                res.send(exportedData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiments-info — responds with the JSON-serialized experiments info. */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (req: Request, res: Response) => {
            this.experimentsManager.getExperimentsInfo().then((experimentInfo: JSON) => {
                res.send(JSON.stringify(experimentInfo));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /tensorboard — starts a tensorboard task from the request body and
     * responds with a shallow copy of its details. Failures are reported with
     * status 400 (or 404 for NOT_FOUND errors).
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            this.tensorboardManager.startTensorboardTask(req.body).then((taskDetail: TensorboardTaskInfo) => {
                this.log.info(taskDetail);
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res, false, 400);
            });
        });
    }

    /** GET /tensorboard/:id — responds with a shallow copy of the task's details. */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.getTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** PUT /tensorboard/:id — updates the task and responds with a shallow copy of its details. */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.updateTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard/:id — stops the task and responds with a shallow copy of its details. */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.stopTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard-tasks — stops every tensorboard task. */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.stopAllTensorboardTask().then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /tensorboard-tasks — responds with the list of tensorboard tasks. */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.listTensorboardTasks().then((taskDetails: TensorboardTaskInfo[]) => {
                res.send(taskDetails);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * DELETE /experiment — runs the top half of the experiment shutdown,
     * acknowledges the request, then runs the bottom half.
     *
     * A failure before the response is sent is reported to the client; a
     * failure afterwards can only be logged because the response is already out.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();

                // NOTE(review): presumably returns a promise; returning it lets a
                // rejection reach the catch below instead of going unhandled.
                return this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                if (res.headersSent) {
                    this.log.error(err);
                } else {
                    this.handleError(err, res);
                }
            });
        });
    }

    /**
     * Fills in `stderrPath` (`<logPath>/stderr`) for a job whose status is
     * 'FAILED' and whose `logPath` is defined. The object is modified in place.
     *
     * @param jobInfo trial job info; may be undefined
     * @returns the same `jobInfo` object that was passed in
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) {
            return jobInfo;
        }
        jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');

        return jobInfo;
    }
}

409
/**
 * Module entry point: constructs an NNIRestHandler bound to the given REST
 * server and returns the router it produces.
 *
 * @param rs REST server instance handed to the handler
 * @returns the Express router built by the handler
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}