restHandler.ts 17 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4

import { Request, Response, Router } from 'express';
5
import path from 'path';
Deshui Yu's avatar
Deshui Yu committed
6
7
8
9

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
10
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
11
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
12
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
13
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
14
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
15
import { ValidationSchemas } from './restValidationSchemas';
16
import { getVersion } from '../common/utils';
17
18
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
Yuge Zhang's avatar
Yuge Zhang committed
19
import { TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
20

21
22
// TODO: fix expressJoi
//const expressJoi = require('express-joi-validator');
23

Deshui Yu's avatar
Deshui Yu committed
24
/**
 * Builds the Express router that exposes the NNI manager, the experiments
 * manager and the TensorBoard manager over REST.
 *
 * Every private `xxx(router)` method registers exactly one route. Unless noted
 * otherwise, a rejected manager call is turned into an HTTP error response by
 * {@link NNIRestHandler.handleError}.
 */
class NNIRestHandler {
    /** Invoked when the datastore fails to initialize, to shut the REST server down. */
    private readonly stopCallback: () => Promise<void>;
    private readonly nniManager: Manager;
    private readonly experimentsManager: ExperimentManager;
    private readonly tensorboardManager: TensorboardManager;
    private readonly log: Logger;

    /**
     * @param stopCallback - awaited by the `/check-status` route when
     *   `DataStore.init()` rejects.
     */
    constructor(stopCallback: () => Promise<void>) {
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);
        this.stopCallback = stopCallback;
        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Creates a router, installs the common request middleware, registers all
     * routes and finally installs the error middleware.
     *
     * @returns the fully configured router.
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Runs for every request: log it and set CORS / content-type headers.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration.
        // The four-parameter signature is what makes Express treat this as error middleware.
        router.use((err: any, _req: Request, res: Response, next: any): any => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a Boom error: hand it on instead of swallowing it, otherwise
            // the request would never receive a response.
            return next(err);
        });

        return router;
    }

    /**
     * Sends `{ error: err.message }` to the client and logs the error.
     *
     * @param err - the error to report.
     * @param res - response to write to.
     * @param isFatal - when true, the error is logged as critical and the
     *   process exits with code 1 after the response has been queued.
     * @param errorCode - HTTP status used unless `err` is an `NNIError` named
     *   `NOT_FOUND`, in which case 404 is used.
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        if (err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND) {
            res.status(404);
        } else {
            res.status(errorCode);
        }
        res.send({
            error: err.message
        });

        // If it's a fatal error, exit process
        if (isFatal) {
            this.log.critical(err);
            process.exit(1);
        } else {
            this.log.error(err);
        }
    }

    /** `GET /version` - responds with the value resolved by `getVersion()`. */
    private version(router: Router): void {
        router.get('/version', async (_req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Without this, a rejection would leave the request unanswered.
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

    // TODO add validators for request params, query, body
    /**
     * `GET /check-status` - initializes the datastore, then responds with
     * `nniManager.getStatus()`. If initialization fails the error is reported
     * to the client and `stopCallback` is awaited.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (_req: Request, res: Response) => {
            const ds: DataStore = component.get<DataStore>(DataStore);
            ds.init().then(() => {
                res.send(this.nniManager.getStatus());
            }).catch(async (err: Error) => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.stopCallback();
            });
        });
    }

    /** `GET /experiment` - responds with the experiment profile. */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_req: Request, res: Response) => {
            this.nniManager.getExperimentProfile().then((profile: ExperimentProfile) => {
                res.send(profile);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `PUT /experiment` - passes the request body and the `update_type` query
     * parameter to `nniManager.updateExperimentProfile`; empty response on success.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            this.nniManager.updateExperimentProfile(req.body, req.query['update_type'] as ProfileUpdateType).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `POST /experiment/import-data` - forwards the JSON-serialized body to `nniManager.importData`. */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            this.nniManager.importData(JSON.stringify(req.body)).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /experiment/imported-data` - responds with the imported data as a JSON string. */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_req: Request, res: Response) => {
            this.nniManager.getImportedData().then((importedData: string[]) => {
                res.send(JSON.stringify(importedData));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `POST /experiment` - starts a new experiment (responding with its id) when
     * `isNewExperiment()` is true, otherwise resumes the existing one.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            if (isNewExperiment()) {
                this.nniManager.startExperiment(req.body).then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                }).catch((err: Error) => {
                    // Start experiment is a step of initialization. The failure is
                    // reported as an ordinary, non-fatal error response (isFatal
                    // keeps its default). NOTE(review): confirm this should not be fatal.
                    this.handleError(err, res);
                });
            } else {
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch((err: Error) => {
                    // Resume experiment is a step of initialization. Reported the
                    // same non-fatal way as a failed start. NOTE(review): confirm.
                    this.handleError(err, res);
                });
            }
        });
    }

    /** `GET /job-statistics` - responds with the trial job statistics. */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_req: Request, res: Response) => {
            this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
                res.send(statistics);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `PUT /experiment/cluster-metadata` - for each top-level key of the body,
     * calls `nniManager.setClusterMetadata(key, JSON.stringify(value))`, one
     * key at a time. Any failure is treated as fatal.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (req: Request, res: Response) => {
                const metadata: any = req.body;
                const keys: string[] = Object.keys(metadata);
                try {
                    // Sequential on purpose: each key is awaited before the next is sent.
                    for (const key of keys) {
                        await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
                    }
                    res.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), res, true);
                }
            });
    }

    /**
     * `GET /trial-jobs` - lists trial jobs, optionally filtered by the `status`
     * query parameter. Failed jobs get their `stderrPath` filled in.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.listTrialJobs(req.query['status'] as TrialJobStatus).then((jobInfos: TrialJobInfo[]) => {
                jobInfos.forEach((trialJob: TrialJobInfo) => {
                    this.setErrorPathForFailedJob(trialJob);
                });
                res.send(jobInfos);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /trial-jobs/:id` - responds with one trial job, with `stderrPath` set if it failed. */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.getTrialJob(req.params['id']).then((jobDetail: TrialJobInfo) => {
                const jobInfo: TrialJobInfo = this.setErrorPathForFailedJob(jobDetail);
                res.send(jobInfo);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `POST /trial-jobs` - adds a customized trial job from the JSON-serialized body; responds with `{ sequenceId }`. */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (req: Request, res: Response) => {
            this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then((sequenceId: number) => {
                res.send({sequenceId});
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `DELETE /trial-jobs/:id` - calls `nniManager.cancelTrialJobByUser` for the given id. */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (req: Request, res: Response) => {
            this.nniManager.cancelTrialJobByUser(req.params['id']).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `GET /metric-data/:job_id*?` - responds with metric records; the
     * `job_id` path parameter and the `type` query parameter are passed
     * through to `nniManager.getMetricData`.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (req: Request, res: Response) => {
            this.nniManager.getMetricData(req.params['job_id'], req.query['type'] as MetricType).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `GET /metric-data-range/:min_seq_id/:max_seq_id` - converts both path
     * parameters with `Number()` and passes them to `nniManager.getMetricDataByRange`.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const minSeqId = Number(req.params['min_seq_id']);
            const maxSeqId = Number(req.params['max_seq_id']);
            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /metric-data-latest/` - responds with the result of `nniManager.getLatestMetricData()`. */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (_req: Request, res: Response) => {
            this.nniManager.getLatestMetricData().then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `GET /trial-file/:id/:filename` - responds with the content returned by
     * `nniManager.getTrialFile`. The content type is `application/octet-stream`
     * for a `Buffer` and `text/plain` otherwise.
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', async(req: Request, res: Response) => {
            const filename = req.params['filename'];
            this.nniManager.getTrialFile(req.params['id'], filename).then((content: Buffer | string) => {
                const contentType = content instanceof Buffer ? 'application/octet-stream' : 'text/plain';
                res.header('Content-Type', contentType);
                if (content === '') {
                    content = `${filename} is empty.`;  // FIXME: this should be handled in front-end
                }
                res.send(content);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /export-data` - responds with the string resolved by `nniManager.exportData()`. */
    private exportData(router: Router): void {
        router.get('/export-data', (_req: Request, res: Response) => {
            this.nniManager.exportData().then((exportedData: string) => {
                res.send(exportedData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `GET /experiment-metadata` - responds with the entry of
     * `experimentsManager.getExperimentsInfo()` whose `id` equals the current
     * experiment profile's `id`, or with 404 when there is no such entry.
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (_req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                this.experimentsManager.getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No matching entry: answer explicitly so the request does not hang.
                this.handleError(new Error(`Metadata of experiment ${profile.id} not found`), res, false, 404);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /experiments-info` - responds with the JSON-serialized experiments info. */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_req: Request, res: Response) => {
            this.experimentsManager.getExperimentsInfo().then((experimentInfo: JSON) => {
                res.send(JSON.stringify(experimentInfo));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `POST /tensorboard` - starts a TensorBoard task from the request body and
     * responds with a shallow copy of the task info. Failures are reported with
     * status 400.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            this.tensorboardManager.startTensorboardTask(req.body).then((taskDetail: TensorboardTaskInfo) => {
                this.log.info(taskDetail);
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res, false, 400);
            });
        });
    }

    /** `GET /tensorboard/:id` - responds with a shallow copy of the task info. */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.getTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `PUT /tensorboard/:id` - calls `updateTensorboardTask` and responds with a shallow copy of the task info. */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.updateTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `DELETE /tensorboard/:id` - calls `stopTensorboardTask` and responds with a shallow copy of the task info. */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.stopTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `DELETE /tensorboard-tasks` - calls `stopAllTensorboardTask`; empty response on success. */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_req: Request, res: Response) => {
            this.tensorboardManager.stopAllTensorboardTask().then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** `GET /tensorboard-tasks` - responds with the list returned by `listTensorboardTasks`. */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_req: Request, res: Response) => {
            this.tensorboardManager.listTensorboardTasks().then((taskDetails: TensorboardTaskInfo[]) => {
                res.send(taskDetails);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * `DELETE /experiment` - awaits `stopExperimentTopHalf()`, sends an empty
     * response, then kicks off `stopExperimentBottomHalf()` without waiting for it.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (_req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();
                this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                if (res.headersSent) {
                    // The reply already went out; a second one would throw, so only log.
                    this.log.error(err);
                } else {
                    this.handleError(err, res);
                }
            });
        });
    }

    /**
     * For a job whose status is `'FAILED'` and whose `logPath` is defined, sets
     * `stderrPath` to `<logPath>/stderr`. The object is mutated in place.
     *
     * @param jobInfo - the job to inspect (may be undefined at runtime).
     * @returns the same `jobInfo` reference that was passed in.
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) {
            return jobInfo;
        }
        jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');

        return jobInfo;
    }
}

liuzhe-lz's avatar
liuzhe-lz committed
436
437
/**
 * Module entry point: constructs an `NNIRestHandler` with the given callback
 * and returns the router it builds.
 *
 * @param stopCallback - passed unchanged to the `NNIRestHandler` constructor.
 * @returns the router produced by `NNIRestHandler.createRestHandler()`.
 */
export function createRestHandler(stopCallback: () => Promise<void>): Router {
    return new NNIRestHandler(stopCallback).createRestHandler();
}