// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import { Request, Response, Router } from 'express';
import path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log';
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
import { ExperimentManager } from '../common/experimentManager';
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
import { getVersion } from '../common/utils';
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
import { TrialJobStatus } from '../common/trainingService';

// TODO: fix expressJoi
//const expressJoi = require('express-joi-validator');

/**
 * Registers the REST endpoints backed by the NNI manager, the experiments
 * manager and the tensorboard manager on an Express router.
 *
 * Every private `xxx(router)` method attaches exactly one route.
 * `createRestHandler` installs the shared middleware and calls all of them.
 */
class NNIRestHandler {
    private restServer: NNIRestServer;
    private nniManager: Manager;
    private experimentsManager: ExperimentManager;
    private tensorboardManager: TensorboardManager;
    private log: Logger;

    /**
     * @param rs REST server owning this handler; it is stopped when the
     *           datastore fails to initialize (see `checkStatus`).
     */
    constructor(rs: NNIRestServer) {
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);
        this.restServer = rs;
        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Builds a router with the logging/header middleware, all routes, and the
     * trailing error middleware.
     *
     * @returns the fully configured Express router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Runs before every route: log the request and set the common response headers.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any): any => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Anything that is not a Boom error must be forwarded; otherwise no
            // response is ever produced and the request hangs.
            return next(err);
        });

        return router;
    }

    /**
     * Sends `{ error: err.message }` to the client and logs the error.
     *
     * @param err       error to report
     * @param res       response to write to
     * @param isFatal   when true, log at fatal level and exit the process with code 1
     * @param errorCode HTTP status used unless `err` is an `NNIError` named NOT_FOUND,
     *                  in which case 404 is used
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        const isNotFound = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
        res.status(isNotFound ? 404 : errorCode);
        res.send({
            error: err.message
        });

        // If it's a fatal error, exit process
        if (isFatal) {
            this.log.fatal(err);
            process.exit(1);
        } else {
            this.log.error(err);
        }
    }

    /** GET /version — responds with the value resolved by `getVersion()`. */
    private version(router: Router): void {
        router.get('/version', (_req: Request, res: Response) => {
            getVersion().then((version) => {
                res.send(version);
            }).catch((err: Error) => {
                // Without this a rejected lookup would leave the request unanswered.
                this.handleError(err, res);
            });
        });
    }

    // TODO add validators for request params, query, body
    /**
     * GET /check-status — initializes the datastore, then responds with the manager status.
     * If initialization fails, the error is reported and the REST server is stopped.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (_req: Request, res: Response) => {
            const ds: DataStore = component.get<DataStore>(DataStore);
            ds.init().then(() => {
                res.send(this.nniManager.getStatus());
            }).catch(async (err: Error) => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.restServer.stop();
            });
        });
    }

    /** GET /experiment — responds with the experiment profile. */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_req: Request, res: Response) => {
            this.nniManager.getExperimentProfile().then((profile: ExperimentProfile) => {
                res.send(profile);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment — forwards the request body and the `update_type` query
     * parameter to the manager; responds with an empty body on success.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            this.nniManager.updateExperimentProfile(req.body, req.query['update_type'] as ProfileUpdateType).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /experiment/import-data — passes the JSON-serialized body to the manager. */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            this.nniManager.importData(JSON.stringify(req.body)).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiment/imported-data — responds with the imported data serialized as JSON. */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_req: Request, res: Response) => {
            this.nniManager.getImportedData().then((importedData: string[]) => {
                res.send(JSON.stringify(importedData));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /experiment — starts a new experiment (responding with its id) when
     * `isNewExperiment()` is true, otherwise resumes the existing one.
     *
     * NOTE(review): failures here are reported as regular errors (isFatal is not
     * set), unlike `setClusterMetaData`, although both are initialization steps —
     * confirm this is intended.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            if (isNewExperiment()) {
                this.nniManager.startExperiment(req.body).then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                }).catch((err: Error) => {
                    this.handleError(err, res);
                });
            } else {
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch((err: Error) => {
                    this.handleError(err, res);
                });
            }
        });
    }

    /** GET /job-statistics — responds with the trial job statistics. */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_req: Request, res: Response) => {
            this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
                res.send(statistics);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment/cluster-metadata — applies each top-level key of the body,
     * one at a time and in key order, via `setClusterMetadata`.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (req: Request, res: Response) => {
                const metadata: any = req.body;
                try {
                    for (const key of Object.keys(metadata)) {
                        await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
                    }
                    res.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), res, true);
                }
            });
    }

    /**
     * GET /trial-jobs — lists trial jobs, optionally filtered by the `status`
     * query parameter; failed jobs get their `stderrPath` filled in.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.listTrialJobs(req.query['status'] as TrialJobStatus).then((jobInfos: TrialJobInfo[]) => {
                for (const trialJob of jobInfos) {
                    this.setErrorPathForFailedJob(trialJob);
                }
                res.send(jobInfos);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /trial-jobs/:id — responds with a single trial job (with `stderrPath` if it failed). */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.getTrialJob(req.params['id']).then((jobDetail: TrialJobInfo) => {
                res.send(this.setErrorPathForFailedJob(jobDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /trial-jobs — adds a customized trial job and responds with `{ sequenceId }`. */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then((sequenceId: number) => {
                res.send({sequenceId});
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /trial-jobs/:id — cancels the given trial job on behalf of the user. */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.cancelTrialJobByUser(req.params['id']).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data/:job_id? — responds with metric records, using the
     * optional `job_id` path parameter and `type` query parameter as filters.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', (req: Request, res: Response) => {
            this.nniManager.getMetricData(req.params['job_id'], req.query['type'] as MetricType).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data-range/:min_seq_id/:max_seq_id — responds with metric
     * records for the given sequence id bounds. Non-numeric bounds yield 400.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', (req: Request, res: Response) => {
            const minSeqId = Number(req.params['min_seq_id']);
            const maxSeqId = Number(req.params['max_seq_id']);

            // Reject garbage early instead of forwarding NaN to the manager.
            if (Number.isNaN(minSeqId) || Number.isNaN(maxSeqId)) {
                this.handleError(new Error('min_seq_id and max_seq_id must be numbers'), res, false, 400);
                return;
            }

            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /metric-data-latest/ — responds with the latest metric records. */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', (_req: Request, res: Response) => {
            this.nniManager.getLatestMetricData().then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /trial-file/:id/:filename — responds with the content of a trial file.
     * Buffers are sent as `application/octet-stream`, strings as `text/plain`.
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', (req: Request, res: Response) => {
            const filename = req.params['filename'];
            this.nniManager.getTrialFile(req.params['id'], filename).then((content: Buffer | string) => {
                const contentType = content instanceof Buffer ? 'application/octet-stream' : 'text/plain';
                res.header('Content-Type', contentType);
                if (content === '') {
                    content = `${filename} is empty.`;  // FIXME: this should be handled in front-end
                }
                res.send(content);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /export-data — responds with the exported experiment data. */
    private exportData(router: Router): void {
        router.get('/export-data', (_req: Request, res: Response) => {
            this.nniManager.exportData().then((exportedData: string) => {
                res.send(exportedData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /experiment-metadata — responds with the entry of the experiments info
     * whose `id` equals the current profile's id, or 404 when none matches.
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (_req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                this.experimentsManager.getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No entry matched: answer explicitly rather than leaving the request open.
                this.handleError(new Error(`Metadata of experiment ${profile.id} not found`), res, false, 404);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiments-info — responds with the experiments info serialized as JSON. */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_req: Request, res: Response) => {
            this.experimentsManager.getExperimentsInfo().then((experimentInfo: JSON) => {
                res.send(JSON.stringify(experimentInfo));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /tensorboard — starts a tensorboard task from the request body and
     * responds with a shallow copy of the task info. Failures are reported as 400.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            this.tensorboardManager.startTensorboardTask(req.body).then((taskDetail: TensorboardTaskInfo) => {
                this.log.info(taskDetail);
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res, false, 400);
            });
        });
    }

    /** GET /tensorboard/:id — responds with a shallow copy of the task info. */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.getTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** PUT /tensorboard/:id — updates the task and responds with a shallow copy of its info. */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.updateTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard/:id — stops the task and responds with a shallow copy of its info. */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.stopTensorboardTask(req.params['id']).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard-tasks — stops every tensorboard task. */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_req: Request, res: Response) => {
            this.tensorboardManager.stopAllTensorboardTask().then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /tensorboard-tasks — responds with the list of tensorboard tasks. */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_req: Request, res: Response) => {
            this.tensorboardManager.listTensorboardTasks().then((taskDetails: TensorboardTaskInfo[]) => {
                res.send(taskDetails);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * DELETE /experiment — runs the top half of the stop sequence, answers the
     * client, then triggers the bottom half.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (_req: Request, res: Response) => {
            // The rejection handler is passed as the second argument of `then` (not a
            // trailing `catch`) so it only covers the top half: once the response has
            // been sent, handleError must not try to write to it again.
            this.nniManager.stopExperimentTopHalf().then(
                () => {
                    res.send();
                    this.nniManager.stopExperimentBottomHalf();
                },
                (err: Error) => {
                    this.handleError(err, res);
                }
            );
        });
    }

    /**
     * For a job whose status is 'FAILED' and whose `logPath` is defined, sets
     * `stderrPath` to `<logPath>/stderr`. The job object is mutated in place.
     *
     * @param jobInfo job to inspect; may be undefined
     * @returns the same `jobInfo` reference
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) {
            return jobInfo;
        }
        jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');

        return jobInfo;
    }
}

/**
 * Creates an `NNIRestHandler` bound to the given REST server and returns the
 * router produced by its `createRestHandler` method.
 *
 * @param rs REST server handed to the handler's constructor
 * @returns the Express router built by the handler
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}