// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5
6
7
8
9
10
11

'use strict';

import { Request, Response, Router } from 'express';
import * as path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
12
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
13
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
14
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
15
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
16
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
17
18
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
19
import { getVersion } from '../common/utils';
20
21
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
Yuge Zhang's avatar
Yuge Zhang committed
22
import { TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
23

// TODO: fix expressJoi so request validation can be re-enabled.
//const expressJoi = require('express-joi-validator');

Deshui Yu's avatar
Deshui Yu committed
27
class NNIRestHandler {
28
    /** REST server that owns this handler; it is stopped when datastore initialization fails. */
    private readonly restServer: NNIRestServer;
    /** Manager instance resolved from the component container; backs the experiment/trial/metric routes. */
    private readonly nniManager: Manager;
    /** Experiment manager resolved from the component container; backs the experiments-info routes. */
    private readonly experimentsManager: ExperimentManager;
    /** Tensorboard manager resolved from the component container; backs the tensorboard routes. */
    private readonly tensorboardManager: TensorboardManager;
    /** Logger created with the name 'NNIRestHandler'. */
    private readonly log: Logger;

34
    /**
     * Wire the handler to its collaborators.
     *
     * @param rs REST server this handler belongs to; kept so the handler can stop it.
     */
    constructor(rs: NNIRestServer) {
        this.restServer = rs;

        // Managers are looked up from the shared component container.
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);

        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Build the Express router that exposes every REST endpoint of this handler.
     *
     * A first middleware logs each request, sets the CORS-related headers and
     * defaults the response Content-Type to JSON. Routes are then registered in a
     * fixed order, followed by an error-handling middleware.
     *
     * @returns the fully configured router.
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.getExperimentMetadata(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialFile(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any) => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Any other error must be forwarded; otherwise the request is never
            // answered and the client hangs until its own timeout.
            return next(err);
        });

        return router;
    }

chicm-ms's avatar
chicm-ms committed
94
    /**
     * Report a failure to the client and to the log.
     *
     * The response body is `{ error: <message> }`. An NNIError named NOT_FOUND is
     * always answered with 404; every other error uses `errorCode`.
     *
     * @param err the failure to report.
     * @param res response of the request that failed.
     * @param isFatal when true the error is logged as fatal and the process exits with code 1.
     * @param errorCode HTTP status used for errors other than NOT_FOUND.
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        const isNotFound = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
        res.status(isNotFound ? 404 : errorCode).send({
            error: err.message
        });

        if (!isFatal) {
            this.log.error(err);
            return;
        }

        // Fatal: log and terminate.
        // NOTE(review): the process exits right after the response is queued;
        // confirm whether the client reliably receives it.
        this.log.fatal(err);
        process.exit(1);
    }

Gems Guo's avatar
Gems Guo committed
113
114
    /**
     * Register `GET /version`, which replies with the value resolved by `getVersion()`.
     *
     * @param router router to attach the route to.
     */
    private version(router: Router): void {
        router.get('/version', async (req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Without this, a rejection would be unhandled and the request would never be answered.
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

Deshui Yu's avatar
Deshui Yu committed
120
121
122
123
124
    // TODO add validators for request params, query, body
    /**
     * Register `GET /check-status`.
     *
     * The handler initializes the DataStore component and then replies with
     * `nniManager.getStatus()`. If that fails, the error is reported to the
     * client, logged, and the REST server is stopped.
     *
     * @param router router to attach the route to.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (_request: Request, response: Response) => {
            const replyWithStatus = (): void => {
                response.send(this.nniManager.getStatus());
            };
            const reportAndShutDown = async (failure: Error): Promise<void> => {
                this.handleError(failure, response);
                this.log.error(failure.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.restServer.stop();
            };

            const dataStore: DataStore = component.get<DataStore>(DataStore);
            dataStore.init().then(replyWithStatus).catch(reportAndShutDown);
        });
    }

    /**
     * Register `GET /experiment`, which replies with the profile resolved by
     * `nniManager.getExperimentProfile()`.
     *
     * @param router router to attach the route to.
     */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (_request: Request, response: Response) => {
            this.nniManager.getExperimentProfile()
                .then((experimentProfile: ExperimentProfile) => {
                    response.send(experimentProfile);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `PUT /experiment`.
     *
     * The request body and the `update_type` query parameter are passed to
     * `nniManager.updateExperimentProfile()`; an empty response is sent on success.
     *
     * @param router router to attach the route to.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (request: Request, response: Response) => {
            const onFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.updateExperimentProfile(request.body, request.query.update_type as ProfileUpdateType)
                .then(() => {
                    response.send();
                })
                .catch(onFailure);
        });
    }
154

155
156
157
158
159
    /**
     * Register `POST /experiment/import-data`.
     *
     * The request body is serialized to a JSON string and handed to
     * `nniManager.importData()`; an empty response is sent on success.
     *
     * @param router router to attach the route to.
     */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (request: Request, response: Response) => {
            const serialized = JSON.stringify(request.body);
            this.nniManager.importData(serialized)
                .then(() => {
                    response.send();
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }
Deshui Yu's avatar
Deshui Yu committed
164

165
166
167
168
169
170
171
172
173
174
    /**
     * Register `GET /experiment/imported-data`, which replies with the JSON
     * serialization of the string array resolved by `nniManager.getImportedData()`.
     *
     * @param router router to attach the route to.
     */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (_request: Request, response: Response) => {
            const sendImported = (records: string[]): void => {
                response.send(JSON.stringify(records));
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getImportedData().then(sendImported).catch(sendFailure);
        });
    }

Deshui Yu's avatar
Deshui Yu committed
175
    /**
     * Register `POST /experiment`.
     *
     * For a new experiment the request body is passed to
     * `nniManager.startExperiment()` and the reply is `{ experiment_id }`.
     * Otherwise the experiment is resumed (read-only or not, per `isReadonly()`)
     * and an empty reply is sent. Failures in either path are reported through
     * `handleError` with its default, non-fatal settings.
     *
     * @param router router to attach the route to.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (request: Request, response: Response) => {
            // Both start and resume are initialization steps and share the same failure reporting.
            const reportFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            if (!isNewExperiment()) {
                this.nniManager.resumeExperiment(isReadonly())
                    .then(() => {
                        response.send();
                    })
                    .catch(reportFailure);
                return;
            }

            this.nniManager.startExperiment(request.body)
                .then((eid: string) => {
                    response.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                })
                .catch(reportFailure);
        });
    }

    /**
     * Register `GET /job-statistics`, which replies with the statistics array
     * resolved by `nniManager.getTrialJobStatistics()`.
     *
     * @param router router to attach the route to.
     */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (_request: Request, response: Response) => {
            this.nniManager.getTrialJobStatistics()
                .then((stats: TrialJobStatistics[]) => {
                    response.send(stats);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `PUT /experiment/cluster-metadata`.
     *
     * Each top-level key of the request body is forwarded, one at a time and in
     * key order, to `nniManager.setClusterMetadata()` with its value serialized
     * as JSON. An empty reply is sent once all keys are applied.
     *
     * @param router router to attach the route to.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (request: Request, response: Response) => {
                const settings: any = request.body;
                const settingNames: string[] = Object.keys(settings);
                try {
                    // Applied sequentially: each call is awaited before the next starts.
                    for (const settingName of settingNames) {
                        const serializedValue = JSON.stringify(settings[settingName]);
                        await this.nniManager.setClusterMetadata(settingName, serializedValue);
                    }
                    response.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), response, true);
                }
            }
        );
    }

    /**
     * Register `GET /trial-jobs`.
     *
     * The optional `status` query parameter is passed to
     * `nniManager.listTrialJobs()`. Each returned job goes through
     * `setErrorPathForFailedJob` before the list is sent.
     *
     * @param router router to attach the route to.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (request: Request, response: Response) => {
            this.nniManager.listTrialJobs(request.query.status as TrialJobStatus)
                .then((jobs: TrialJobInfo[]) => {
                    for (const job of jobs) {
                        this.setErrorPathForFailedJob(job);
                    }
                    response.send(jobs);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `GET /trial-jobs/:id`, which replies with the job resolved by
     * `nniManager.getTrialJob()` after it has been passed through
     * `setErrorPathForFailedJob`.
     *
     * @param router router to attach the route to.
     */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (request: Request, response: Response) => {
            const sendJob = (detail: TrialJobInfo): void => {
                response.send(this.setErrorPathForFailedJob(detail));
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getTrialJob(request.params.id).then(sendJob).catch(sendFailure);
        });
    }

    /**
     * Register `POST /trial-jobs`.
     *
     * The request body is serialized to JSON and passed to
     * `nniManager.addCustomizedTrialJob()`; the reply is `{ sequenceId }`.
     *
     * @param router router to attach the route to.
     */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', async (request: Request, response: Response) => {
            const serializedJob = JSON.stringify(request.body);
            this.nniManager.addCustomizedTrialJob(serializedJob)
                .then((sequenceId: number) => {
                    response.send({ sequenceId: sequenceId });
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `DELETE /trial-jobs/:id`, which passes the id to
     * `nniManager.cancelTrialJobByUser()` and sends an empty reply on success.
     *
     * @param router router to attach the route to.
     */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', async (request: Request, response: Response) => {
            const trialJobId = request.params.id;
            this.nniManager.cancelTrialJobByUser(trialJobId)
                .then(() => {
                    response.send();
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `GET /metric-data/:job_id*?`.
     *
     * The `job_id` path parameter and the `type` query parameter are passed to
     * `nniManager.getMetricData()`; the resolved records are sent back.
     *
     * @param router router to attach the route to.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', async (request: Request, response: Response) => {
            const sendRecords = (records: MetricDataRecord[]): void => {
                response.send(records);
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.getMetricData(request.params.job_id, request.query.type as MetricType)
                .then(sendRecords)
                .catch(sendFailure);
        });
    }

    /**
     * Register `GET /metric-data-range/:min_seq_id/:max_seq_id`.
     *
     * Both path parameters are converted with `Number()`. If either is not a
     * number the request is rejected with HTTP 400; otherwise they are passed to
     * `nniManager.getMetricDataByRange()` and the resolved records are sent back.
     *
     * @param router router to attach the route to.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', async (req: Request, res: Response) => {
            const minSeqId = Number(req.params.min_seq_id);
            const maxSeqId = Number(req.params.max_seq_id);

            // Non-numeric path segments convert to NaN; fail fast instead of querying with NaN bounds.
            if (Number.isNaN(minSeqId) || Number.isNaN(maxSeqId)) {
                this.handleError(new Error('min_seq_id and max_seq_id must be numbers'), res, false, 400);
                return;
            }

            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * Register `GET /metric-data-latest/`, which replies with the records
     * resolved by `nniManager.getLatestMetricData()`.
     *
     * @param router router to attach the route to.
     */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', async (_request: Request, response: Response) => {
            this.nniManager.getLatestMetricData()
                .then((latest: MetricDataRecord[]) => {
                    response.send(latest);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
301
302
303
304
305
306
307
308
309
310
311
312
    /**
     * Register `GET /trial-file/:id/:filename`.
     *
     * The trial id and file name are passed to `nniManager.getTrialFile()`.
     * Buffer content is sent with Content-Type `application/octet-stream`; an
     * empty string is replaced by the message "<filename> is empty."; any other
     * string is sent as-is.
     *
     * @param router router to attach the route to.
     */
    private getTrialFile(router: Router): void {
        router.get('/trial-file/:id/:filename', async(req: Request, res: Response) => {
            const filename = req.params.filename;
            this.nniManager.getTrialFile(req.params.id, filename).then((content: Buffer | string) => {
                // Work on a local copy rather than reassigning the callback parameter.
                let payload: Buffer | string = content;
                if (content instanceof Buffer) {
                    res.header('Content-Type', 'application/octet-stream');
                } else if (content === '') {
                    payload = `${filename} is empty.`;
                }
                res.send(payload);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

321
322
323
324
325
    /**
     * Register `GET /export-data`, which replies with the string resolved by
     * `nniManager.exportData()`.
     *
     * @param router router to attach the route to.
     */
    private exportData(router: Router): void {
        router.get('/export-data', (_request: Request, response: Response) => {
            const sendExport = (exported: string): void => {
                response.send(exported);
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.nniManager.exportData().then(sendExport).catch(sendFailure);
        });
    }

Yuge Zhang's avatar
Yuge Zhang committed
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
    /**
     * Register `GET /experiment-metadata`.
     *
     * Fetches the current experiment profile and the experiments-info list in
     * parallel, then replies with the first entry whose `id` equals the profile's
     * `id`. If no entry matches, replies with HTTP 404 and an `{ error }` body.
     *
     * @param router router to attach the route to.
     */
    private getExperimentMetadata(router: Router): void {
        router.get('/experiment-metadata', (req: Request, res: Response) => {
            Promise.all([
                this.nniManager.getExperimentProfile(),
                this.experimentsManager.getExperimentsInfo()
            ]).then(([profile, experimentInfo]) => {
                for (const info of experimentInfo as any) {
                    if (info.id === profile.id) {
                        res.send(info);
                        return;
                    }
                }
                // No entry matched: answer explicitly, otherwise the request would never complete.
                res.status(404).send({
                    error: `Metadata of experiment ${profile.id} not found`
                });
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

349
350
351
352
353
354
355
356
357
358
    /**
     * Register `GET /experiments-info`, which replies with the JSON
     * serialization of the value resolved by `experimentsManager.getExperimentsInfo()`.
     *
     * @param router router to attach the route to.
     */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (_request: Request, response: Response) => {
            this.experimentsManager.getExperimentsInfo()
                .then((allExperiments: JSON) => {
                    const serialized = JSON.stringify(allExperiments);
                    response.send(serialized);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

J-shang's avatar
J-shang committed
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
    /**
     * Register `POST /tensorboard`.
     *
     * The request body is passed to `tensorboardManager.startTensorboardTask()`.
     * The resolved task is logged and a shallow copy of it is sent back.
     * Failures are reported with HTTP 400 (or 404 for NOT_FOUND errors).
     *
     * @param router router to attach the route to.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (request: Request, response: Response) => {
            this.tensorboardManager.startTensorboardTask(request.body)
                .then((task: TensorboardTaskInfo) => {
                    this.log.info(task);
                    const copy = Object.assign({}, task);
                    response.send(copy);
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response, false, 400);
                });
        });
    }

    /**
     * Register `GET /tensorboard/:id`, which replies with a shallow copy of the
     * task resolved by `tensorboardManager.getTensorboardTask()`.
     *
     * @param router router to attach the route to.
     */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (request: Request, response: Response) => {
            const sendTask = (task: TensorboardTaskInfo): void => {
                response.send(Object.assign({}, task));
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.getTensorboardTask(request.params.id).then(sendTask).catch(sendFailure);
        });
    }

    /**
     * Register `PUT /tensorboard/:id`, which replies with a shallow copy of the
     * task resolved by `tensorboardManager.updateTensorboardTask()`.
     *
     * @param router router to attach the route to.
     */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (request: Request, response: Response) => {
            const taskId = request.params.id;
            this.tensorboardManager.updateTensorboardTask(taskId)
                .then((task: TensorboardTaskInfo) => {
                    response.send(Object.assign({}, task));
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `DELETE /tensorboard/:id`, which replies with a shallow copy of
     * the task resolved by `tensorboardManager.stopTensorboardTask()`.
     *
     * @param router router to attach the route to.
     */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (request: Request, response: Response) => {
            const sendTask = (task: TensorboardTaskInfo): void => {
                const copy = Object.assign({}, task);
                response.send(copy);
            };

            this.tensorboardManager.stopTensorboardTask(request.params.id)
                .then(sendTask)
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `DELETE /tensorboard-tasks`, which calls
     * `tensorboardManager.stopAllTensorboardTask()` and sends an empty reply on success.
     *
     * @param router router to attach the route to.
     */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (_request: Request, response: Response) => {
            this.tensorboardManager.stopAllTensorboardTask()
                .then(() => {
                    response.send();
                })
                .catch((failure: Error) => {
                    this.handleError(failure, response);
                });
        });
    }

    /**
     * Register `GET /tensorboard-tasks`, which replies with the task list
     * resolved by `tensorboardManager.listTensorboardTasks()`.
     *
     * @param router router to attach the route to.
     */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (_request: Request, response: Response) => {
            const sendTasks = (tasks: TensorboardTaskInfo[]): void => {
                response.send(tasks);
            };
            const sendFailure = (failure: Error): void => {
                this.handleError(failure, response);
            };

            this.tensorboardManager.listTensorboardTasks().then(sendTasks).catch(sendFailure);
        });
    }

420
421
422
423
424
425
426
427
428
    /**
     * Register `DELETE /experiment`.
     *
     * Runs `nniManager.stopExperimentTopHalf()`, sends an empty reply, and only
     * then triggers `nniManager.stopExperimentBottomHalf()`. A failure before the
     * reply is reported to the client; a failure after it is only logged, because
     * the response has already been sent.
     *
     * @param router router to attach the route to.
     */
    private stop(router: Router): void {
        router.delete('/experiment', (req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();
                // NOTE(review): the result of the bottom half is not awaited here —
                // confirm whether it returns a promise that needs its own handling.
                this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                if (res.headersSent) {
                    // Too late to change the response; record the failure instead.
                    this.log.error(err);
                } else {
                    this.handleError(err, res);
                }
            });
        });
    }

Deshui Yu's avatar
Deshui Yu committed
429
430
431
432
    /**
     * Fill in `stderrPath` for a failed trial job.
     *
     * When the job is defined, has status 'FAILED' and has a `logPath`, its
     * `stderrPath` is set to `<logPath>/stderr`. In every other case the job is
     * left untouched. The object is modified in place.
     *
     * @param jobInfo trial job to inspect.
     * @returns the same `jobInfo` object that was passed in.
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo !== undefined && jobInfo.status === 'FAILED' && jobInfo.logPath !== undefined) {
            jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');
        }

        return jobInfo;
    }
}

439
/**
 * Create the REST router for the given server.
 *
 * @param rs REST server the handler is bound to.
 * @returns the router produced by a new `NNIRestHandler`.
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}