"src/targets/gpu/vscode:/vscode.git/clone" did not exist on "26aabd2af42a1a07bcfca3e670bf0a037d5255dc"
restHandler.ts 15.8 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5
6
7
8
9
10
11

'use strict';

import { Request, Response, Router } from 'express';
import * as path from 'path';

import * as component from '../common/component';
import { DataStore, MetricDataRecord, TrialJobInfo } from '../common/datastore';
import { NNIError, NNIErrorNames } from '../common/errors';
SparkSnail's avatar
SparkSnail committed
12
import { isNewExperiment, isReadonly } from '../common/experimentStartupInfo';
Deshui Yu's avatar
Deshui Yu committed
13
import { getLogger, Logger } from '../common/log';
chicm-ms's avatar
chicm-ms committed
14
import { ExperimentProfile, Manager, TrialJobStatistics } from '../common/manager';
15
import { ExperimentManager } from '../common/experimentManager';
J-shang's avatar
J-shang committed
16
import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardManager';
17
18
import { ValidationSchemas } from './restValidationSchemas';
import { NNIRestServer } from './nniRestServer';
19
import { getVersion } from '../common/utils';
20
21
22
import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager';
import { LogType, TrialJobStatus } from '../common/trainingService';
Deshui Yu's avatar
Deshui Yu committed
23

24
25
const expressJoi = require('express-joi-validator');

Deshui Yu's avatar
Deshui Yu committed
26
/**
 * Registers the NNI manager REST endpoints on an Express router.
 *
 * Every private `xxx(router)` method wires exactly one route. `createRestHandler()` assembles them
 * together with the shared logging/header middleware and the validation error handler.
 */
class NNIRestHandler {
    private readonly restServer: NNIRestServer;
    private readonly nniManager: Manager;
    private readonly experimentsManager: ExperimentManager;
    private readonly tensorboardManager: TensorboardManager;
    private readonly log: Logger;

    /**
     * @param rs REST server owning this handler; it is stopped when datastore initialization fails
     *           in the `/check-status` route.
     */
    constructor(rs: NNIRestServer) {
        this.nniManager = component.get(Manager);
        this.experimentsManager = component.get(ExperimentManager);
        this.tensorboardManager = component.get(TensorboardManager);
        this.restServer = rs;
        this.log = getLogger('NNIRestHandler');
    }

    /**
     * Builds a router with all routes of this handler registered.
     *
     * @returns the configured Express router
     */
    public createRestHandler(): Router {
        const router: Router = Router();

        // Runs before every route: logs the request and sets the common response headers.
        router.use((req: Request, res: Response, next) => {
            this.log.debug(`${req.method}: ${req.url}: body:`, req.body);
            res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept');
            res.header('Access-Control-Allow-Methods', 'PUT,POST,GET,DELETE,OPTIONS');

            res.setHeader('Content-Type', 'application/json');
            next();
        });

        this.version(router);
        this.checkStatus(router);
        this.getExperimentProfile(router);
        this.updateExperimentProfile(router);
        this.importData(router);
        this.getImportedData(router);
        this.startExperiment(router);
        this.getTrialJobStatistics(router);
        this.setClusterMetaData(router);
        this.listTrialJobs(router);
        this.getTrialJob(router);
        this.addTrialJob(router);
        this.cancelTrialJob(router);
        this.getMetricData(router);
        this.getMetricDataByRange(router);
        this.getLatestMetricData(router);
        this.getTrialLog(router);
        this.exportData(router);
        this.getExperimentsInfo(router);
        this.startTensorboardTask(router);
        this.getTensorboardTask(router);
        this.updateTensorboardTask(router);
        this.stopTensorboardTask(router);
        this.stopAllTensorboardTask(router);
        this.listTensorboardTask(router);
        this.stop(router);

        // Express-joi-validator configuration
        router.use((err: any, _req: Request, res: Response, next: any) => {
            if (err.isBoom) {
                this.log.error(err.output.payload);

                return res.status(err.output.statusCode).json(err.output.payload);
            }

            // Not a validation error: pass it on so the request is still answered by the next
            // error handler instead of being left hanging.
            return next(err);
        });

        return router;
    }

    /**
     * Reports an error to the client and to the log.
     *
     * @param err       error to report; its `message` becomes the `error` field of the response body
     * @param res       response to write to
     * @param isFatal   when true, logs at fatal level and exits the process with code 1
     * @param errorCode HTTP status to use unless `err` is an `NNIError` named NOT_FOUND (then 404)
     */
    private handleError(err: Error, res: Response, isFatal: boolean = false, errorCode: number = 500): void {
        // A response can be written only once; if one already went out, only log the error.
        if (!res.headersSent) {
            const isNotFound: boolean = err instanceof NNIError && err.name === NNIErrorNames.NOT_FOUND;
            res.status(isNotFound ? 404 : errorCode);
            res.send({
                error: err.message
            });
        }

        // If it's a fatal error, exit process
        if (isFatal) {
            this.log.fatal(err);
            process.exit(1);
        } else {
            this.log.error(err);
        }
    }

    /** GET /version: responds with the value resolved by `getVersion()`. */
    private version(router: Router): void {
        router.get('/version', async (req: Request, res: Response) => {
            try {
                const version = await getVersion();
                res.send(version);
            } catch (err) {
                // Without this the rejection would be unhandled and the request would never finish.
                this.handleError(NNIError.FromError(err), res);
            }
        });
    }

    // TODO add validators for request params, query, body
    /**
     * GET /check-status: initializes the datastore, then responds with the manager status.
     * If initialization fails, the error is reported and the REST server is stopped.
     */
    private checkStatus(router: Router): void {
        router.get('/check-status', (req: Request, res: Response) => {
            const ds: DataStore = component.get<DataStore>(DataStore);
            ds.init().then(() => {
                res.send(this.nniManager.getStatus());
            }).catch(async (err: Error) => {
                this.handleError(err, res);
                this.log.error(err.message);
                this.log.error(`Datastore initialize failed, stopping rest server...`);
                await this.restServer.stop();
            });
        });
    }

    /** GET /experiment: responds with the experiment profile. */
    private getExperimentProfile(router: Router): void {
        router.get('/experiment', (req: Request, res: Response) => {
            this.nniManager.getExperimentProfile().then((profile: ExperimentProfile) => {
                res.send(profile);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment: forwards the request body and the `update_type` query parameter to
     * `updateExperimentProfile`; responds with an empty body on success.
     */
    private updateExperimentProfile(router: Router): void {
        router.put('/experiment', (req: Request, res: Response) => {
            this.nniManager.updateExperimentProfile(req.body, req.query.update_type as ProfileUpdateType).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /experiment/import-data: passes the JSON-serialized request body to `importData`. */
    private importData(router: Router): void {
        router.post('/experiment/import-data', (req: Request, res: Response) => {
            this.nniManager.importData(JSON.stringify(req.body)).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiment/imported-data: responds with the imported data serialized as JSON. */
    private getImportedData(router: Router): void {
        router.get('/experiment/imported-data', (req: Request, res: Response) => {
            this.nniManager.getImportedData().then((importedData: string[]) => {
                res.send(JSON.stringify(importedData));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /experiment: starts a new experiment from the request body when `isNewExperiment()`
     * is true (responding with `experiment_id`); otherwise resumes the existing experiment,
     * passing `isReadonly()`.
     */
    private startExperiment(router: Router): void {
        router.post('/experiment', (req: Request, res: Response) => {
            if (isNewExperiment()) {
                this.nniManager.startExperiment(req.body).then((eid: string) => {
                    res.send({
                        experiment_id: eid // eslint-disable-line @typescript-eslint/camelcase
                    });
                }).catch((err: Error) => {
                    // Start experiment is a step of initialization.
                    // NOTE(review): an older comment called this failure fatal, but isFatal is not
                    // passed, so the process keeps running - confirm which is intended.
                    this.handleError(err, res);
                });
            } else {
                this.nniManager.resumeExperiment(isReadonly()).then(() => {
                    res.send();
                }).catch((err: Error) => {
                    // Resume experiment is a step of initialization.
                    // NOTE(review): same as above - reported as a non-fatal error; confirm intent.
                    this.handleError(err, res);
                });
            }
        });
    }

    /** GET /job-statistics: responds with the trial job statistics. */
    private getTrialJobStatistics(router: Router): void {
        router.get('/job-statistics', (req: Request, res: Response) => {
            this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
                res.send(statistics);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * PUT /experiment/cluster-metadata: after schema validation, applies every top-level key of
     * the request body through `setClusterMetadata`, one key at a time. Any failure is fatal.
     */
    private setClusterMetaData(router: Router): void {
        router.put(
            '/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
            async (req: Request, res: Response) => {
                const metadata: any = req.body;
                const keys: string[] = Object.keys(metadata);
                try {
                    // Deliberately sequential: each key is awaited before the next one is sent.
                    for (const key of keys) {
                        await this.nniManager.setClusterMetadata(key, JSON.stringify(metadata[key]));
                    }
                    res.send();
                } catch (err) {
                    // setClusterMetadata is a step of initialization, so any exception thrown is fatal
                    this.handleError(NNIError.FromError(err), res, true);
                }
            });
    }

    /**
     * GET /trial-jobs: lists trial jobs, forwarding the `status` query parameter, and fills in
     * the stderr path of failed jobs before responding.
     */
    private listTrialJobs(router: Router): void {
        router.get('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.listTrialJobs(req.query.status as TrialJobStatus).then((jobInfos: TrialJobInfo[]) => {
                jobInfos.forEach((trialJob: TrialJobInfo) => {
                    this.setErrorPathForFailedJob(trialJob);
                });
                res.send(jobInfos);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /trial-jobs/:id: responds with one trial job, with the stderr path filled in if it failed. */
    private getTrialJob(router: Router): void {
        router.get('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.getTrialJob(req.params.id).then((jobDetail: TrialJobInfo) => {
                const jobInfo: TrialJobInfo = this.setErrorPathForFailedJob(jobDetail);
                res.send(jobInfo);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** POST /trial-jobs: adds a customized trial job from the request body; responds with `{sequenceId}`. */
    private addTrialJob(router: Router): void {
        router.post('/trial-jobs', (req: Request, res: Response) => {
            this.nniManager.addCustomizedTrialJob(JSON.stringify(req.body)).then((sequenceId: number) => {
                res.send({sequenceId});
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /trial-jobs/:id: cancels the given trial job on behalf of the user. */
    private cancelTrialJob(router: Router): void {
        router.delete('/trial-jobs/:id', (req: Request, res: Response) => {
            this.nniManager.cancelTrialJobByUser(req.params.id).then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data/:job_id*?: responds with metric records, forwarding the optional `job_id`
     * path parameter and the `type` query parameter to `getMetricData`.
     */
    private getMetricData(router: Router): void {
        router.get('/metric-data/:job_id*?', (req: Request, res: Response) => {
            this.nniManager.getMetricData(req.params.job_id, req.query.type as MetricType).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /metric-data-range/:min_seq_id/:max_seq_id: responds with the metric records returned by
     * `getMetricDataByRange` for the two numeric bounds.
     */
    private getMetricDataByRange(router: Router): void {
        router.get('/metric-data-range/:min_seq_id/:max_seq_id', (req: Request, res: Response) => {
            // Number() yields NaN for a non-numeric path segment; the value is passed on unchanged.
            const minSeqId = Number(req.params.min_seq_id);
            const maxSeqId = Number(req.params.max_seq_id);
            this.nniManager.getMetricDataByRange(minSeqId, maxSeqId).then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /metric-data-latest/: responds with the records returned by `getLatestMetricData`. */
    private getLatestMetricData(router: Router): void {
        router.get('/metric-data-latest/', (req: Request, res: Response) => {
            this.nniManager.getLatestMetricData().then((metricsData: MetricDataRecord[]) => {
                res.send(metricsData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * GET /trial-log/:id/:type: responds with the requested trial log, or with the text
     * 'No logs available.' when the log is an empty string.
     */
    private getTrialLog(router: Router): void {
        router.get('/trial-log/:id/:type', (req: Request, res: Response) => {
            this.nniManager.getTrialLog(req.params.id, req.params.type as LogType).then((log: string) => {
                res.send(log === '' ? 'No logs available.' : log);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /export-data: responds with the string returned by `exportData`. */
    private exportData(router: Router): void {
        router.get('/export-data', (req: Request, res: Response) => {
            this.nniManager.exportData().then((exportedData: string) => {
                res.send(exportedData);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /experiments-info: responds with the experiments info serialized as JSON. */
    private getExperimentsInfo(router: Router): void {
        router.get('/experiments-info', (req: Request, res: Response) => {
            this.experimentsManager.getExperimentsInfo().then((experimentInfo: JSON) => {
                res.send(JSON.stringify(experimentInfo));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * POST /tensorboard: starts a tensorboard task from the request body and responds with a
     * shallow copy of the task info. Failures are reported with status 400.
     */
    private startTensorboardTask(router: Router): void {
        router.post('/tensorboard', (req: Request, res: Response) => {
            this.tensorboardManager.startTensorboardTask(req.body).then((taskDetail: TensorboardTaskInfo) => {
                this.log.info(taskDetail);
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res, false, 400);
            });
        });
    }

    /** GET /tensorboard/:id: responds with a shallow copy of the given tensorboard task info. */
    private getTensorboardTask(router: Router): void {
        router.get('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.getTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** PUT /tensorboard/:id: updates the given tensorboard task and responds with a shallow copy of its info. */
    private updateTensorboardTask(router: Router): void {
        router.put('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.updateTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard/:id: stops the given tensorboard task and responds with a shallow copy of its info. */
    private stopTensorboardTask(router: Router): void {
        router.delete('/tensorboard/:id', (req: Request, res: Response) => {
            this.tensorboardManager.stopTensorboardTask(req.params.id).then((taskDetail: TensorboardTaskInfo) => {
                res.send(Object.assign({}, taskDetail));
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** DELETE /tensorboard-tasks: stops all tensorboard tasks; responds with an empty body. */
    private stopAllTensorboardTask(router: Router): void {
        router.delete('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.stopAllTensorboardTask().then(() => {
                res.send();
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /** GET /tensorboard-tasks: responds with the list of tensorboard task infos. */
    private listTensorboardTask(router: Router): void {
        router.get('/tensorboard-tasks', (req: Request, res: Response) => {
            this.tensorboardManager.listTensorboardTasks().then((taskDetails: TensorboardTaskInfo[]) => {
                res.send(taskDetails);
            }).catch((err: Error) => {
                this.handleError(err, res);
            });
        });
    }

    /**
     * DELETE /experiment: runs `stopExperimentTopHalf()`, acknowledges the request, and only then
     * kicks off `stopExperimentBottomHalf()` (its result is not awaited here).
     */
    private stop(router: Router): void {
        router.delete('/experiment', (req: Request, res: Response) => {
            this.nniManager.stopExperimentTopHalf().then(() => {
                res.send();
                this.nniManager.stopExperimentBottomHalf();
            }).catch((err: Error) => {
                // Answer the client when the top half fails; handleError only logs if the
                // response has already been sent.
                this.handleError(err, res);
            });
        });
    }

    /**
     * For a job whose status is 'FAILED' and that has a `logPath`, sets `stderrPath` to
     * `<logPath>/stderr`. Any other input is returned untouched.
     *
     * @param jobInfo job to inspect; it is modified in place
     * @returns the same `jobInfo` object
     */
    private setErrorPathForFailedJob(jobInfo: TrialJobInfo): TrialJobInfo {
        if (jobInfo === undefined || jobInfo.status !== 'FAILED' || jobInfo.logPath === undefined) {
            return jobInfo;
        }
        jobInfo.stderrPath = path.join(jobInfo.logPath, 'stderr');

        return jobInfo;
    }
}

412
/**
 * Creates the Express router that serves the NNI manager REST API.
 *
 * @param rs REST server handed to the underlying `NNIRestHandler`
 * @returns the router produced by `NNIRestHandler.createRestHandler()`
 */
export function createRestHandler(rs: NNIRestServer): Router {
    return new NNIRestHandler(rs).createRestHandler();
}