// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

/**
 *  Entry point of NNI manager.
 *
 *  NNI manager is normally started by "nni/experiment/launcher.py".
 *  It requires command line arguments defined as NniManagerArgs in "common/globals/arguments.ts".
 *
 *  Example usage:
 *
 *      node main.js \
 *          --port 8080 \
 *          --experiment-id ID \
 *          --action create \
 *          --experiments-directory /home/USER/nni-experiments \
 *          --log-level info \
 *          --foreground false \  (optional)
 *          --mode local  (required for now, will be removed later)
 **/

import 'app-module-path/register';  // so we can use absolute path to import

import fs from 'fs';

import { Container, Scope } from 'typescript-ioc';

import * as component from 'common/component';
import { Database, DataStore } from 'common/datastore';
import { ExperimentManager } from 'common/experimentManager';
import globals, { initGlobals } from 'common/globals';
import { getLogger, setLogLevel, startLogging } from 'common/log';
import { Manager } from 'common/manager';
import { TensorboardManager } from 'common/tensorboardManager';
import { NNIDataStore } from 'core/nniDataStore';
import { NNIExperimentsManager } from 'core/nniExperimentsManager';
import { NNITensorboardManager } from 'core/nniTensorboardManager';
import { NNIManager } from 'core/nnimanager';
import { SqlDB } from 'core/sqlDatabase';
import { RestServer } from 'rest_server';

import path from 'path';

/**
 * Bootstraps the NNI manager process.
 *
 * Registers the singleton implementation of each core service in the IoC
 * container, initializes the data store, and then starts the REST server
 * with the port and URL prefix taken from the global arguments.
 *
 * @returns A promise that resolves after the REST server's `start()` completes.
 *          It rejects if `ds.init()` or `restServer.start()` rejects.
 */
async function start(): Promise<void> {
    getLogger('main').info('Start NNI manager');

    // Bind each service abstraction to its NNI implementation as a singleton.
    Container.bind(Manager).to(NNIManager).scope(Scope.Singleton);
    Container.bind(Database).to(SqlDB).scope(Scope.Singleton);
    Container.bind(DataStore).to(NNIDataStore).scope(Scope.Singleton);
    Container.bind(ExperimentManager).to(NNIExperimentsManager).scope(Scope.Singleton);
    Container.bind(TensorboardManager).to(NNITensorboardManager).scope(Scope.Singleton);

    // Initialize the data store first, then bring up the REST server.
    const ds: DataStore = component.get(DataStore);
    await ds.init();

    const restServer = new RestServer(globals.args.port, globals.args.urlPrefix);
    await restServer.start();
}

60
61
/**
 * Signal handler that asks the bound `Manager` to stop the experiment.
 *
 * NOTE(review): the return value of `stopExperiment()` is discarded. If it
 * returns a promise, a rejection would go unhandled here — confirm against
 * the `Manager` definition.
 */
function shutdown(): void {
    // Typed declaration instead of an `as` assertion, matching how start() resolves DataStore.
    const manager: Manager = component.get(Manager);
    manager.stopExperiment();
}

// Register callbacks to free training service resources on unexpected shutdown.
// A graceful stop should use the REST API,
// because interrupts can cause strange behavior in child processes.
process.on('SIGTERM', shutdown);
process.on('SIGBREAK', shutdown);
process.on('SIGINT', shutdown);

/* main */

// Globals are initialized first; the logging setup below reads `globals.paths` and `globals.args`.
initGlobals();

// TODO: these should be handled inside globals module
startLogging(globals.paths.nniManagerLog);
setLogLevel(globals.args.logLevel);

start().then(() => {
    getLogger('main').debug('start() returned.');
}).catch((error) => {
    try {
        getLogger('main').error('Failed to start:', error);
    } catch (loggerError) {
        // The logger itself threw: fall back to the console so the failure is still reported.
        console.log('Failed to start:', error);
        console.log('Seems logger is faulty:', loggerError);
    }
    // Startup failed: terminate with a non-zero exit code.
    process.exit(1);
});

// Node.js exits when there is no active handler,
// and we have registered a lot of handlers which are never cleaned up.
// So it runs forever until NNIManager calls `process.exit()`.