main.ts 3.02 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3

liuzhe-lz's avatar
liuzhe-lz committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/**
 *  Entry point of NNI manager.
 *
 *  NNI manager is normally started by "nni/experiment/launcher.py".
 *  It requires command line arguments defined as NniManagerArgs in "common/globals/arguments.ts".
 *
 *  Example usage:
 *
 *      node main.js \
 *          --port 8080 \
 *          --experiment-id ID \
 *          --action create \
 *          --experiments-directory /home/USER/nni-experiments \
 *          --log-level info \
 *          --foreground false \  (optional)
 *          --mode local  (required for now, will be removed later)
 **/

import 'app-module-path/register';  // so we can use absolute path to import

import fs from 'fs';

Deshui Yu's avatar
Deshui Yu committed
26
27
import { Container, Scope } from 'typescript-ioc';

liuzhe-lz's avatar
liuzhe-lz committed
28
29
30
import * as component from 'common/component';
import { Database, DataStore } from 'common/datastore';
import { ExperimentManager } from 'common/experimentManager';
31
import globals, { initGlobals } from 'common/globals';
32
import { getLogger } from 'common/log';
liuzhe-lz's avatar
liuzhe-lz committed
33
34
35
36
37
38
39
40
41
42
43
44
45
import { Manager } from 'common/manager';
import { TensorboardManager } from 'common/tensorboardManager';
import { NNIDataStore } from 'core/nniDataStore';
import { NNIExperimentsManager } from 'core/nniExperimentsManager';
import { NNITensorboardManager } from 'core/nniTensorboardManager';
import { NNIManager } from 'core/nnimanager';
import { SqlDB } from 'core/sqlDatabase';
import { RestServer } from 'rest_server';

import path from 'path';

/**
 *  Boot sequence of NNI manager.
 *
 *  Registers the singleton implementations in the IoC container,
 *  initializes the data store, then brings up the REST server.
 *
 *  The returned promise settles after `RestServer.start()` completes;
 *  a rejection from the data store or the REST server propagates to the caller.
 **/
async function start(): Promise<void> {
    const logger = getLogger('main');
    logger.info('Start NNI manager');

    // Map each abstract service to its concrete implementation, one shared instance each.
    Container.bind(Manager)
        .to(NNIManager)
        .scope(Scope.Singleton);
    Container.bind(Database)
        .to(SqlDB)
        .scope(Scope.Singleton);
    Container.bind(DataStore)
        .to(NNIDataStore)
        .scope(Scope.Singleton);
    Container.bind(ExperimentManager)
        .to(NNIExperimentsManager)
        .scope(Scope.Singleton);
    Container.bind(TensorboardManager)
        .to(NNITensorboardManager)
        .scope(Scope.Singleton);

    // The data store must finish initializing before the REST server is created.
    const dataStore: DataStore = component.get(DataStore);
    await dataStore.init();

    const { port, urlPrefix } = globals.args;
    const server = new RestServer(port, urlPrefix);
    await server.start();
}

liuzhe-lz's avatar
liuzhe-lz committed
60
/**
 *  Signal handler: ask the registered Manager to stop the experiment.
 *
 *  NOTE(review): the result of `stopExperiment()` is discarded.
 *  If it returns a promise, a rejection would go unhandled here -- confirm against Manager.
 **/
function shutdown(): void {
    const manager: Manager = component.get(Manager);
    manager.stopExperiment();
}
liuzhe-lz's avatar
liuzhe-lz committed
63
64
65
66
67
68
69
70
71
72
    
// Hook termination signals so training service resources are released on an unexpected shutdown.
// Interrupts can cause strange behaviors in children processes,
// so a graceful stop should go through the REST API instead.
for (const signal of ['SIGTERM', 'SIGBREAK', 'SIGINT'] as const) {
    process.on(signal, shutdown);
}

/* main */

73
74
// Must run before start(), which reads `globals.args`.
initGlobals();

void (async (): Promise<void> => {
    try {
        await start();
        getLogger('main').debug('start() returned.');
    } catch (error) {
        // Report the failure through the logger; if the logger itself throws,
        // fall back to stderr so the original error is not lost.
        try {
            getLogger('main').error('Failed to start:', error);
        } catch (loggerError) {
            console.error('Failed to start:', error);
            console.error('Seems logger is faulty:', loggerError);
        }
        process.exit(1);
    }
})();
86

liuzhe-lz's avatar
liuzhe-lz committed
87
88
89
// Node.js exits when there is no active handler,
// and we have registered a lot of handlers which are never cleaned up.
// So it runs forever until NNIManager calls `process.exit()`.