Unverified Commit 627e823f authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #119 from Microsoft/master

merge master
parents 1089e808 d9c83c0c
# Setting variables
SHELL := /bin/bash
PIP_INSTALL := python3 -m pip install --no-cache-dir
PIP_INSTALL := python3 -m pip install
PIP_UNINSTALL := python3 -m pip uninstall
## Colorful output
......@@ -19,22 +19,25 @@ else
endif
## Install directories
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]).parents[2])')
IS_SYS_PYTHON ?= $(shell [[ $(ROOT_FOLDER) == /usr* || $(ROOT_FOLDER) == /Library* ]] && echo TRUE || echo FALSE)
ifeq ($(shell id -u), 0) # is root
_ROOT := 1
BASH_COMP_PREFIX ?= /usr/share/bash-completion/completions
else # is normal user
ifeq (TRUE, $(IS_SYS_PYTHON))
ROOT_FOLDER := $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getusersitepackages()).parents[2])')
endif
ifndef VIRTUAL_ENV
ifeq (, $(shell echo $$PATH | grep 'conda'))
## For apt-get or pip installed virtualenv
ifdef VIRTUAL_ENV
ROOT_FOLDER ?= $(VIRTUAL_ENV)
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
else
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]).parents[2])')
IS_SYS_PYTHON ?= $(shell [[ $(ROOT_FOLDER) == /usr* || $(ROOT_FOLDER) == /Library* ]] && echo TRUE || echo FALSE)
ifeq ($(shell id -u), 0) # is root
_ROOT := 1
BASH_COMP_PREFIX ?= /usr/share/bash-completion/completions
else # is normal user
ifeq (TRUE, $(IS_SYS_PYTHON))
ROOT_FOLDER := $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getusersitepackages()).parents[2])')
PIP_MODE ?= --user
endif
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
endif
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
endif
BASH_COMP_SCRIPT := $(BASH_COMP_PREFIX)/nnictl
......
......@@ -4,7 +4,7 @@
-----------
[![MIT licensed](https://img.shields.io/badge/license-MIT-yellow.svg)](https://github.com/Microsoft/nni/blob/master/LICENSE)
[![MIT licensed](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://github.com/Microsoft/nni/blob/master/LICENSE)
[![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/Microsoft.nni)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=6)
[![Issues](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen)
[![Bugs](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
......
......@@ -177,8 +177,18 @@ machineList:
__nniManagerIp__ set the IP address of the machine on which nni manager process runs. This field is optional, and if it's not set, eth0 device IP will be used instead.
Note: run ifconfig on the NNI manager's machine to check whether the eth0 device exists. If it does not, we recommend setting nniManagerIp explicitly.
* __logDir__
* Description
__logDir__ configures the directory to store logs and data of the experiment. The default value is `<user home directory>/nni/experiments`
* __logLevel__
* Description
__logLevel__ sets log level for the experiment, available log levels are: `trace, debug, info, warning, error, fatal`. The default value is `info`.
* __tuner__
* Description
......
......@@ -43,6 +43,7 @@ nnictl --version
| ------ | ------ | ------ |------ |
| --config, -c| True| |yaml configure file of the experiment|
| --port, -p | False| |the port of restful server|
| --debug, -d | False| |Set log level to debug|
* __nnictl resume__
......@@ -62,6 +63,7 @@ nnictl --version
| ------ | ------ | ------ |------ |
| id| False| |The id of the experiment you want to resume|
| --port, -p| False| |Rest port of the experiment you want to resume|
| --debug, -d | False| |Set log level to debug|
* __nnictl stop__
* Description
......
......@@ -20,6 +20,8 @@
'use strict';
import * as assert from 'assert';
import * as os from 'os';
import * as path from 'path';
import * as component from '../common/component';
@component.Singleton
......@@ -29,8 +31,10 @@ class ExperimentStartupInfo {
private basePort: number = -1;
private initialized: boolean = false;
private initTrialSequenceID: number = 0;
private logDir: string = '';
private logLevel: string = '';
public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number): void {
public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
assert(!this.initialized);
assert(experimentId.trim().length > 0);
......@@ -38,6 +42,16 @@ class ExperimentStartupInfo {
this.experimentId = experimentId;
this.basePort = basePort;
this.initialized = true;
if (logDir !== undefined && logDir.length > 0) {
this.logDir = path.join(logDir, getExperimentId());
} else {
this.logDir = path.join(os.homedir(), 'nni', 'experiments', getExperimentId());
}
if (logLevel !== undefined && logLevel.length > 1) {
this.logLevel = logLevel;
}
}
public getExperimentId(): string {
......@@ -58,6 +72,18 @@ class ExperimentStartupInfo {
return this.newExperiment;
}
/**
 * Returns the experiment's log directory as computed by setStartupInfo().
 * Asserts that the startup info has already been initialized.
 */
public getLogDir(): string {
assert(this.initialized);
return this.logDir;
}
/**
 * Returns the log level name stored by setStartupInfo(); this is the empty
 * string when no log level was stored.
 * Asserts that the startup info has already been initialized.
 */
public getLogLevel(): string {
assert(this.initialized);
return this.logLevel;
}
public setInitTrialSequenceId(initSequenceId: number): void {
assert(this.initialized);
this.initTrialSequenceID = initSequenceId;
......@@ -90,9 +116,15 @@ function getInitTrialSequenceId(): number {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getInitTrialSequenceId();
}
function setExperimentStartupInfo(newExperiment: boolean, experimentId: string, basePort: number): void {
component.get<ExperimentStartupInfo>(ExperimentStartupInfo).setStartupInfo(newExperiment, experimentId, basePort);
/**
 * Looks up the ExperimentStartupInfo instance through the component registry
 * and hands it back to the caller.
 */
function getExperimentStartupInfo(): ExperimentStartupInfo {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);

    return startupInfo;
}
/**
 * Forwards the startup parameters to the ExperimentStartupInfo instance
 * obtained from the component registry.
 *
 * @param newExperiment whether a new experiment is being created
 * @param experimentId id of the experiment
 * @param basePort base port of the experiment
 * @param logDir optional log directory, passed through unchanged
 * @param logLevel optional log level name, passed through unchanged
 */
function setExperimentStartupInfo(
    newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);
    startupInfo.setStartupInfo(newExperiment, experimentId, basePort, logDir, logLevel);
}
export { ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment,
export { ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment, getExperimentStartupInfo,
setExperimentStartupInfo, setInitTrialSequenceId, getInitTrialSequenceId };
......@@ -26,13 +26,18 @@ import { Writable } from 'stream';
import { WritableStreamBuffer } from 'stream-buffers';
import { format } from 'util';
import * as component from '../common/component';
import { getExperimentStartupInfo } from './experimentStartupInfo';
import { getLogDir } from './utils';
const CRITICAL: number = 1;
const FATAL: number = 1;
const ERROR: number = 2;
const WARNING: number = 3;
const INFO: number = 4;
const DEBUG: number = 5;
const TRACE: number = 6;
const logLevelNameMap: Map<string, number> = new Map([['fatal', FATAL],
['error', ERROR], ['warning', WARNING], ['info', INFO], ['debug', DEBUG], ['trace', TRACE]]);
class BufferSerialEmitter {
private buffer: Buffer;
......@@ -83,12 +88,25 @@ class Logger {
autoClose: true
});
this.bufferSerialEmitter = new BufferSerialEmitter(this.writable);
const logLevelName: string = getExperimentStartupInfo()
.getLogLevel();
const logLevel: number | undefined = logLevelNameMap.get(logLevelName);
if (logLevel !== undefined) {
this.level = logLevel;
}
}
public close() {
this.writable.destroy();
}
/**
 * Emits a TRACE record, but only when the logger's current level is at
 * least TRACE; otherwise the call is a no-op.
 */
public trace(...param: any[]): void {
    if (this.level < TRACE) {
        return;
    }
    this.log('TRACE', param);
}
public debug(...param: any[]): void {
if (this.level >= DEBUG) {
this.log('DEBUG', param);
......@@ -113,8 +131,8 @@ class Logger {
}
}
public critical(...param: any[]): void {
this.log('CRITICAL', param);
/**
 * Emits a FATAL record. Unlike trace(), this is not gated on this.level:
 * the message is always passed to log().
 */
public fatal(...param: any[]): void {
this.log('FATAL', param);
}
private log(level: string, param: any[]): void {
......
......@@ -30,13 +30,14 @@ import { Container } from 'typescript-ioc';
import * as util from 'util';
import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
import { ExperimentStartupInfo, getExperimentId, getExperimentStartupInfo, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager';
import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService';
import { getLogger } from './log';
function getExperimentRootDir(): string {
return path.join(os.homedir(), 'nni', 'experiments', getExperimentId());
return getExperimentStartupInfo()
.getLogDir();
}
function getLogDir(): string{
......
......@@ -35,7 +35,7 @@ import {
import {
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../common/trainingService';
import { delay, getCheckpointDir, getLogDir, getMsgDispatcherCommand, mkDirP } from '../common/utils';
import { delay, getCheckpointDir, getExperimentRootDir, getLogDir, getMsgDispatcherCommand, mkDirP } from '../common/utils';
import {
ADD_CUSTOMIZED_TRIAL_JOB, INITIALIZE, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, PING,
REPORT_METRIC_DATA, REQUEST_TRIAL_JOBS, SEND_TRIAL_JOB_PARAMETER, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
......@@ -670,7 +670,7 @@ class NNIManager implements Manager {
id: getExperimentId(),
revision: 0,
execDuration: 0,
logDir: getLogDir(),
logDir: getExperimentRootDir(),
maxSequenceId: 0,
params: {
authorName: '',
......
......@@ -22,6 +22,7 @@
import { Container, Scope } from 'typescript-ioc';
import * as component from './common/component';
import * as fs from 'fs';
import { Database, DataStore } from './common/datastore';
import { setExperimentStartupInfo } from './common/experimentStartupInfo';
import { getLogger, Logger } from './common/log';
......@@ -40,10 +41,10 @@ import { PAITrainingService } from './training_service/pai/paiTrainingService';
import { KubeflowTrainingService } from './training_service/kubernetes/kubeflow/kubeflowTrainingService';
import { FrameworkControllerTrainingService } from './training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService';
function initStartupInfo(startExpMode: string, resumeExperimentId: string, basePort: number) {
function initStartupInfo(startExpMode: string, resumeExperimentId: string, basePort: number, logDirectory: string, experimentLogLevel: string) {
const createNew: boolean = (startExpMode === 'new');
const expId: string = createNew ? uniqueString(8) : resumeExperimentId;
setExperimentStartupInfo(createNew, expId, basePort);
setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel);
}
async function initContainer(platformMode: string): Promise<void> {
......@@ -102,7 +103,19 @@ if (startMode === 'resume' && experimentId.trim().length < 1) {
process.exit(1);
}
initStartupInfo(startMode, experimentId, port);
const logDir: string = parseArg(['--log_dir', '-ld']);
if (logDir.length > 0) {
if (!fs.existsSync(logDir)) {
console.log(`FATAL: log_dir ${logDir} does not exist`);
}
}
// Validate --log_level against the level names the logger actually maps
// (trace, debug, info, warning, error, fatal). The previous whitelist rejected
// the valid 'trace' and 'fatal' and accepted 'critical', which the logger does
// not know. An invalid value is only reported here; startup continues.
const logLevel: string = parseArg(['--log_level', '-ll']);
if (logLevel.length > 0 && !['trace', 'debug', 'info', 'warning', 'error', 'fatal'].includes(logLevel)) {
    console.log(`FATAL: invalid log_level: ${logLevel}`);
}
initStartupInfo(startMode, experimentId, port, logDir, logLevel);
mkDirP(getLogDir()).then(async () => {
const log: Logger = getLogger();
......
......@@ -105,7 +105,7 @@ class NNIRestHandler {
// If it's a fatal error, exit process
if(isFatal) {
this.log.critical(err);
this.log.fatal(err);
process.exit(1);
}
......
......@@ -221,12 +221,12 @@ class SlideBar extends React.Component<{}, SliderState> {
Download <Icon type="down" />
</a>
</Dropdown>
<a href="https://github.com/Microsoft/nni/issues/new" target="_blank">
<a href="https://github.com/Microsoft/nni/issues/new?labels=v0.5" target="_blank">
<img
src={require('../static/img/icon/issue.png')}
alt="NNI github issue"
/>
FeedBack
Feedback
</a>
<span className="version">Version: {version}</span>
</Col>
......
......@@ -43,7 +43,7 @@ class BasicInfo extends React.Component<BasicInfoProps, {}> {
</div>
</Col>
<Col span={8} className="padItem basic">
<p>LogPath</p>
<p>Log Directory</p>
<div className="nowrap">
<Tooltip placement="top" title={trialProfile.logDir}>
{trialProfile.logDir}
......
......@@ -33,6 +33,8 @@ Optional('searchSpacePath'): os.path.exists,
Optional('multiPhase'): bool,
Optional('multiThread'): bool,
Optional('nniManagerIp'): str,
Optional('logDir'): os.path.isdir,
Optional('logLevel'): Or('trace', 'debug', 'info', 'warning', 'error', 'fatal'),
'useAnnotation': bool,
Optional('advisor'): Or({
'builtinAdvisorName': Or('Hyperband'),
......
......@@ -58,8 +58,47 @@ def print_log_content(config_file_name):
stderr_content = check_output(stderr_cmds)
print(stderr_content.decode('utf-8'))
def get_nni_installation_path():
    '''Locate the installed nni directory (the one holding main.js).

    When VIRTUAL_ENV is set, that directory is used as the python root.
    Otherwise the site-packages and user site-packages locations are probed,
    with the user location tried first if the interpreter's site-packages
    lives under /usr or /Library.
    Returns <python root>/nni when main.js exists there; otherwise prints an
    error and exits with status 1.
    '''
    def first_root_with_nni(*sitepackages):
        '''Return the first python root (three levels above a site-packages
        path) that contains nni/main.js, or None if no candidate does.
        '''
        for candidate in sitepackages:
            root = str(Path(candidate).parents[2])
            if os.path.isfile(os.path.join(root, 'nni', 'main.js')):
                return root
        return None

    virtual_env = os.getenv('VIRTUAL_ENV')
    if virtual_env:
        # With the 'virtualenv' package, `site` has no getsitepackages(), so
        # VIRTUAL_ENV is used instead. Conda environments do not set it.
        python_dir = virtual_env
    else:
        global_site = site.getsitepackages()[0]
        user_site = site.getusersitepackages()
        # For a system-wide python, prefer the user site-packages location.
        if global_site.startswith(('/usr', '/Library')):
            search_order = (user_site, global_site)
        else:
            search_order = (global_site, user_site)
        python_dir = first_root_with_nni(*search_order)

    if python_dir and os.path.isfile(os.path.join(python_dir, 'nni', 'main.js')):
        return os.path.join(python_dir, 'nni')
    print_error('Fail to find nni under python library')
    exit(1)
def start_rest_server(port, platform, mode, config_file_name, experiment_id=None):
def start_rest_server(port, platform, mode, config_file_name, experiment_id=None, log_dir=None, log_level=None):
'''Run nni manager process'''
nni_config = Config(config_file_name)
if detect_port(port):
......@@ -74,27 +113,15 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
exit(1)
print_normal('Starting restful server...')
# Find nni lib from the following locations in order
sys_wide_python = True
python_sitepackage = site.getsitepackages()[0]
# If system-wide python is used, we will give priority to using user-sitepackage given that nni exists there
if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'):
local_python_dir = str(Path(site.getusersitepackages()).parents[2])
entry_file = os.path.join(local_python_dir, 'nni', 'main.js')
entry_dir = os.path.join(local_python_dir, 'nni')
else:
# If this python is not system-wide python, we will use its site-package directly
sys_wide_python = False
if not sys_wide_python or not os.path.isfile(entry_file):
python_dir = str(Path(python_sitepackage).parents[2])
entry_file = os.path.join(python_dir, 'nni', 'main.js')
entry_dir = os.path.join(python_dir, 'nni')
# Nothing is found
if not os.path.isfile(entry_file):
raise Exception('Fail to find nni under both "%s" and "%s"' % (local_python_dir, python_dir))
entry_dir = get_nni_installation_path()
entry_file = os.path.join(entry_dir, 'main.js')
cmds = ['node', entry_file, '--port', str(port), '--mode', platform, '--start_mode', mode]
if log_dir is not None:
cmds += ['--log_dir', log_dir]
if log_level is not None:
cmds += ['--log_level', log_level]
if mode == 'resume':
cmds += ['--experiment_id', experiment_id]
stdout_full_path, stderr_full_path = get_log_path(config_file_name)
......@@ -294,9 +321,12 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
except ModuleNotFoundError as e:
print_error('The tuner %s should be installed through nnictl'%(tuner_name))
exit(1)
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
if log_level not in ['trace', 'debug'] and args.debug:
log_level = 'debug'
# start rest server
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id)
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id, log_dir, log_level)
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
......@@ -310,8 +340,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
experiment_config['searchSpace'] = json.dumps(search_space)
assert search_space, ERROR_INFO % 'Generated search space is empty'
elif experiment_config.get('searchSpacePath'):
search_space = get_json_content(experiment_config.get('searchSpacePath'))
experiment_config['searchSpace'] = json.dumps(search_space)
search_space = get_json_content(experiment_config.get('searchSpacePath'))
experiment_config['searchSpace'] = json.dumps(search_space)
else:
experiment_config['searchSpace'] = json.dumps('')
......
......@@ -51,12 +51,14 @@ def parse_args():
parser_start = subparsers.add_parser('create', help='create a new experiment')
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set log level to debug')
parser_start.set_defaults(func=create_experiment)
# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set log level to debug')
parser_resume.set_defaults(func=resume_experiment)
# parse update command
......
......@@ -38,11 +38,12 @@ from .url_utils import gen_send_stdout_url
@unique
class LogType(Enum):
Trace = 'TRACE'
Debug = 'DEBUG'
Info = 'INFO'
Warning = 'WARNING'
Error = 'ERROR'
Critical = 'CRITICAL'
Fatal = 'FATAL'
@unique
class StdOutputType(Enum):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment