Unverified Commit 627e823f authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #119 from Microsoft/master

merge master
parents 1089e808 d9c83c0c
# Setting variables
SHELL := /bin/bash
PIP_INSTALL := python3 -m pip install --no-cache-dir
PIP_INSTALL := python3 -m pip install
PIP_UNINSTALL := python3 -m pip uninstall
## Colorful output
......@@ -19,22 +19,25 @@ else
endif
## Install directories
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]).parents[2])')
IS_SYS_PYTHON ?= $(shell [[ $(ROOT_FOLDER) == /usr* || $(ROOT_FOLDER) == /Library* ]] && echo TRUE || echo FALSE)
ifeq ($(shell id -u), 0) # is root
## For apt-get or pip installed virtualenv
ifdef VIRTUAL_ENV
ROOT_FOLDER ?= $(VIRTUAL_ENV)
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
else
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]).parents[2])')
IS_SYS_PYTHON ?= $(shell [[ $(ROOT_FOLDER) == /usr* || $(ROOT_FOLDER) == /Library* ]] && echo TRUE || echo FALSE)
ifeq ($(shell id -u), 0) # is root
_ROOT := 1
BASH_COMP_PREFIX ?= /usr/share/bash-completion/completions
else # is normal user
else # is normal user
ifeq (TRUE, $(IS_SYS_PYTHON))
ROOT_FOLDER := $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getusersitepackages()).parents[2])')
endif
ifndef VIRTUAL_ENV
ifeq (, $(shell echo $$PATH | grep 'conda'))
PIP_MODE ?= --user
endif
endif
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
endif
endif
BASH_COMP_SCRIPT := $(BASH_COMP_PREFIX)/nnictl
......
......@@ -4,7 +4,7 @@
-----------
[![MIT licensed](https://img.shields.io/badge/license-MIT-yellow.svg)](https://github.com/Microsoft/nni/blob/master/LICENSE)
[![MIT licensed](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://github.com/Microsoft/nni/blob/master/LICENSE)
[![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/Microsoft.nni)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=6)
[![Issues](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen)
[![Bugs](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
......
......@@ -178,6 +178,16 @@ machineList:
Note: run `ifconfig` on the NNI manager's machine to check whether the eth0 device exists. If it does not, we recommend setting nniManagerIp explicitly.
* __logDir__
* Description
__logDir__ configures the directory to store logs and data of the experiment. The default value is `<user home directory>/nni/experiments`.
* __logLevel__
* Description
__logLevel__ sets the log level for the experiment. Available log levels are: `trace, debug, info, warning, error, fatal`. The default value is `info`.
* __tuner__
* Description
......
......@@ -43,6 +43,7 @@ nnictl --version
| ------ | ------ | ------ |------ |
| --config, -c| True| |YAML configuration file of the experiment|
| --port, -p | False| |the port of restful server|
| --debug, -d | False| |Set log level to debug|
* __nnictl resume__
......@@ -62,6 +63,7 @@ nnictl --version
| ------ | ------ | ------ |------ |
| id| False| |The id of the experiment you want to resume|
| --port, -p| False| |Rest port of the experiment you want to resume|
| --debug, -d | False| |Set log level to debug|
* __nnictl stop__
* Description
......
......@@ -20,6 +20,8 @@
'use strict';
import * as assert from 'assert';
import * as os from 'os';
import * as path from 'path';
import * as component from '../common/component';
@component.Singleton
......@@ -29,8 +31,10 @@ class ExperimentStartupInfo {
private basePort: number = -1;
private initialized: boolean = false;
private initTrialSequenceID: number = 0;
private logDir: string = '';
private logLevel: string = '';
public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number): void {
public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
assert(!this.initialized);
assert(experimentId.trim().length > 0);
......@@ -38,6 +42,16 @@ class ExperimentStartupInfo {
this.experimentId = experimentId;
this.basePort = basePort;
this.initialized = true;
if (logDir !== undefined && logDir.length > 0) {
this.logDir = path.join(logDir, getExperimentId());
} else {
this.logDir = path.join(os.homedir(), 'nni', 'experiments', getExperimentId());
}
if (logLevel !== undefined && logLevel.length > 1) {
this.logLevel = logLevel;
}
}
public getExperimentId(): string {
......@@ -58,6 +72,18 @@ class ExperimentStartupInfo {
return this.newExperiment;
}
/**
 * Returns the log directory recorded by setStartupInfo.
 *
 * Asserts that the startup info has already been initialized.
 */
public getLogDir(): string {
    const { initialized, logDir } = this;
    assert(initialized);

    return logDir;
}
/**
 * Returns the log level name recorded by setStartupInfo; the field starts
 * out as an empty string and is only overwritten when a level was supplied.
 *
 * Asserts that the startup info has already been initialized.
 */
public getLogLevel(): string {
    const { initialized, logLevel } = this;
    assert(initialized);

    return logLevel;
}
public setInitTrialSequenceId(initSequenceId: number): void {
assert(this.initialized);
this.initTrialSequenceID = initSequenceId;
......@@ -90,9 +116,15 @@ function getInitTrialSequenceId(): number {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getInitTrialSequenceId();
}
function setExperimentStartupInfo(newExperiment: boolean, experimentId: string, basePort: number): void {
component.get<ExperimentStartupInfo>(ExperimentStartupInfo).setStartupInfo(newExperiment, experimentId, basePort);
/**
 * Looks up the ExperimentStartupInfo instance in the component container
 * and hands it back to the caller.
 */
function getExperimentStartupInfo(): ExperimentStartupInfo {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);

    return startupInfo;
}
/**
 * Forwards the experiment startup parameters to the ExperimentStartupInfo
 * instance obtained from the component container.
 *
 * @param newExperiment passed through to setStartupInfo unchanged
 * @param experimentId  passed through to setStartupInfo unchanged
 * @param basePort      passed through to setStartupInfo unchanged
 * @param logDir        optional log directory, passed through unchanged
 * @param logLevel      optional log level name, passed through unchanged
 */
function setExperimentStartupInfo(
    newExperiment: boolean, experimentId: string, basePort: number, logDir?: string, logLevel?: string): void {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);
    startupInfo.setStartupInfo(newExperiment, experimentId, basePort, logDir, logLevel);
}
export { ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment,
export { ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment, getExperimentStartupInfo,
setExperimentStartupInfo, setInitTrialSequenceId, getInitTrialSequenceId };
......@@ -26,13 +26,18 @@ import { Writable } from 'stream';
import { WritableStreamBuffer } from 'stream-buffers';
import { format } from 'util';
import * as component from '../common/component';
import { getExperimentStartupInfo } from './experimentStartupInfo';
import { getLogDir } from './utils';
const CRITICAL: number = 1;
const FATAL: number = 1;
const ERROR: number = 2;
const WARNING: number = 3;
const INFO: number = 4;
const DEBUG: number = 5;
const TRACE: number = 6;
const logLevelNameMap: Map<string, number> = new Map([['fatal', FATAL],
['error', ERROR], ['warning', WARNING], ['info', INFO], ['debug', DEBUG], ['trace', TRACE]]);
class BufferSerialEmitter {
private buffer: Buffer;
......@@ -83,12 +88,25 @@ class Logger {
autoClose: true
});
this.bufferSerialEmitter = new BufferSerialEmitter(this.writable);
const logLevelName: string = getExperimentStartupInfo()
.getLogLevel();
const logLevel: number | undefined = logLevelNameMap.get(logLevelName);
if (logLevel !== undefined) {
this.level = logLevel;
}
}
public close() {
this.writable.destroy();
}
/**
 * Writes a 'TRACE' entry, but only when the logger's current level is at
 * least TRACE; otherwise the call is a no-op.
 *
 * @param param values handed to the internal log routine
 */
public trace(...param: any[]): void {
    if (!(this.level >= TRACE)) {
        return;
    }
    this.log('TRACE', param);
}
public debug(...param: any[]): void {
if (this.level >= DEBUG) {
this.log('DEBUG', param);
......@@ -113,8 +131,8 @@ class Logger {
}
}
public critical(...param: any[]): void {
this.log('CRITICAL', param);
/**
 * Writes a 'FATAL' entry. No level check is performed here, so the entry
 * is always handed to the internal log routine.
 *
 * @param param values handed to the internal log routine
 */
public fatal(...param: any[]): void {
    const levelName: string = 'FATAL';
    this.log(levelName, param);
}
private log(level: string, param: any[]): void {
......
......@@ -30,13 +30,14 @@ import { Container } from 'typescript-ioc';
import * as util from 'util';
import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
import { ExperimentStartupInfo, getExperimentId, getExperimentStartupInfo, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager';
import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService';
import { getLogger } from './log';
/**
 * Returns the root directory of the current experiment.
 *
 * The value comes from the experiment startup info, so a log directory
 * supplied at startup is honoured. Previously a hard-coded
 * `<home>/nni/experiments/<id>` path was returned first, which made the
 * startup-info lookup unreachable.
 */
function getExperimentRootDir(): string {
    return getExperimentStartupInfo()
        .getLogDir();
}
function getLogDir(): string{
......
......@@ -35,7 +35,7 @@ import {
import {
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../common/trainingService';
import { delay, getCheckpointDir, getLogDir, getMsgDispatcherCommand, mkDirP } from '../common/utils';
import { delay, getCheckpointDir, getExperimentRootDir, getLogDir, getMsgDispatcherCommand, mkDirP } from '../common/utils';
import {
ADD_CUSTOMIZED_TRIAL_JOB, INITIALIZE, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, PING,
REPORT_METRIC_DATA, REQUEST_TRIAL_JOBS, SEND_TRIAL_JOB_PARAMETER, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
......@@ -670,7 +670,7 @@ class NNIManager implements Manager {
id: getExperimentId(),
revision: 0,
execDuration: 0,
logDir: getLogDir(),
logDir: getExperimentRootDir(),
maxSequenceId: 0,
params: {
authorName: '',
......
......@@ -22,6 +22,7 @@
import { Container, Scope } from 'typescript-ioc';
import * as component from './common/component';
import * as fs from 'fs';
import { Database, DataStore } from './common/datastore';
import { setExperimentStartupInfo } from './common/experimentStartupInfo';
import { getLogger, Logger } from './common/log';
......@@ -40,10 +41,10 @@ import { PAITrainingService } from './training_service/pai/paiTrainingService';
import { KubeflowTrainingService } from './training_service/kubernetes/kubeflow/kubeflowTrainingService';
import { FrameworkControllerTrainingService } from './training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService';
function initStartupInfo(startExpMode: string, resumeExperimentId: string, basePort: number) {
function initStartupInfo(startExpMode: string, resumeExperimentId: string, basePort: number, logDirectory: string, experimentLogLevel: string) {
const createNew: boolean = (startExpMode === 'new');
const expId: string = createNew ? uniqueString(8) : resumeExperimentId;
setExperimentStartupInfo(createNew, expId, basePort);
setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel);
}
async function initContainer(platformMode: string): Promise<void> {
......@@ -102,7 +103,19 @@ if (startMode === 'resume' && experimentId.trim().length < 1) {
process.exit(1);
}
initStartupInfo(startMode, experimentId, port);
// Optional --log_dir argument: a missing directory is only reported on the
// console; startup is not aborted here.
const logDir: string = parseArg(['--log_dir', '-ld']);
if (logDir.length > 0 && !fs.existsSync(logDir)) {
    console.log(`FATAL: log_dir ${logDir} does not exist`);
}

// Optional --log_level argument: accept exactly the level names the logger maps
// to numeric levels (trace, debug, info, warning, error, fatal). The former list
// rejected 'trace' and 'fatal' and accepted 'critical', which the logger does
// not recognise. An invalid name is only reported; startup is not aborted here.
const logLevel: string = parseArg(['--log_level', '-ll']);
if (logLevel.length > 0 && !['trace', 'debug', 'info', 'warning', 'error', 'fatal'].includes(logLevel)) {
    console.log(`FATAL: invalid log_level: ${logLevel}`);
}
initStartupInfo(startMode, experimentId, port, logDir, logLevel);
mkDirP(getLogDir()).then(async () => {
const log: Logger = getLogger();
......
......@@ -105,7 +105,7 @@ class NNIRestHandler {
// If it's a fatal error, exit process
if(isFatal) {
this.log.critical(err);
this.log.fatal(err);
process.exit(1);
}
......
......@@ -221,12 +221,12 @@ class SlideBar extends React.Component<{}, SliderState> {
Download <Icon type="down" />
</a>
</Dropdown>
<a href="https://github.com/Microsoft/nni/issues/new" target="_blank">
<a href="https://github.com/Microsoft/nni/issues/new?labels=v0.5" target="_blank">
<img
src={require('../static/img/icon/issue.png')}
alt="NNI github issue"
/>
FeedBack
Feedback
</a>
<span className="version">Version: {version}</span>
</Col>
......
......@@ -43,7 +43,7 @@ class BasicInfo extends React.Component<BasicInfoProps, {}> {
</div>
</Col>
<Col span={8} className="padItem basic">
<p>LogPath</p>
<p>Log Directory</p>
<div className="nowrap">
<Tooltip placement="top" title={trialProfile.logDir}>
{trialProfile.logDir}
......
......@@ -33,6 +33,8 @@ Optional('searchSpacePath'): os.path.exists,
Optional('multiPhase'): bool,
Optional('multiThread'): bool,
Optional('nniManagerIp'): str,
Optional('logDir'): os.path.isdir,
Optional('logLevel'): Or('trace', 'debug', 'info', 'warning', 'error', 'fatal'),
'useAnnotation': bool,
Optional('advisor'): Or({
'builtinAdvisorName': Or('Hyperband'),
......
......@@ -58,8 +58,47 @@ def print_log_content(config_file_name):
stderr_content = check_output(stderr_cmds)
print(stderr_content.decode('utf-8'))
def get_nni_installation_path():
''' Find nni lib from the following locations in order
Return nni root directory if it exists
'''
def try_installation_path_sequentially(*sitepackages):
'''Try different installation paths sequentially until nni is found.
Return None if nothing is found
'''
def _generate_installation_path(sitepackages_path):
python_dir = str(Path(sitepackages_path).parents[2])
entry_file = os.path.join(python_dir, 'nni', 'main.js')
if os.path.isfile(entry_file):
return python_dir
return None
for sitepackage in sitepackages:
python_dir = _generate_installation_path(sitepackage)
if python_dir:
return python_dir
return None
if os.getenv('VIRTUAL_ENV'):
# if 'virtualenv' package is used, `site` has not attr getsitepackages, so we will instead use VIRTUAL_ENV
# Note that conda venv will not have VIRTUAL_ENV
python_dir = os.getenv('VIRTUAL_ENV')
else:
python_sitepackage = site.getsitepackages()[0]
# If system-wide python is used, we will give priority to using `local sitepackage`--"usersitepackages()" given that nni exists there
if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'):
python_dir = try_installation_path_sequentially(site.getusersitepackages(), site.getsitepackages()[0])
else:
python_dir = try_installation_path_sequentially(site.getsitepackages()[0], site.getusersitepackages())
def start_rest_server(port, platform, mode, config_file_name, experiment_id=None):
if python_dir:
entry_file = os.path.join(python_dir, 'nni', 'main.js')
if os.path.isfile(entry_file):
return os.path.join(python_dir, 'nni')
print_error('Fail to find nni under python library')
exit(1)
def start_rest_server(port, platform, mode, config_file_name, experiment_id=None, log_dir=None, log_level=None):
'''Run nni manager process'''
nni_config = Config(config_file_name)
if detect_port(port):
......@@ -74,27 +113,15 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
exit(1)
print_normal('Starting restful server...')
# Find nni lib from the following locations in order
sys_wide_python = True
python_sitepackage = site.getsitepackages()[0]
# If system-wide python is used, we will give priority to using user-sitepackage given that nni exists there
if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'):
local_python_dir = str(Path(site.getusersitepackages()).parents[2])
entry_file = os.path.join(local_python_dir, 'nni', 'main.js')
entry_dir = os.path.join(local_python_dir, 'nni')
else:
# If this python is not system-wide python, we will use its site-package directly
sys_wide_python = False
if not sys_wide_python or not os.path.isfile(entry_file):
python_dir = str(Path(python_sitepackage).parents[2])
entry_file = os.path.join(python_dir, 'nni', 'main.js')
entry_dir = os.path.join(python_dir, 'nni')
# Nothing is found
if not os.path.isfile(entry_file):
raise Exception('Fail to find nni under both "%s" and "%s"' % (local_python_dir, python_dir))
entry_dir = get_nni_installation_path()
entry_file = os.path.join(entry_dir, 'main.js')
cmds = ['node', entry_file, '--port', str(port), '--mode', platform, '--start_mode', mode]
if log_dir is not None:
cmds += ['--log_dir', log_dir]
if log_level is not None:
cmds += ['--log_level', log_level]
if mode == 'resume':
cmds += ['--experiment_id', experiment_id]
stdout_full_path, stderr_full_path = get_log_path(config_file_name)
......@@ -294,9 +321,12 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
except ModuleNotFoundError as e:
print_error('The tuner %s should be installed through nnictl'%(tuner_name))
exit(1)
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
if log_level not in ['trace', 'debug'] and args.debug:
log_level = 'debug'
# start rest server
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id)
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id, log_dir, log_level)
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
......
......@@ -51,12 +51,14 @@ def parse_args():
parser_start = subparsers.add_parser('create', help='create a new experiment')
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set log level to debug')
parser_start.set_defaults(func=create_experiment)
# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set log level to debug')
parser_resume.set_defaults(func=resume_experiment)
# parse update command
......
......@@ -38,11 +38,12 @@ from .url_utils import gen_send_stdout_url
@unique
class LogType(Enum):
Trace = 'TRACE'
Debug = 'DEBUG'
Info = 'INFO'
Warning = 'WARNING'
Error = 'ERROR'
Critical = 'CRITICAL'
Fatal = 'FATAL'
@unique
class StdOutputType(Enum):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment