Commit 19173aa4 authored by Guoxin, committed by QuanluZhang

merge v1.0(bug bash) back to master (#1462)

* squash commits in v1.0 first round bug bash
parent f721b431
@@ -4,4 +4,4 @@
 [SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) is based on Sequential Model-Based Optimization (SMBO). It builds on the previously well-performing model class (Gaussian stochastic process models) and introduces random forests into SMBO to handle categorical parameters. SMAC in NNI is supported through a wrapper of [SMAC3](https://github.com/automl/SMAC3).
-SMAC in NNI only supports a subset of the types in the [search space](../Tutorial/SearchSpaceSpec.md), including `choice`, `randint`, `uniform`, `loguniform`, and `quniform(q=1)`.
+SMAC in NNI only supports a subset of the types in the [search space](../Tutorial/SearchSpaceSpec.md), including `choice`, `randint`, `uniform`, `loguniform`, and `quniform`.
\ No newline at end of file
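For reference, a minimal search-space sketch that stays within the types listed above; the parameter names and ranges are made up for illustration (after this commit, `randint` takes `[lower, upper)` with the upper bound excluded):

```python
import json

# Illustrative only: every _type below is one that SMAC in NNI supports.
search_space = {
    "optimizer":     {"_type": "choice",     "_value": ["SGD", "Adam"]},
    "hidden_size":   {"_type": "randint",    "_value": [64, 513]},       # integers 64..512
    "dropout_rate":  {"_type": "uniform",    "_value": [0.1, 0.5]},
    "learning_rate": {"_type": "loguniform", "_value": [1e-4, 1e-1]},
    "batch_size":    {"_type": "quniform",   "_value": [16, 128, 16]},
}
print(json.dumps(search_space, indent=2))
```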
@@ -46,7 +46,7 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
     }
     try {
         fileNameValid = await validateFileNameRecursively(codeDir);
-    } catch(error) {
+    } catch (error) {
         throw new Error(`Validate file name error: ${error}`);
     }
@@ -55,23 +55,24 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
         + ` please check if it's a valid code dir`;
         throw new Error(errMessage);
     }
-    if(!fileNameValid) {
+    if (!fileNameValid) {
         const errMessage: string = `File name in ${codeDir} is not valid, please check file names, only support digit number、alphabet and (.-_) in file name.`;
         throw new Error(errMessage);
     }
     return fileCount;
 }

 /**
  * create a new directory
  * @param directory
  */
-export async function execMkdir(directory: string): Promise<void> {
+export async function execMkdir(directory: string, share: boolean = false): Promise<void> {
     if (process.platform === 'win32') {
         await cpp.exec(`powershell.exe New-Item -Path ${directory} -ItemType "directory" -Force`);
+    } else if (share) {
+        await cpp.exec(`(umask 0; mkdir -p ${directory})`);
     } else {
         await cpp.exec(`mkdir -p ${directory}`);
     }
...
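The new `share` flag exists so that directories holding shared artifacts (such as the GPU metrics collector folder changed below) are created world-writable: clearing the umask inside a subshell lets `mkdir -p` keep the full 0777 mode. A small Python sketch of the same idiom, assuming a POSIX shell and a freshly created, illustrative path:

```python
import os
import stat
import subprocess

path = '/tmp/nni_shared_demo'
# Same trick as execMkdir(..., share=true): clear the umask in a subshell so the
# new directory keeps mode 0777 and other local users can write into it.
subprocess.run(f'(umask 0; mkdir -p {path})', shell=True, check=True)
mode = stat.S_IMODE(os.stat(path).st_mode)
print(oct(mode))   # 0o777 for a directory that did not exist before
```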
@@ -54,6 +54,9 @@ class GPUScheduler {
             } catch (error) {
                 this.log.error('Read GPU summary failed with error: ', error);
             }
+            if (this.gpuSummary !== undefined && this.gpuSummary.gpuCount === 0) {
+                throw new Error('GPU not available. Please check your CUDA configuration');
+            }
             await delay(5000);
         }
     }
@@ -97,7 +100,7 @@ class GPUScheduler {
      * used to run in remote machine, and will be deleted after uploaded from local.
      */
     private async runGpuMetricsCollectorScript(): Promise<void> {
-        await execMkdir(this.gpuMetricCollectorScriptFolder);
+        await execMkdir(this.gpuMetricCollectorScriptFolder, true);
         //generate gpu_metrics_collector script
         const gpuMetricsCollectorScriptPath: string =
             path.join(this.gpuMetricCollectorScriptFolder, getScriptName('gpu_metrics_collector'));
...
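The new guard fails fast when the collected GPU summary reports zero devices instead of scheduling trials that can never run. This is not NNI code, just an operator-side sketch of how the same precondition can be checked before starting an experiment, using `nvidia-smi -L`:

```python
import subprocess

def visible_gpu_count() -> int:
    # `nvidia-smi -L` prints one line per visible GPU; an empty result (or a
    # missing binary) corresponds to the gpuCount === 0 case that now raises.
    try:
        out = subprocess.run(['nvidia-smi', '-L'], capture_output=True, text=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        return 0
    return len([line for line in out.stdout.splitlines() if line.strip()])

if visible_gpu_count() == 0:
    raise RuntimeError('GPU not available. Please check your CUDA configuration')
```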
@@ -131,7 +131,7 @@ class LocalTrainingService implements TrainingService {
     private readonly occupiedGpuIndexNumMap: Map<number, number>;
     private designatedGpuIndices!: Set<number>;
     private readonly log: Logger;
-    private localTrailConfig?: TrialConfig;
+    private localTrialConfig?: TrialConfig;
     private localConfig?: LocalConfig;
     private isMultiPhase: boolean;
     private readonly jobStreamMap: Map<string, ts.Stream>;
@@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
             } catch (error) {
                 //ignore
             }
-            this.log.debug(`trailJob status update: ${trialJobId}, ${trialJob.status}`);
+            this.log.debug(`trialJob status update: ${trialJobId}, ${trialJob.status}`);
         }
     }
@@ -302,14 +302,14 @@ class LocalTrainingService implements TrainingService {
             }
             switch (key) {
                 case TrialConfigMetadataKey.TRIAL_CONFIG:
-                    this.localTrailConfig = <TrialConfig>JSON.parse(value);
+                    this.localTrialConfig = <TrialConfig>JSON.parse(value);
                     // Parse trial config failed, throw Error
-                    if (this.localTrailConfig === undefined) {
+                    if (this.localTrialConfig === undefined) {
                         throw new Error('trial config parsed failed');
                     }
-                    if (this.localTrailConfig.gpuNum !== undefined) {
-                        this.log.info(`required GPU number is ${this.localTrailConfig.gpuNum}`);
-                        if (this.gpuScheduler === undefined && this.localTrailConfig.gpuNum > 0) {
+                    if (this.localTrialConfig.gpuNum !== undefined) {
+                        this.log.info(`required GPU number is ${this.localTrialConfig.gpuNum}`);
+                        if (this.gpuScheduler === undefined && this.localTrialConfig.gpuNum > 0) {
                             this.gpuScheduler = new GPUScheduler();
                         }
                     }
@@ -343,10 +343,10 @@ class LocalTrainingService implements TrainingService {
             switch (key) {
                 case TrialConfigMetadataKey.TRIAL_CONFIG:
                     let getResult: Promise<string>;
-                    if (this.localTrailConfig === undefined) {
+                    if (this.localTrialConfig === undefined) {
                         getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
                     } else {
-                        getResult = Promise.resolve(JSON.stringify(this.localTrailConfig));
+                        getResult = Promise.resolve(JSON.stringify(this.localTrialConfig));
                     }

                     return getResult;
@@ -359,8 +359,8 @@ class LocalTrainingService implements TrainingService {
         this.log.info('Stopping local machine training service...');
         this.stopping = true;
         for (const stream of this.jobStreamMap.values()) {
-            stream.end(0)
-            stream.emit('end')
+            stream.end(0);
+            stream.emit('end');
         }
         if (this.gpuScheduler !== undefined) {
             await this.gpuScheduler.stop();
@@ -378,8 +378,8 @@ class LocalTrainingService implements TrainingService {
                 throw new Error(`Could not find stream in trial ${trialJob.id}`);
             }
             //Refer https://github.com/Juul/tail-stream/issues/20
-            stream.end(0)
-            stream.emit('end')
+            stream.end(0);
+            stream.emit('end');
             this.jobStreamMap.delete(trialJob.id);
         }
     }
@@ -427,8 +427,8 @@ class LocalTrainingService implements TrainingService {
     }

     private tryGetAvailableResource(): [boolean, { gpuIndices: number[]}] {
-        if (this.localTrailConfig === undefined) {
-            throw new Error('localTrailConfig is not initialized!');
+        if (this.localTrialConfig === undefined) {
+            throw new Error('localTrialConfig is not initialized!');
         }
         const resource: { gpuIndices: number[] } = { gpuIndices: [] };
@@ -450,11 +450,11 @@ class LocalTrainingService implements TrainingService {
             selectedGPUIndices = selectedGPUIndices.filter((index: number) => this.designatedGpuIndices.has(index));
         }

-        if (selectedGPUIndices.length < this.localTrailConfig.gpuNum) {
+        if (selectedGPUIndices.length < this.localTrialConfig.gpuNum) {
             return [false, resource];
         }

-        selectedGPUIndices.splice(this.localTrailConfig.gpuNum);
+        selectedGPUIndices.splice(this.localTrialConfig.gpuNum);
         Object.assign(resource, { gpuIndices: selectedGPUIndices });

         return [true, resource];
@@ -494,7 +494,7 @@ class LocalTrainingService implements TrainingService {
             if (!success) {
                 break;
             }
             this.occupyResource(resource);
             await this.runTrialJob(trialJobId, resource);
         }
@@ -512,18 +512,23 @@ class LocalTrainingService implements TrainingService {
             }
         }

-    private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[] {
+    private getScript(localTrialConfig: TrialConfig, workingDirectory: string): string[] {
         const script: string[] = [];
         if (process.platform === 'win32') {
             script.push(
-                `cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
+                `cmd /c ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
                 `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
                 `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`,
                 `Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`);
         } else {
-            script.push(
-                `eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
-                `echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`);
+            script.push(`eval ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`);
+            if (process.platform === 'darwin') {
+                // https://superuser.com/questions/599072/how-to-get-bash-execution-time-in-milliseconds-under-mac-os-x
+                // Considering the worst case, write 999 to avoid negative duration
+                script.push(`echo $? \`date +%s999\` >${path.join(workingDirectory, '.nni', 'state')}`);
+            } else {
+                script.push(`echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`);
+            }
         }

         return script;
@@ -531,23 +536,23 @@ class LocalTrainingService implements TrainingService {
     private async runTrialJob(trialJobId: string, resource: {gpuIndices: number[]}): Promise<void> {
         const trialJobDetail: LocalTrialJobDetail = <LocalTrialJobDetail>this.jobMap.get(trialJobId);
-        if (this.localTrailConfig === undefined) {
+        if (this.localTrialConfig === undefined) {
             throw new Error(`localTrialConfig not initialized!`);
         }
-        const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrailConfig.gpuNum);
+        const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrialConfig.gpuNum);

-        if (this.localTrailConfig === undefined) {
+        if (this.localTrialConfig === undefined) {
             throw new Error('trial config is not initialized');
         }
         const runScriptContent: string[] = [];
         if (process.platform !== 'win32') {
             runScriptContent.push('#!/bin/bash');
         }
-        runScriptContent.push(`cd ${this.localTrailConfig.codeDir}`);
+        runScriptContent.push(`cd ${this.localTrialConfig.codeDir}`);
         for (const variable of variables) {
             runScriptContent.push(setEnvironmentVariable(variable));
         }
-        const scripts: string[] = this.getScript(this.localTrailConfig, trialJobDetail.workingDirectory);
+        const scripts: string[] = this.getScript(this.localTrialConfig, trialJobDetail.workingDirectory);
         scripts.forEach((script: string) => {
             runScriptContent.push(script);
         });
...
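On non-Windows platforms the generated script writes the trial's exit code and an epoch-millisecond timestamp into `.nni/state`; macOS `date` cannot print milliseconds, so the commit pads the seconds with a literal `999`, accepting a worst-case overestimate rather than a negative duration. A hedged Python sketch of how such a state line could be read back (the helper name and file path are hypothetical, not NNI APIs):

```python
from pathlib import Path
from typing import Tuple

def read_trial_state(state_file: str) -> Tuple[int, int]:
    # The script writes "<exit code> <epoch timestamp in ms>" as a single line;
    # on macOS the milliseconds are the literal 999 mentioned in the comment above.
    exit_code, end_time_ms = Path(state_file).read_text().split()[:2]
    return int(exit_code), int(end_time_ms)

# Hypothetical example file as written by a trial that exited with code 0:
Path('/tmp/state_demo').write_text('0 1565000000999')
print(read_trial_state('/tmp/state_demo'))   # (0, 1565000000999)
```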
@@ -32,7 +32,7 @@ export namespace HDFSClientUtility {
      * Get NNI experiment root directory
      * @param hdfsUserName HDFS user name
      */
-    function hdfsExpRootDir(hdfsUserName: string): string {
+    export function hdfsExpRootDir(hdfsUserName: string): string {
         // tslint:disable-next-line:prefer-template
         return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
     }
...
@@ -74,9 +74,11 @@ class PAITrainingService implements TrainingService {
     private paiRestServerPort?: number;
     private nniManagerIpConfig?: NNIManagerIpConfig;
     private copyExpCodeDirPromise?: Promise<void>;
+    private copyAuthFilePromise?: Promise<void>;
     private versionCheck: boolean = true;
     private logCollection: string;
     private isMultiPhase: boolean = false;
+    private authFileHdfsPath: string | undefined = undefined;

     constructor() {
         this.log = getLogger();
@@ -292,6 +294,12 @@ class PAITrainingService implements TrainingService {
                     HDFSClientUtility.getHdfsExpCodeDir(this.paiClusterConfig.userName),
                     this.hdfsClient
                 );

+                // Upload authFile to hdfs
+                if (this.paiTrialConfig.authFile) {
+                    this.authFileHdfsPath = unixPathJoin(HDFSClientUtility.hdfsExpRootDir(this.paiClusterConfig.userName), 'authFile');
+                    this.copyAuthFilePromise = HDFSClientUtility.copyFileToHdfs(this.paiTrialConfig.authFile, this.authFileHdfsPath, this.hdfsClient);
+                }
                 deferred.resolve();
                 break;
@@ -373,6 +381,10 @@ class PAITrainingService implements TrainingService {
             await this.copyExpCodeDirPromise;
         }

+        //Make sure authFile is copied from local to HDFS
+        if (this.paiTrialConfig.authFile) {
+            await this.copyAuthFilePromise;
+        }
         // Step 1. Prepare PAI job configuration

         const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
@@ -449,7 +461,7 @@ class PAITrainingService implements TrainingService {
             // Add Virtual Cluster
             this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
             //Task auth File
-            this.paiTrialConfig.authFile
+            this.authFileHdfsPath
         );

         // Step 2. Upload code files in codeDir onto HDFS
...
@@ -511,12 +511,16 @@ class RemoteMachineTrainingService implements TrainingService {
         // tslint:disable-next-line: no-floating-promises
         SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics_collector.sh')}`, conn);

-        this.timer.subscribe(
+        const disposable: Rx.IDisposable = this.timer.subscribe(
             async (tick: number) => {
                 const cmdresult: RemoteCommandResult = await SSHClientUtility.remoteExeCommand(
                     `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
                 if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
                     rmMeta.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
+                    if (rmMeta.gpuSummary.gpuCount === 0) {
+                        this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
+                        this.timer.unsubscribe(disposable);
+                    }
                 }
             }
         );
...
@@ -31,7 +31,7 @@ import ConfigSpace.hyperparameters as CSH
 from nni.protocol import CommandType, send
 from nni.msg_dispatcher_base import MsgDispatcherBase
-from nni.utils import OptimizeMode, MetricType, extract_scalar_reward, randint_to_quniform
+from nni.utils import OptimizeMode, MetricType, extract_scalar_reward
 from nni.common import multi_phase_enabled

 from .config_generator import CG_BOHB
@@ -467,7 +467,6 @@ class BOHB(MsgDispatcherBase):
             search space of this experiment
         """
         search_space = data
-        randint_to_quniform(search_space)
         cs = CS.ConfigurationSpace()
         for var in search_space:
             _type = str(search_space[var]["_type"])
@@ -476,7 +475,7 @@ class BOHB(MsgDispatcherBase):
                     var, choices=search_space[var]["_value"]))
             elif _type == 'randint':
                 cs.add_hyperparameter(CSH.UniformIntegerHyperparameter(
-                    var, lower=0, upper=search_space[var]["_value"][0]))
+                    var, lower=search_space[var]["_value"][0], upper=search_space[var]["_value"][1] - 1))
             elif _type == 'uniform':
                 cs.add_hyperparameter(CSH.UniformFloatHyperparameter(
                     var, lower=search_space[var]["_value"][0], upper=search_space[var]["_value"][1]))
...
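The BOHB change maps NNI's `randint`, whose `_value` is now `[lower, upper)` with an exclusive upper bound, onto ConfigSpace's `UniformIntegerHyperparameter`, whose bounds are both inclusive, hence the `upper - 1`. A minimal standalone sketch of that mapping (assumes the `ConfigSpace` package is installed; the variable name is illustrative):

```python
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

# NNI entry {"_type": "randint", "_value": [lower, upper]} samples integers
# lower, ..., upper - 1, so the inclusive ConfigSpace upper bound is upper - 1.
lower, upper = 8, 64
cs = CS.ConfigurationSpace()
cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hidden_size', lower=lower, upper=upper - 1))
print(cs.sample_configuration())   # e.g. hidden_size, always within [8, 63]
```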
@@ -26,7 +26,7 @@ import random
 import numpy as np

 from nni.tuner import Tuner
-from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index

 import nni.parameter_expressions as parameter_expressions
@@ -175,7 +175,6 @@ class EvolutionTuner(Tuner):
         search_space : dict
         """
         self.searchspace_json = search_space
-        randint_to_quniform(self.searchspace_json)
         self.space = json2space(self.searchspace_json)

         self.random_state = np.random.RandomState()
...
@@ -31,7 +31,7 @@ import json_tricks
 from nni.protocol import CommandType, send
 from nni.msg_dispatcher_base import MsgDispatcherBase
 from nni.common import init_logger, multi_phase_enabled
-from nni.utils import NodeType, OptimizeMode, MetricType, extract_scalar_reward, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, MetricType, extract_scalar_reward
 import nni.parameter_expressions as parameter_expressions

 _logger = logging.getLogger(__name__)
@@ -358,7 +358,6 @@ class Hyperband(MsgDispatcherBase):
             number of trial jobs
         """
         self.searchspace_json = data
-        randint_to_quniform(self.searchspace_json)
         self.random_state = np.random.RandomState()

     def _handle_trial_end(self, parameter_id):
...
@@ -27,7 +27,7 @@ import logging
 import hyperopt as hp
 import numpy as np
 from nni.tuner import Tuner
-from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index

 logger = logging.getLogger('hyperopt_AutoML')
@@ -51,6 +51,8 @@ def json2space(in_x, name=NodeType.ROOT):
         _value = json2space(in_x[NodeType.VALUE], name=name)
         if _type == 'choice':
             out_y = eval('hp.hp.choice')(name, _value)
+        elif _type == 'randint':
+            out_y = hp.hp.randint(name, _value[1] - _value[0])
         else:
             if _type in ['loguniform', 'qloguniform']:
                 _value[:2] = np.log(_value[:2])
@@ -93,6 +95,8 @@ def json2parameter(in_x, parameter, name=NodeType.ROOT):
         else:
             if _type in ['quniform', 'qloguniform']:
                 out_y = np.clip(parameter[name], in_x[NodeType.VALUE][0], in_x[NodeType.VALUE][1])
+            elif _type == 'randint':
+                out_y = parameter[name] + in_x[NodeType.VALUE][0]
             else:
                 out_y = parameter[name]
     else:
@@ -247,7 +251,6 @@ class HyperoptTuner(Tuner):
         search_space : dict
         """
         self.json = search_space
-        randint_to_quniform(self.json)

         search_space_instance = json2space(self.json)
         rstate = np.random.RandomState()
@@ -279,7 +282,7 @@ class HyperoptTuner(Tuner):
             total_params = self.get_suggestion(random_search=False)
             # avoid generating same parameter with concurrent trials because hyperopt doesn't support parallel mode
             if total_params in self.total_data.values():
-                # but it can cause deplicate parameter rarely
+                # but it can cause duplicate parameter rarely
                 total_params = self.get_suggestion(random_search=True)
             self.total_data[parameter_id] = total_params
@@ -315,6 +318,10 @@ class HyperoptTuner(Tuner):
             rval = self.CL_rval
         else:
             rval = self.rval

+        # ignore a duplicated final result (caused by awareness of intermediate results)
+        if parameter_id not in self.running_data:
+            logger.info("Received duplicated final result with parameter id: %s", parameter_id)
+            return
         self.running_data.remove(parameter_id)

         # update the reward of optimal_y
...
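With these two additions, a `randint` entry is handed to hyperopt as `hp.randint(name, upper - lower)`, which samples an integer in `[0, upper - lower)`, and the lower bound is added back when the raw sample is converted into a concrete parameter. A small standalone sketch of that round trip (variable names are illustrative):

```python
import hyperopt as hp
from hyperopt.pyll import stochastic

lower, upper = 1, 3                                 # NNI randint _value: integers in [1, 3)
space = {'a': hp.hp.randint('a', upper - lower)}    # hyperopt samples from [0, upper - lower)

raw = stochastic.sample(space)['a']
print(raw + lower)                                  # shifted back into [lower, upper): 1 or 2
```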
@@ -25,7 +25,7 @@ from unittest import TestCase, main

 import hyperopt as hp
-from nni.hyperopt_tuner.hyperopt_tuner import json2space, json2parameter, json2vals
+from nni.hyperopt_tuner.hyperopt_tuner import json2space, json2parameter, json2vals, HyperoptTuner


 class HyperoptTunerTestCase(TestCase):
@@ -99,6 +99,29 @@ class HyperoptTunerTestCase(TestCase):
         self.assertEqual(out_y["root[optimizer]-choice"], 0)
         self.assertEqual(out_y["root[learning_rate]-choice"], 1)

+    def test_tuner_generate(self):
+        for algorithm in ["tpe", "random_search", "anneal"]:
+            tuner = HyperoptTuner(algorithm)
+            choice_list = ["a", "b", 1, 2]
+            tuner.update_search_space({
+                "a": {
+                    "_type": "randint",
+                    "_value": [1, 3]
+                },
+                "b": {
+                    "_type": "choice",
+                    "_value": choice_list
+                }
+            })
+            for k in range(30):
+                # sample multiple times
+                param = tuner.generate_parameters(k)
+                print(param)
+                self.assertIsInstance(param["a"], int)
+                self.assertGreaterEqual(param["a"], 1)
+                self.assertLessEqual(param["a"], 2)
+                self.assertIn(param["b"], choice_list)

 if __name__ == '__main__':
     main()
@@ -55,7 +55,7 @@ def rand(x_bounds, x_types):
             temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
             outputs.append(temp)
         elif x_types[i] == "range_int":
-            temp = random.randint(x_bounds[i][0], x_bounds[i][1])
+            temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
             outputs.append(temp)
         elif x_types[i] == "range_continuous":
             temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
...
@@ -121,13 +121,12 @@ class MetisTuner(Tuner):
             key_range = search_space[key]['_value']
             idx = self.key_order.index(key)
             if key_type == 'quniform':
-                if key_range[2] == 1:
-                    self.x_bounds[idx] = [key_range[0], key_range[1]]
+                if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
+                    self.x_bounds[idx] = [key_range[0], key_range[1]+1]
                     self.x_types[idx] = 'range_int'
                 else:
-                    bounds = []
-                    for value in np.arange(key_range[0], key_range[1], key_range[2]):
-                        bounds.append(value)
+                    low, high, q = key_range
+                    bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
                     self.x_bounds[idx] = bounds
                     self.x_types[idx] = 'discrete_int'
             elif key_type == 'randint':
...
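The new grid construction enumerates the multiples of `q` between the rounded bounds and clips them back into `[low, high]`, which matches how `quniform` values are actually produced at sampling time (round the uniform draw to a multiple of `q`, then clip). A quick standalone illustration of the grid it yields for example bounds:

```python
import numpy as np

low, high, q = 0.5, 10.0, 2.0
grid = np.clip(np.arange(np.round(low / q), np.round(high / q) + 1) * q, low, high)
print(grid)   # [ 0.5  2.   4.   6.   8.  10. ] -- every reachable quniform value, clipped to the bounds
```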
@@ -32,12 +32,14 @@ def choice(options, random_state):
     return random_state.choice(options)


-def randint(upper, random_state):
+def randint(lower, upper, random_state):
     '''
+    Generate a random integer from `lower` (inclusive) to `upper` (exclusive).
+    lower: an int that represents the lower bound
     upper: an int that represents the upper bound
     random_state: an object of numpy.random.RandomState
     '''
-    return random_state.randint(upper)
+    return random_state.randint(lower, upper)


 def uniform(low, high, random_state):
...
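After the signature change, `randint` draws from the half-open interval `[lower, upper)` via `numpy.random.RandomState.randint`, matching the updated search-space semantics. A tiny usage sketch:

```python
import numpy as np
from nni import parameter_expressions as param_exp

rng = np.random.RandomState(42)
samples = [param_exp.randint(1, 3, rng) for _ in range(5)]
print(samples)   # every value is 1 or 2; the upper bound 3 is excluded
```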
@@ -88,10 +88,10 @@ def generate_pcs(nni_search_space_content):
                     raise RuntimeError('%s has already existed, please make sure search space has no duplicate key.' % key)
                 categorical_dict[key] = search_space[key]['_value']
             elif search_space[key]['_type'] == 'randint':
-                # TODO: support lower bound in randint
-                pcs_fd.write('%s integer [0, %d] [%d]\n' % (
+                pcs_fd.write('%s integer [%d, %d] [%d]\n' % (
                     key,
                     search_space[key]['_value'][0],
+                    search_space[key]['_value'][1] - 1,
                     search_space[key]['_value'][0]))
             elif search_space[key]['_type'] == 'uniform':
                 pcs_fd.write('%s real %s [%s]\n' % (
@@ -105,13 +105,13 @@ def generate_pcs(nni_search_space_content):
                     key,
                     json.dumps(search_space[key]['_value']),
                     json.dumps(search_space[key]['_value'][0])))
-            elif search_space[key]['_type'] == 'quniform' \
-                    and search_space[key]['_value'][2] == 1:
-                pcs_fd.write('%s integer [%d, %d] [%d]\n' % (
+            elif search_space[key]['_type'] == 'quniform':
+                low, high, q = search_space[key]['_value'][0:3]
+                vals = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high).tolist()
+                pcs_fd.write('%s ordinal {%s} [%s]\n' % (
                     key,
-                    search_space[key]['_value'][0],
-                    search_space[key]['_value'][1],
-                    search_space[key]['_value'][0]))
+                    json.dumps(vals)[1:-1],
+                    json.dumps(vals[0])))
             else:
                 raise RuntimeError('unsupported _type %s' % search_space[key]['_type'])
         except:
...
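With this change, a `quniform` parameter is written to the SMAC pcs file as an ordinal over the explicit grid of reachable values instead of being restricted to `q == 1` integer ranges. A small sketch of the line that would be produced for an example parameter (file handling omitted; the key and range are illustrative):

```python
import json
import numpy as np

key = 'batch_size'
low, high, q = 16, 128, 16
vals = np.clip(np.arange(np.round(low / q), np.round(high / q) + 1) * q, low, high).tolist()
line = '%s ordinal {%s} [%s]\n' % (key, json.dumps(vals)[1:-1], json.dumps(vals[0]))
print(line)   # batch_size ordinal {16.0, 32.0, 48.0, 64.0, 80.0, 96.0, 112.0, 128.0} [16.0]
```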
@@ -38,7 +38,7 @@ from ConfigSpaceNNI import Configuration
 from .convert_ss_to_scenario import generate_scenario

 from nni.tuner import Tuner
-from nni.utils import OptimizeMode, extract_scalar_reward, randint_to_quniform
+from nni.utils import OptimizeMode, extract_scalar_reward


 class SMACTuner(Tuner):
@@ -139,7 +139,6 @@ class SMACTuner(Tuner):
         search_space:
             search space
         """
-        randint_to_quniform(search_space)
         if not self.update_ss_done:
             self.categorical_dict = generate_scenario(search_space)
             if self.categorical_dict is None:
...
@@ -19,11 +19,11 @@
 # ==================================================================================================

-import random
 import numpy as np

 from .env_vars import trial_env_vars
 from . import trial
+from . import parameter_expressions as param_exp
 from .nas_utils import classic_mode, enas_mode, oneshot_mode, darts_mode
@@ -47,39 +47,39 @@ __all__ = [
 if trial_env_vars.NNI_PLATFORM is None:

     def choice(*options, name=None):
-        return random.choice(options)
+        return param_exp.choice(options, np.random.RandomState())

-    def randint(upper, name=None):
-        return random.randrange(upper)
+    def randint(lower, upper, name=None):
+        return param_exp.randint(lower, upper, np.random.RandomState())

     def uniform(low, high, name=None):
-        return random.uniform(low, high)
+        return param_exp.uniform(low, high, np.random.RandomState())

     def quniform(low, high, q, name=None):
         assert high > low, 'Upper bound must be larger than lower bound'
-        return np.clip(round(random.uniform(low, high) / q) * q, low, high)
+        return param_exp.quniform(low, high, q, np.random.RandomState())

     def loguniform(low, high, name=None):
         assert low > 0, 'Lower bound must be positive'
-        return np.exp(random.uniform(np.log(low), np.log(high)))
+        return param_exp.loguniform(low, high, np.random.RandomState())

     def qloguniform(low, high, q, name=None):
-        return np.clip(round(loguniform(low, high) / q) * q, low, high)
+        return param_exp.qloguniform(low, high, q, np.random.RandomState())

     def normal(mu, sigma, name=None):
-        return random.gauss(mu, sigma)
+        return param_exp.normal(mu, sigma, np.random.RandomState())

     def qnormal(mu, sigma, q, name=None):
-        return round(random.gauss(mu, sigma) / q) * q
+        return param_exp.qnormal(mu, sigma, q, np.random.RandomState())

     def lognormal(mu, sigma, name=None):
-        return np.exp(random.gauss(mu, sigma))
+        return param_exp.lognormal(mu, sigma, np.random.RandomState())

     def qlognormal(mu, sigma, q, name=None):
-        return round(lognormal(mu, sigma) / q) * q
+        return param_exp.qlognormal(mu, sigma, q, np.random.RandomState())

     def function_choice(*funcs, name=None):
-        return random.choice(funcs)()
+        return param_exp.choice(funcs, np.random.RandomState())()

     def mutable_layer():
         raise RuntimeError('Cannot call nni.mutable_layer in this mode')
@@ -89,7 +89,7 @@ else:
     def choice(options, name=None, key=None):
         return options[_get_param(key)]

-    def randint(upper, name=None, key=None):
+    def randint(lower, upper, name=None, key=None):
         return _get_param(key)

     def uniform(low, high, name=None, key=None):
...
@@ -111,23 +111,3 @@ def init_dispatcher_logger():
     if dispatcher_env_vars.NNI_LOG_DIRECTORY is not None:
         logger_file_path = os.path.join(dispatcher_env_vars.NNI_LOG_DIRECTORY, logger_file_path)
     init_logger(logger_file_path, dispatcher_env_vars.NNI_LOG_LEVEL)
-
-def randint_to_quniform(in_x):
-    if isinstance(in_x, dict):
-        if NodeType.TYPE in in_x.keys():
-            if in_x[NodeType.TYPE] == 'randint':
-                value = in_x[NodeType.VALUE]
-                value.append(1)
-                in_x[NodeType.TYPE] = 'quniform'
-                in_x[NodeType.VALUE] = value
-            elif in_x[NodeType.TYPE] == 'choice':
-                randint_to_quniform(in_x[NodeType.VALUE])
-        else:
-            for key in in_x.keys():
-                randint_to_quniform(in_x[key])
-    elif isinstance(in_x, list):
-        for temp in in_x:
-            randint_to_quniform(temp)
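With `randint_to_quniform` removed, tuners no longer rewrite `randint` entries into `quniform`; they consume `randint` directly with the `[lower, upper)` semantics used throughout this commit (lower bound inclusive, upper bound exclusive). A tiny illustration using a hypothetical search-space entry:

```python
# Hypothetical search-space entry; only the new semantics are illustrated here.
entry = {"_type": "randint", "_value": [1, 3]}

lower, upper = entry["_value"]
print(list(range(lower, upper)))   # [1, 2] -- the upper bound 3 is excluded
```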
 import * as React from 'react';
 import { Row, Col } from 'antd';
+import axios from 'axios';
+import { COLUMN, MANAGER_IP } from './static/const';
 import './App.css';
 import SlideBar from './components/SlideBar';

 interface AppState {
     interval: number;
     whichPageToFresh: string;
+    columnList: Array<string>;
+    concurrency: number;
 }

 class App extends React.Component<{}, AppState> {
@@ -14,7 +18,9 @@ class App extends React.Component<{}, AppState> {
         super(props);
         this.state = {
             interval: 10, // seconds
-            whichPageToFresh: ''
+            whichPageToFresh: '',
+            columnList: COLUMN,
+            concurrency: 1
         };
     }
@@ -31,25 +37,57 @@ class App extends React.Component<{}, AppState> {
         }
     }

+    changeColumn = (columnList: Array<string>) => {
+        if (this._isMounted === true) {
+            this.setState(() => ({ columnList: columnList }));
+        }
+    }
+
+    changeConcurrency = (val: number) => {
+        if (this._isMounted === true) {
+            this.setState(() => ({ concurrency: val }));
+        }
+    }
+
+    getConcurrency = () => {
+        axios(`${MANAGER_IP}/experiment`, {
+            method: 'GET'
+        })
+            .then(res => {
+                if (res.status === 200) {
+                    const params = res.data.params;
+                    if (this._isMounted) {
+                        this.setState(() => ({ concurrency: params.trialConcurrency }));
+                    }
+                }
+            });
+    }

     componentDidMount() {
         this._isMounted = true;
+        this.getConcurrency();
     }

     componentWillUnmount() {
         this._isMounted = false;
     }

     render() {
-        const { interval, whichPageToFresh } = this.state;
+        const { interval, whichPageToFresh, columnList, concurrency } = this.state;
         const reactPropsChildren = React.Children.map(this.props.children, child =>
-            // tslint:disable-next-line:no-any
-            React.cloneElement(child as React.ReactElement<any>, { interval, whichPageToFresh })
+            React.cloneElement(
                // tslint:disable-next-line:no-any
+                child as React.ReactElement<any>, {
+                    interval, whichPageToFresh,
+                    columnList, changeColumn: this.changeColumn,
+                    concurrency, changeConcurrency: this.changeConcurrency
+                })
         );

         return (
             <Row className="nni" style={{ minHeight: window.innerHeight }}>
                 <Row className="header">
                     <Col span={1} />
                     <Col className="headerCon" span={22}>
-                        <SlideBar changeInterval={this.changeInterval} changeFresh={this.changeFresh}/>
+                        <SlideBar changeInterval={this.changeInterval} changeFresh={this.changeFresh} />
                     </Col>
                     <Col span={1} />
                 </Row>
...