"docs/en_US/TrialExample/GbdtExample.md" did not exist on "a441558c7b79fa0feaf4868b4b8fa1d66b4120c1"
Unverified Commit 88ef6c04 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #197 from microsoft/master

merge master
parents 5f3c5ffd 555334de
This diff is collapsed.
**Automatic Feature Engineering in nni**
===
We now have an [example](https://github.com/SpongebBob/tabular_automl_NNI) that performs automatic feature engineering in NNI.
This code was contributed by members of our community. Many thanks to our lovely contributors!
We welcome more and more people to join us!
......@@ -7,7 +7,9 @@ def random_archi_generator(nas_ss, random_state):
'''
chosen_archi = {}
print("zql: nas search space: ", nas_ss)
for block_name, block in nas_ss.items():
for block_name, block_value in nas_ss.items():
assert block_value['_type'] == "mutable_layer", "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block = block_value['_value']
tmp_block = {}
for layer_name, layer in block.items():
tmp_layer = {}
......
......@@ -35,9 +35,10 @@ setup(
license = 'MIT',
url = 'https://github.com/Microsoft/nni',
packages = find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('tools'),
packages = find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('src/sdk/pycli') + find_packages('tools'),
package_dir = {
'nni': 'src/sdk/pynni/nni',
'nnicli': 'src/sdk/pycli/nnicli',
'nni_annotation': 'tools/nni_annotation',
'nni_cmd': 'tools/nni_cmd',
'nni_trial_tool':'tools/nni_trial_tool',
......
......@@ -51,10 +51,12 @@ export namespace ValidationSchemas {
command: joi.string().min(1),
virtualCluster: joi.string(),
shmMB: joi.number(),
authFile: joi.string(),
nasMode: joi.string().valid('classic_mode', 'enas_mode', 'oneshot_mode'),
worker: joi.object({
replicas: joi.number().min(1).required(),
image: joi.string().min(1),
privateRegistryAuthPath: joi.string().min(1),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
......@@ -64,6 +66,7 @@ export namespace ValidationSchemas {
ps: joi.object({
replicas: joi.number().min(1).required(),
image: joi.string().min(1),
privateRegistryAuthPath: joi.string().min(1),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
......@@ -73,6 +76,7 @@ export namespace ValidationSchemas {
master: joi.object({
replicas: joi.number().min(1).required(),
image: joi.string().min(1),
privateRegistryAuthPath: joi.string().min(1),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
......@@ -83,6 +87,7 @@ export namespace ValidationSchemas {
name: joi.string().min(1),
taskNum: joi.number().min(1).required(),
image: joi.string().min(1),
privateRegistryAuthPath: joi.string().min(1),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
......
......@@ -43,8 +43,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public readonly taskNum: number;
constructor(taskNum: number, command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string,
frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy) {
super(command, gpuNum, cpuNum, memoryMB, image);
frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy, privateRegistryFilePath?: string | undefined) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryFilePath);
this.frameworkAttemptCompletionPolicy = frameworkAttemptCompletionPolicy;
this.name = name;
this.taskNum = taskNum;
......
......@@ -305,7 +305,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
// Generate frameworkcontroller job resource config object
const frameworkcontrollerJobConfig: any =
this.generateFrameworkControllerJobConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName, podResources);
await this.generateFrameworkControllerJobConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName, podResources);
return Promise.resolve(frameworkcontrollerJobConfig);
}
......@@ -329,8 +329,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
* @param frameworkcontrollerJobName job name
* @param podResources pod template
*/
private generateFrameworkControllerJobConfig(trialJobId: string, trialWorkingFolder: string,
frameworkcontrollerJobName : string, podResources : any) : any {
private async generateFrameworkControllerJobConfig(trialJobId: string, trialWorkingFolder: string,
frameworkcontrollerJobName : string, podResources : any) : Promise<any> {
if (this.fcClusterConfig === undefined) {
throw new Error('frameworkcontroller Cluster config is not initialized');
}
......@@ -345,12 +345,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if (containerPort === undefined) {
throw new Error('Container port is not initialized');
}
const taskRole: any = this.generateTaskRoleConfig(
trialWorkingFolder,
this.fcTrialConfig.taskRoles[index].image,
`run_${this.fcTrialConfig.taskRoles[index].name}.sh`,
podResources[index],
containerPort
containerPort,
await this.createRegistrySecret(this.fcTrialConfig.taskRoles[index].privateRegistryAuthPath)
);
taskRoles.push({
name: this.fcTrialConfig.taskRoles[index].name,
......@@ -363,7 +365,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
});
}
return {
return Promise.resolve({
apiVersion: `frameworkcontroller.microsoft.com/v1`,
kind: 'Framework',
metadata: {
......@@ -379,11 +381,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
executionType: 'Start',
taskRoles: taskRoles
}
};
});
}
private generateTaskRoleConfig(trialWorkingFolder: string, replicaImage: string, runScriptFile: string,
podResources: any, containerPort: number): any {
private generateTaskRoleConfig(trialWorkingFolder: string, replicaImage: string, runScriptFile: string,
podResources: any, containerPort: number, privateRegistrySecretName: string | undefined): any {
if (this.fcClusterConfig === undefined) {
throw new Error('frameworkcontroller Cluster config is not initialized');
}
......@@ -451,13 +453,22 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
mountPath: '/mnt/frameworkbarrier'
}]
}];
const spec: any = {
containers: containers,
initContainers: initContainers,
restartPolicy: 'OnFailure',
volumes: volumeSpecMap.get('nniVolumes'),
hostNetwork: false
let spec: any = {
containers: containers,
initContainers: initContainers,
restartPolicy: 'OnFailure',
volumes: volumeSpecMap.get('nniVolumes'),
hostNetwork: false
};
if(privateRegistrySecretName) {
spec.imagePullSecrets = [
{
name: privateRegistrySecretName
}
]
}
if (this.fcClusterConfig.serviceAccountName !== undefined) {
spec.serviceAccountName = this.fcClusterConfig.serviceAccountName;
}
......
......@@ -135,8 +135,8 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate {
public readonly replicas: number;
constructor(replicas: number, command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string) {
super(command, gpuNum, cpuNum, memoryMB, image);
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath);
this.replicas = replicas;
}
}
......
......@@ -347,7 +347,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
// Generate kubeflow job resource config object
const kubeflowJobConfig: any = this.generateKubeflowJobConfig(trialJobId, trialWorkingFolder, kubeflowJobName, workerPodResources,
const kubeflowJobConfig: any = await this.generateKubeflowJobConfig(trialJobId, trialWorkingFolder, kubeflowJobName, workerPodResources,
nonWorkerResources);
return Promise.resolve(kubeflowJobConfig);
......@@ -361,8 +361,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master
*/
private generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any,
nonWorkerPodResources?: any) : any {
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any,
nonWorkerPodResources?: any) : Promise<any> {
if (this.kubeflowClusterConfig === undefined) {
throw new Error('Kubeflow Cluster config is not initialized');
}
......@@ -377,29 +377,32 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const replicaSpecsObj: any = {};
const replicaSpecsObjMap: Map<string, object> = new Map<string, object>();
if (this.kubeflowTrialConfig.operatorType === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
let privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources);
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
if (tensorflowTrialConfig.ps !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources);
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName);
}
replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {tfReplicaSpecs: replicaSpecsObj});
} else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') {
const pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if (pytorchTrialConfig.worker !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources);
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
}
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources);
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName);
replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {pytorchReplicaSpecs: replicaSpecsObj});
}
return {
return Promise.resolve({
apiVersion: `kubeflow.org/${this.kubernetesCRDClient.apiVersion}`,
kind: this.kubernetesCRDClient.jobKind,
metadata: {
......@@ -412,7 +415,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
},
spec: replicaSpecsObjMap.get(this.kubernetesCRDClient.jobKind)
};
});
}
/**
......@@ -424,7 +427,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param podResources pod resource config section
*/
private generateReplicaConfig(trialWorkingFolder: string, replicaNumber: number, replicaImage: string, runScriptFile: string,
podResources: any): any {
podResources: any, privateRegistrySecretName: string | undefined): any {
if (this.kubeflowClusterConfig === undefined) {
throw new Error('Kubeflow Cluster config is not initialized');
}
......@@ -436,7 +439,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if (this.kubernetesCRDClient === undefined) {
throw new Error('Kubeflow operator client is not initialized');
}
// The config spec for volume field
const volumeSpecMap: Map<string, object> = new Map<string, object>();
if (this.kubeflowClusterConfig.storageType === 'azureStorage') {
volumeSpecMap.set('nniVolumes', [
......@@ -459,7 +462,34 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}]);
}
// The config spec for container field
const containersSpecMap: Map<string, object> = new Map<string, object>();
containersSpecMap.set('containers', [
{
// Kubeflow tensorflow operator requires that containers' name must be tensorflow
// TODO: change the name based on operator's type
name: this.kubernetesCRDClient.containerName,
image: replicaImage,
args: ['sh', `${path.join(trialWorkingFolder, runScriptFile)}`],
volumeMounts: [
{
name: 'nni-vol',
mountPath: this.CONTAINER_MOUNT_PATH
}],
resources: podResources
}
]);
let spec: any = {
containers: containersSpecMap.get('containers'),
restartPolicy: 'ExitCode',
volumes: volumeSpecMap.get('nniVolumes')
}
if (privateRegistrySecretName) {
spec.imagePullSecrets = [
{
name: privateRegistrySecretName
}]
}
return {
replicas: replicaNumber,
template: {
......@@ -467,26 +497,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
// tslint:disable-next-line:no-null-keyword
creationTimestamp: null
},
spec: {
containers: [
{
// Kubeflow tensorflow operator requires that containers' name must be tensorflow
// TODO: change the name based on operator's type
name: this.kubernetesCRDClient.containerName,
image: replicaImage,
args: ['sh', `${path.join(trialWorkingFolder, runScriptFile)}`],
volumeMounts: [
{
name: 'nni-vol',
mountPath: this.CONTAINER_MOUNT_PATH
}],
resources: podResources
}],
restartPolicy: 'ExitCode',
volumes: volumeSpecMap.get('nniVolumes')
}
spec: spec
}
};
}
}
}
// tslint:enable: no-unsafe-any no-any
......
......@@ -179,6 +179,9 @@ export class KubernetesTrialConfigTemplate {
// Docker image
public readonly image: string;
// Private registry config file path to download docker image
public readonly privateRegistryAuthPath?: string;
// Trial command
public readonly command : string;
......@@ -186,12 +189,13 @@ export class KubernetesTrialConfigTemplate {
public readonly gpuNum : number;
constructor(command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string) {
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
this.command = command;
this.gpuNum = gpuNum;
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
this.image = image;
this.privateRegistryAuthPath = privateRegistryAuthPath;
}
}
......
......@@ -38,6 +38,8 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData';
import { KubernetesJobRestServer } from './kubernetesJobRestServer';
var fs = require('fs');
/**
* Training Service implementation for Kubernetes
*/
......@@ -327,5 +329,34 @@ abstract class KubernetesTrainingService {
return Promise.resolve();
}
/**
 * Create a Kubernetes secret holding private docker registry credentials.
 *
 * The file at `filePath` is read synchronously, base64-encoded and stored under the
 * `.dockerconfigjson` key of a `kubernetes.io/dockerconfigjson` secret in the
 * `default` namespace. The secret is labelled with the NNI trial label and the
 * current experiment id.
 *
 * @param filePath path of the registry auth file; `undefined` or `''` means no private registry
 * @returns the generated secret name, or `undefined` when no file path was supplied
 */
protected async createRegistrySecret(filePath: string | undefined): Promise<string | undefined> {
    // Nothing to create when the trial config does not point at an auth file.
    if (filePath === undefined || filePath === '') {
        return undefined;
    }

    const encodedAuthFile = fs.readFileSync(filePath).toString('base64');
    // The random suffix is lower-cased before it becomes part of the secret name.
    const secretName = String.Format('nni-secret-{0}', uniqueString(8).toLowerCase());

    await this.genericK8sClient.createSecret({
        apiVersion: 'v1',
        kind: 'Secret',
        metadata: {
            name: secretName,
            namespace: 'default',
            labels: {
                app: this.NNI_KUBERNETES_TRIAL_LABEL,
                expId: getExperimentId()
            }
        },
        type: 'kubernetes.io/dockerconfigjson',
        data: {
            '.dockerconfigjson': encodedAuthFile
        }
    });

    return secretName;
}
}
export { KubernetesTrainingService };
......@@ -71,6 +71,8 @@ export class PAIJobConfig {
public readonly image: string;
// Code directory on HDFS
public readonly codeDir: string;
//authentication file used for private Docker registry
public readonly authFile?: string;
// List of taskRole, one task role at least
public taskRoles: PAITaskRole[];
......@@ -87,12 +89,13 @@ export class PAIJobConfig {
* @param taskRoles List of taskRole, one task role at least
*/
constructor(jobName: string, image : string, codeDir : string,
taskRoles : PAITaskRole[], virtualCluster: string) {
taskRoles : PAITaskRole[], virtualCluster: string, authFile?: string) {
this.jobName = jobName;
this.image = image;
this.codeDir = codeDir;
this.taskRoles = taskRoles;
this.virtualCluster = virtualCluster;
this.authFile = authFile;
}
}
......@@ -129,14 +132,17 @@ export class NNIPAITrialConfig extends TrialConfig {
public virtualCluster?: string;
//Shared memory for one task in the task role
public shmMB?: number;
//authentication file used for private Docker registry
public authFile?: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number) {
image: string, virtualCluster?: string, shmMB?: number, authFile?: string) {
super(command, codeDir, gpuNum);
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
this.image = image;
this.virtualCluster = virtualCluster;
this.shmMB = shmMB;
this.authFile = authFile;
}
}
......@@ -442,7 +442,7 @@ class PAITrainingService implements TrainingService {
// Task command
nniPaiTrialCommand,
// Task shared memory
this.paiTrialConfig.shmMB
this.paiTrialConfig.shmMB,
)
];
......@@ -456,7 +456,9 @@ class PAITrainingService implements TrainingService {
// PAI Task roles
paiTaskRoles,
// Add Virtual Cluster
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString()
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
//Task auth File
this.paiTrialConfig.authFile
);
// Step 2. Upload code files in codeDir onto HDFS
......
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
from .nni_client import *
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
""" A python wrapper for nni rest api
Example:
import nnicli as nc
nc.start_nni('../../../../examples/trials/mnist/config.yml')
nc.set_endpoint('http://localhost:8080')
print(nc.version())
print(nc.get_experiment_status())
print(nc.get_job_statistics())
print(nc.list_trial_jobs())
nc.stop_nni()
"""
import sys
import os
import subprocess
import requests
__all__ = [
'start_nni',
'stop_nni',
'set_endpoint',
'version',
'get_experiment_status',
'get_experiment_profile',
'get_trial_job',
'list_trial_jobs',
'get_job_statistics',
'get_job_metrics',
'export_data'
]
EXPERIMENT_PATH = 'experiment'
VERSION_PATH = 'version'
STATUS_PATH = 'check-status'
JOB_STATISTICS_PATH = 'job-statistics'
TRIAL_JOBS_PATH = 'trial-jobs'
METRICS_PATH = 'metric-data'
EXPORT_DATA_PATH = 'export-data'
API_ROOT_PATH = 'api/v1/nni'
_api_endpoint = None
def set_endpoint(endpoint):
    """Remember the base URL of the nni rest server for all later nnicli calls.

    Parameters
    ----------
    endpoint : str
        Base URL of the rest server, for example ``http://localhost:8080``.
    """
    # The endpoint is kept in module state and read back by the REST helpers.
    global _api_endpoint
    _api_endpoint = endpoint
def _check_endpoint():
if _api_endpoint is None:
raise AssertionError("Please call set_endpoint to specify nni endpoint")
def _nni_rest_get(api_path, response_type='json'):
_check_endpoint()
uri = '{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path)
res = requests.get(uri)
if _http_succeed(res.status_code):
if response_type == 'json':
return res.json()
elif response_type == 'text':
return res.text
else:
raise AssertionError('Incorrect response_type')
else:
return None
def _http_succeed(status_code):
return status_code // 100 == 2
def _create_process(cmd):
if sys.platform == 'win32':
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
else:
process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
while process.poll() is None:
output = process.stdout.readline()
if output:
print(output.decode('utf-8').strip())
return process.returncode
def start_nni(config_file):
    """Start an nni experiment by running ``nnictl create --config <config_file>``.

    Parameters
    ----------
    config_file : str
        Path of the experiment configuration file. It is passed to nnictl as a
        single argument, so paths containing spaces are handled correctly.

    Raises
    ------
    RuntimeError
        If nnictl exits with a non-zero return code.
    """
    # Build the argv list directly: formatting a command string and splitting it
    # on spaces would break a config path that itself contains a space.
    cmd = ['nnictl', 'create', '--config', str(config_file)]
    if _create_process(cmd) != 0:
        raise RuntimeError('Failed to start nni.')
def stop_nni():
    """Stop the nni experiment by running ``nnictl stop``.

    Raises
    ------
    RuntimeError
        If nnictl exits with a non-zero return code.
    """
    exit_code = _create_process(['nnictl', 'stop'])
    if exit_code == 0:
        return
    raise RuntimeError('Failed to stop nni.')
def version():
    """Return the nni version reported by the rest server as plain text.

    ``None`` is returned when the server answers with a non-2xx status.
    """
    return _nni_rest_get(VERSION_PATH, response_type='text')
def get_experiment_status():
    """Return the experiment status (decoded JSON of the ``check-status`` API).

    ``None`` is returned when the server answers with a non-2xx status.
    """
    status = _nni_rest_get(STATUS_PATH)
    return status
def get_experiment_profile():
    """Return the experiment profile (decoded JSON of the ``experiment`` API).

    ``None`` is returned when the server answers with a non-2xx status.
    """
    profile = _nni_rest_get(EXPERIMENT_PATH)
    return profile
def get_trial_job(trial_job_id):
    """Return information about a single trial job (decoded JSON).

    Parameters
    ----------
    trial_job_id : str
        Id of the trial job to look up; must not be None.

    Returns
    -------
    The decoded response, or ``None`` when the server answers with a non-2xx status.

    Raises
    ------
    AssertionError
        If ``trial_job_id`` is None.
    """
    # Raised explicitly so the guard is still enforced when Python runs with -O.
    if trial_job_id is None:
        raise AssertionError('trial_job_id must not be None')
    # URL segments are always joined with '/'; os.path.join would insert a
    # backslash on Windows and produce an invalid request path.
    return _nni_rest_get('{}/{}'.format(TRIAL_JOBS_PATH, trial_job_id))
def list_trial_jobs():
    """Return information about all trial jobs (decoded JSON of the ``trial-jobs`` API).

    ``None`` is returned when the server answers with a non-2xx status.
    """
    trial_jobs = _nni_rest_get(TRIAL_JOBS_PATH)
    return trial_jobs
def get_job_statistics():
    """Return trial job statistics (decoded JSON of the ``job-statistics`` API).

    ``None`` is returned when the server answers with a non-2xx status.
    """
    statistics = _nni_rest_get(JOB_STATISTICS_PATH)
    return statistics
def get_job_metrics(trial_job_id=None):
    """Return trial job metrics (decoded JSON of the ``metric-data`` API).

    Parameters
    ----------
    trial_job_id : str, optional
        When given, only metrics of that trial job are requested; when None,
        the metrics endpoint is queried without a job id.

    Returns
    -------
    The decoded response, or ``None`` when the server answers with a non-2xx status.
    """
    if trial_job_id is None:
        api_path = METRICS_PATH
    else:
        # URL segments are always joined with '/'; os.path.join would insert a
        # backslash on Windows and produce an invalid request path.
        api_path = '{}/{}'.format(METRICS_PATH, trial_job_id)
    return _nni_rest_get(api_path)
def export_data():
    """Return the exported information for all trial jobs (``export-data`` API).

    ``None`` is returned when the server answers with a non-2xx status.
    """
    exported = _nni_rest_get(EXPORT_DATA_PATH)
    return exported
import setuptools
# Packaging metadata for the nnicli distribution.
setuptools.setup(
    name='nnicli',
    version='999.0.0-developing',
    description='nnicli for Neural Network Intelligence project',
    author='Microsoft NNI Team',
    author_email='nni@microsoft.com',
    url='https://github.com/Microsoft/nni',
    license='MIT',
    python_requires='>=3.5',
    # Packages are discovered relative to the directory this script runs in.
    packages=setuptools.find_packages(),
    # requests is the only declared runtime dependency.
    install_requires=['requests'],
)
......@@ -190,13 +190,19 @@ class HyperoptTuner(Tuner):
HyperoptTuner is a tuner which using hyperopt algorithm.
"""
def __init__(self, algorithm_name, optimize_mode='minimize'):
def __init__(self, algorithm_name, optimize_mode='minimize',
parallel_optimize=False, constant_liar_type='min'):
"""
Parameters
----------
algorithm_name : str
algorithm_name includes "tpe", "random_search" and "anneal".
optimize_mode : str
parallel_optimize : bool
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
constant_liar_type : str
constant_liar_type including "min", "max" and "mean"
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
"""
self.algorithm_name = algorithm_name
self.optimize_mode = OptimizeMode(optimize_mode)
......@@ -205,6 +211,13 @@ class HyperoptTuner(Tuner):
self.rval = None
self.supplement_data_num = 0
self.parallel = parallel_optimize
if self.parallel:
self.CL_rval = None
self.constant_liar_type = constant_liar_type
self.running_data = []
self.optimal_y = None
def _choose_tuner(self, algorithm_name):
"""
Parameters
......@@ -266,6 +279,10 @@ class HyperoptTuner(Tuner):
# but it can rarely cause duplicate parameters
total_params = self.get_suggestion(random_search=True)
self.total_data[parameter_id] = total_params
if self.parallel:
self.running_data.append(parameter_id)
params = split_index(total_params)
return params
......@@ -287,10 +304,39 @@ class HyperoptTuner(Tuner):
raise RuntimeError('Received parameter_id not in total_data.')
params = self.total_data[parameter_id]
# code for parallel
if self.parallel:
constant_liar = kwargs.get('constant_liar', False)
if constant_liar:
rval = self.CL_rval
else:
rval = self.rval
self.running_data.remove(parameter_id)
# update the reward of optimal_y
if self.optimal_y is None:
if self.constant_liar_type == 'mean':
self.optimal_y = [reward, 1]
else:
self.optimal_y = reward
else:
if self.constant_liar_type == 'mean':
_sum = self.optimal_y[0] + reward
_number = self.optimal_y[1] + 1
self.optimal_y = [_sum, _number]
elif self.constant_liar_type == 'min':
self.optimal_y = min(self.optimal_y, reward)
elif self.constant_liar_type == 'max':
self.optimal_y = max(self.optimal_y, reward)
logger.debug("Update optimal_y with reward, optimal_y = %s", self.optimal_y)
else:
rval = self.rval
if self.optimize_mode is OptimizeMode.Maximize:
reward = -reward
rval = self.rval
domain = rval.domain
trials = rval.trials
......@@ -375,13 +421,26 @@ class HyperoptTuner(Tuner):
total_params : dict
parameter suggestion
"""
if self.parallel and len(self.total_data)>20 and len(self.running_data) and self.optimal_y is not None:
self.CL_rval = copy.deepcopy(self.rval)
if self.constant_liar_type == 'mean':
_constant_liar_y = self.optimal_y[0] / self.optimal_y[1]
else:
_constant_liar_y = self.optimal_y
for _parameter_id in self.running_data:
self.receive_trial_result(parameter_id=_parameter_id, parameters=None, value=_constant_liar_y, constant_liar=True)
rval = self.CL_rval
rval = self.rval
random_state = np.random.randint(2**31 - 1)
else:
rval = self.rval
random_state = rval.rstate.randint(2**31 - 1)
trials = rval.trials
algorithm = rval.algo
new_ids = rval.trials.new_trial_ids(1)
rval.trials.refresh()
random_state = rval.rstate.randint(2**31 - 1)
if random_search:
new_trials = hp.rand.suggest(new_ids, rval.domain, trials,
random_state)
......
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import sys
import time
import traceback
from utils import GREEN, RED, CLEAR, setup_experiment
def test_nni_cli():
    """Smoke-test the nnicli wrapper against a freshly started mnist experiment.

    Starts the experiment, points nnicli at ``http://localhost:8080``, calls a
    few read-only APIs and always stops the experiment afterwards. Any
    exception is reported as a test failure and re-raised.
    """
    import nnicli as nc

    config_file = 'config_test/examples/mnist.test.yml'

    try:
        # Give a previously stopped experiment time to exit, to avoid a port conflict.
        time.sleep(6)
        print(''.join([GREEN, 'Testing nnicli:', config_file, CLEAR]))

        nc.start_nni(config_file)
        # Short pause after start-up before the first REST query.
        time.sleep(3)
        nc.set_endpoint('http://localhost:8080')

        print(nc.version())
        print(nc.get_job_statistics())
        print(nc.get_experiment_status())
        nc.list_trial_jobs()

        print(''.join([GREEN, 'Test nnicli {}: TEST PASS'.format(config_file), CLEAR]))
    except Exception as error:
        print(''.join([RED, 'Test nnicli {}: TEST FAIL'.format(config_file), CLEAR]))
        print(repr(error))
        traceback.print_exc()
        raise error
    finally:
        # Stop the experiment whether the checks passed or not.
        nc.stop_nni()
if __name__ == '__main__':
    # setup_experiment receives False only when the last command-line
    # argument is exactly '--preinstall'.
    preinstall_requested = (sys.argv[-1] == '--preinstall')
    installed = not preinstall_requested
    setup_experiment(installed)
    test_nni_cli()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment