Unverified Commit d48ad027 authored by SparkSnail, committed by GitHub

Merge pull request #184 from microsoft/master

merge master
parents 9352cc88 22993e5d
@@ -3,5 +3,5 @@
  {
    "_type" : "choice",
    "_value" : [1, 100]
  }
}
\ No newline at end of file
@@ -5,8 +5,8 @@
    "_value" : [{"optimizer": "Adam", "learning_rate": 0.00001},
                {"optimizer": "Adam", "learning_rate": 0.0001},
                {"optimizer": "Adam", "learning_rate": 0.001},
                {"optimizer": "SGD", "learning_rate": 0.01},
                {"optimizer": "SGD", "learning_rate": 0.005},
                {"optimizer": "SGD", "learning_rate": 0.0002}]
  }
}
\ No newline at end of file
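Each `choice` value in this search space is a whole dict, so a single draw fixes the optimizer and its learning rate together. In a trial this arrives through `nni.get_next_parameter()`; a hedged sketch (the search-space key `opt_pair` is illustrative):

```python
import nni

params = nni.get_next_parameter()
# With the choice space above, one draw might look like:
# {'opt_pair': {'optimizer': 'SGD', 'learning_rate': 0.01}}
pair = params['opt_pair']
optimizer, learning_rate = pair['optimizer'], pair['learning_rate']
```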
@@ -69,7 +69,7 @@ def get_metric_results(metrics):
        elif metric['type'] == 'FINAL':
            final_result.append(metric['data'])
    print(intermediate_result, final_result)
    return [round(float(x), 6) for x in intermediate_result], [round(float(x), 6) for x in final_result]

def get_max_values(config_file):
......
@@ -3,5 +3,5 @@
  {
    "_type" : "choice",
    "_value" : [1, 100]
  }
}
\ No newline at end of file
@@ -4,7 +4,7 @@ import nni
if __name__ == '__main__':
    hyper_params = nni.get_next_parameter()
    for i in range(10):
        nni.report_intermediate_result(0.1*(i+1))
        time.sleep(2)
    nni.report_final_result(1.0)
@@ -15,6 +15,6 @@
* When the experiment finishes (successfully, in this case), `Experiment done` can be found in the nni_manager.log file.

## Issues
* Private APIs are used to detect whether tuner and assessor have terminated successfully.
* The output of REST server is not tested.
* Remote machine training service is not tested.
\ No newline at end of file
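A minimal sketch of that log check, assuming the test already knows where nni_manager.log lives (the path argument is illustrative):

```python
from pathlib import Path

def experiment_done(log_file):
    '''Return True once nni_manager.log contains the completion marker.'''
    log = Path(log_file)
    return log.exists() and 'Experiment done' in log.read_text()
```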
@@ -62,7 +62,7 @@ def start_container(image, name, nnimanager_os):
        file.write(str(port))

def stop_container(name):
    '''Stop docker container'''
    stop_cmds = ['docker', 'container', 'stop', name]
    check_call(stop_cmds)
    rm_cmds = ['docker', 'container', 'rm', name]
......
@@ -6,7 +6,7 @@ $CWD = $PWD
echo ""
echo "===========================Testing: nni_annotation==========================="
cd $CWD/../tools/
python -m unittest -v nni_annotation/test_annotation.py
## Export certain environment variables for unittest code to work
$env:NNI_TRIAL_JOB_ID="test_trial_job_id"
......
## NNI CTL
The NNI CTL module is used to control Neural Network Intelligence: it can start a new experiment, stop an experiment, update an experiment, and so on.
## Environment
```
@@ -9,7 +9,7 @@ python >= 3.5
## Installation
1. Enter tools directory
1. Use pip to install packages
* Install for current user:
@@ -24,17 +24,17 @@ python >= 3.5
    python3 -m pip install -e .
    ```
1. Change the mode of nnictl file
    ```bash
    chmod +x ./nnictl
    ```
1. Add nnictl to your PATH system environment variable.
* You could use the `export` command to set the PATH variable temporarily:
        export PATH={your nnictl path}:$PATH
* Or you could edit your `/etc/profile` file.
......
# NNI Annotation

## Overview
To improve user experience and reduce user effort, we designed an annotation grammar. With NNI annotation, users can adapt their code to NNI simply by adding standalone annotation strings, which do not affect the execution of the original code.
Below is an example:
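A hedged sketch of what such an annotation looks like (variable name and candidate values are illustrative; the form matches the `@nni.variable` annotations used in the MNIST example later in this diff):

```python
'''@nni.variable(nni.choice(0.25, 0.5, 0.75), name=dropout_rate)'''
dropout_rate = 0.5  # baseline value; NNI substitutes a sampled value when annotations are expanded
```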
@@ -28,7 +28,7 @@ In NNI, there are mainly four types of annotation:
**Arguments**

- **sampling_algo**: Sampling algorithm that specifies a search space. Users should replace it with a built-in NNI sampling function whose name consists of an `nni.` prefix and a search space type specified in [SearchSpaceSpec](https://nni.readthedocs.io/en/latest/SearchSpaceSpec.html), such as `choice` or `uniform`.
- **name**: The name of the variable that the selected value will be assigned to. Note that this argument should be the same as the left-hand side of the following assignment statement.

There are 10 types for expressing your search space:
......
@@ -33,7 +33,7 @@ __all__ = ['generate_search_space', 'expand_annotations']
slash = '/'
if sys.platform == "win32":
    slash = '\\'

def generate_search_space(code_dir):
    """Generate search space from Python source code.
@@ -41,7 +41,7 @@ def generate_search_space(code_dir):
        code_dir: directory path of source files (str)
    """
    search_space = {}

    if code_dir.endswith(slash):
        code_dir = code_dir[:-1]
@@ -84,7 +84,7 @@ def expand_annotations(src_dir, dst_dir, exp_id='', trial_id=''):
    """
    if src_dir[-1] == slash:
        src_dir = src_dir[:-1]
    if dst_dir[-1] == slash:
        dst_dir = dst_dir[:-1]
......
@@ -230,7 +230,7 @@ def test_variable_equal(node1, node2):
        if len(node1) != len(node2):
            return False
        return all(test_variable_equal(n1, n2) for n1, n2 in zip(node1, node2))

    return node1 == node2
......
@@ -161,7 +161,7 @@ def main():

def generate_default_params():
    params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
              'dropout_rate': 0.5, 'channel_1_num': 32, 'channel_2_num': 64,
              'conv_size': 5, 'pool_size': 2, 'hidden_size': 1024,
              'batch_size': 50, 'batch_num': 200, 'learning_rate': 0.0001}
    return params
......
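A hedged sketch of how such defaults are typically combined with tuner output in NNI examples (assuming `nni` is imported as in this file; the merge step is the usual pattern, not necessarily this file's exact wiring):

```python
params = generate_default_params()
tuned_params = nni.get_next_parameter()
params.update(tuned_params)  # tuner-provided keys override the defaults
```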
@@ -44,7 +44,7 @@ class MnistNetwork(object):
        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
@@ -55,8 +55,8 @@ class MnistNetwork(object):
            #print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
            logger.debug('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
            raise
        x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])

        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            W_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
@@ -68,38 +68,38 @@ class MnistNetwork(object):
        with tf.name_scope('pool1'):
            """@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)"""
            h_pool1 = max_pool(h_conv1, self.pool_size)

        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([self.conv_size, self.conv_size, self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            #"""@nni.dynamic(input={cnn_block:1, concat:2},function_choice={"cnn_block":(x,nni.choice([3,4])),"cnn_block":(x),"concat":(x,y)},limit={"cnn_block.input":[concat,input],"concat.input":[this.depth-1,this.depth-3,this.depth-5],"graph.width":[1]})"""
            h_pool2 = max_pool(h_conv2, self.pool_size)

        # Fully connected layer 1 -- after 2 rounds of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            W_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            W_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
@@ -121,7 +121,7 @@ def max_pool(x, pool_size):
                          strides=[1, pool_size, pool_size, 1], padding='SAME')

def avg_pool(x, pool_size):
    return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')

def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
@@ -163,12 +163,12 @@ def main():
            '''@nni.variable(nni.choice(1,5),name=dropout_rate)'''
            dropout_rate = 0.5
            mnist_network.train_step.run(feed_dict={mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: dropout_rate})

            if i % 100 == 0:
                #train_accuracy = mnist_network.accuracy.eval(feed_dict={
                #    mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: params['dropout_rate']})
                #print('step %d, training accuracy %g' % (i, train_accuracy))
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
                '''@nni.report_intermediate_result(test_acc)'''
@@ -196,7 +196,7 @@ if __name__ == '__main__':
    #FLAGS, unparsed = parse_command()
    #original_params = parse_init_json(FLAGS.init_file_path, {})
    #pipe_interface.set_params_to_env()

    '''@nni.get_next_parameter()'''
    try:
......
@@ -128,7 +128,7 @@ advisor_schema_dict = {
    'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
    Optional('min_budget'): setNumberRange('min_budget', int, 0, 9999),
    Optional('max_budget'): setNumberRange('max_budget', int, 0, 9999),
    Optional('eta'): setNumberRange('eta', int, 0, 9999),
    Optional('min_points_in_model'): setNumberRange('min_points_in_model', int, 0, 9999),
    Optional('top_n_percent'): setNumberRange('top_n_percent', int, 1, 99),
    Optional('num_samples'): setNumberRange('num_samples', int, 1, 9999),
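These `set*` helpers build validators for the `schema` package. A minimal sketch of the pattern, assuming `setNumberRange` combines a type check with a bounds check (error texts are illustrative, not NNI's exact implementation):

```python
from schema import And

def setNumberRange(key, key_type, start, end):
    # Accept values of key_type that fall within [start, end].
    return And(And(key_type, error='%s should be of type %s' % (key, key_type.__name__)),
               And(lambda n: start <= n <= end,
                   error='%s should be in range [%s, %s]' % (key, start, end)))
```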
@@ -235,7 +235,7 @@ kubeflow_trial_schema = {
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
            'image': setType('image', str)
        }
    }
}
......
@@ -83,7 +83,7 @@ class Experiments:
        self.experiments[id]['fileName'] = file_name
        self.experiments[id]['platform'] = platform
        self.write_file()

    def update_experiment(self, id, key, value):
        '''Update experiment'''
        if id not in self.experiments:
@@ -91,17 +91,17 @@ class Experiments:
        self.experiments[id][key] = value
        self.write_file()
        return True

    def remove_experiment(self, id):
        '''Remove an experiment by id'''
        if id in self.experiments:
            self.experiments.pop(id)
            self.write_file()

    def get_all_experiments(self):
        '''Return all experiments'''
        return self.experiments

    def write_file(self):
        '''Save config to local file'''
        try:
......
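A usage sketch of this registry (the experiment id is illustrative):

```python
experiments = Experiments()
experiments.update_experiment('GExpId123', 'status', 'stopped')  # persists via write_file()
all_experiments = experiments.get_all_experiments()
experiments.remove_experiment('GExpId123')
```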
@@ -39,6 +39,7 @@ import site
import time
from pathlib import Path
from .command_utils import check_output_command, kill_command
from .nnictl_utils import update_experiment

def get_log_path(config_file_name):
    '''Generate stdout and stderr log paths'''
@@ -102,7 +103,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None):
        print_error('Port %s is used by another process, please reset the port!\n' \
                    'You could use \'nnictl create --help\' to get help information' % port)
        exit(1)

    if (platform != 'local') and detect_port(int(port) + 1):
        print_error('PAI mode needs an additional adjacent port %d, and the port %d is used by another process!\n' \
                    'You could set another port to start the experiment!\n' \
@@ -110,7 +111,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None):
        exit(1)

    print_normal('Starting restful server...')
    entry_dir = get_nni_installation_path()
    entry_file = os.path.join(entry_dir, 'main.js')
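A condensed sketch of the port rule above, using `detect_port` as in this file (it returns True when a port is already occupied):

```python
def ports_available(port, platform):
    # The REST server itself takes `port`; non-local platforms
    # additionally require the adjacent `port + 1`.
    if detect_port(int(port)):
        return False
    if platform != 'local' and detect_port(int(port) + 1):
        return False
    return True
```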
@@ -220,7 +221,7 @@ def setNNIManagerIp(experiment_config, port, config_file_name):
    return True, None

def set_pai_config(experiment_config, port, config_file_name):
    '''Set PAI configuration'''
    pai_config_data = dict()
    pai_config_data['pai_config'] = experiment_config['paiConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(pai_config_data), REST_TIME_OUT)
@@ -239,7 +240,7 @@ def set_pai_config(experiment_config, port, config_file_name):
    return set_trial_config(experiment_config, port, config_file_name), err_message

def set_kubeflow_config(experiment_config, port, config_file_name):
    '''Set kubeflow configuration'''
    kubeflow_config_data = dict()
    kubeflow_config_data['kubeflow_config'] = experiment_config['kubeflowConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(kubeflow_config_data), REST_TIME_OUT)
@@ -258,7 +259,7 @@ def set_kubeflow_config(experiment_config, port, config_file_name):
    return set_trial_config(experiment_config, port, config_file_name), err_message

def set_frameworkcontroller_config(experiment_config, port, config_file_name):
    '''Set frameworkcontroller configuration'''
    frameworkcontroller_config_data = dict()
    frameworkcontroller_config_data['frameworkcontroller_config'] = experiment_config['frameworkcontrollerConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(frameworkcontroller_config_data), REST_TIME_OUT)
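The three `set_*_config` helpers share one shape: push a single cluster-metadata entry over REST, then delegate to `set_trial_config`. A hypothetical consolidation, sketched with the names used above (`rest_put`, `cluster_metadata_url`, `REST_TIME_OUT`, `set_trial_config`); the error handling here is an assumption:

```python
import json

def set_cluster_config(experiment_config, port, config_file_name, meta_key, config_field):
    # Hypothetical generalization of set_pai_config / set_kubeflow_config /
    # set_frameworkcontroller_config.
    data = {meta_key: experiment_config[config_field]}
    response = rest_put(cluster_metadata_url(port), json.dumps(data), REST_TIME_OUT)
    if response is None or response.status_code != 200:
        err_message = response.text if response is not None else 'no response'
        return False, err_message
    return set_trial_config(experiment_config, port, config_file_name), None
```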
@@ -318,7 +319,7 @@ def set_experiment(experiment_config, mode, port, config_file_name):
            {'key': 'trial_config', 'value': experiment_config['trial']})
    elif experiment_config['trainingServicePlatform'] == 'pai':
        request_data['clusterMetaData'].append(
            {'key': 'pai_config', 'value': experiment_config['paiConfig']})
        request_data['clusterMetaData'].append(
            {'key': 'trial_config', 'value': experiment_config['trial']})
    elif experiment_config['trainingServicePlatform'] == 'kubeflow':
@@ -346,13 +347,6 @@ def set_experiment(experiment_config, mode, port, config_file_name):

def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
    '''Follow steps to start rest server and start experiment'''
    nni_config = Config(config_file_name)
    # check execution policy in powershell
    if sys.platform == 'win32':
        execution_policy = check_output(['powershell.exe', 'Get-ExecutionPolicy']).decode('ascii').strip()
        if execution_policy == 'Restricted':
            print_error('PowerShell execution policy error, please run PowerShell as administrator with this command first:\r\n'\
                + '\'Set-ExecutionPolicy -ExecutionPolicy Unrestricted\'')
            exit(1)
    # check packages for tuner
    package_name, module_name = None, None
    if experiment_config.get('tuner') and experiment_config['tuner'].get('builtinTunerName'):
@@ -430,7 +424,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
    except Exception:
        raise Exception(ERROR_INFO % 'Rest server stopped!')
        exit(1)

    # set pai config
    if experiment_config['trainingServicePlatform'] == 'pai':
        print_normal('Setting pai config...')
@@ -445,7 +439,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
    except Exception:
        raise Exception(ERROR_INFO % 'Restful server stopped!')
        exit(1)

    # set kubeflow config
    if experiment_config['trainingServicePlatform'] == 'kubeflow':
        print_normal('Setting kubeflow config...')
@@ -461,7 +455,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
        raise Exception(ERROR_INFO % 'Restful server stopped!')
        exit(1)

    # set frameworkcontroller config
    if experiment_config['trainingServicePlatform'] == 'frameworkcontroller':
        print_normal('Setting frameworkcontroller config...')
        config_result, err_msg = set_frameworkcontroller_config(experiment_config, args.port, config_file_name)
@@ -499,7 +493,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
    else:
        web_ui_url_list = get_local_urls(args.port)
    nni_config.set_config('webuiUrl', web_ui_url_list)

    # save experiment information
    nnictl_experiment_config = Experiments()
    nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name, experiment_config['trainingServicePlatform'])
@@ -508,6 +502,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):

def resume_experiment(args):
    '''Resume an experiment'''
    update_experiment()
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    experiment_id = None
......
@@ -21,7 +21,7 @@
import os
import json
from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA, FRAMEWORKCONTROLLER_CONFIG_SCHEMA, \
    tuner_schema_dict, advisor_schema_dict, assessor_schema_dict
from schema import SchemaMissingKeyError, SchemaForbiddenKeyError, SchemaUnexpectedTypeError, SchemaWrongKeyError, SchemaError
from .common_utils import get_json_content, print_error, print_warning, print_normal
from schema import Schema, And, Use, Optional, Regex, Or
@@ -62,7 +62,7 @@ def parse_path(experiment_config, config_path):
        expand_path(experiment_config['assessor'], 'codeDir')
    if experiment_config.get('advisor'):
        expand_path(experiment_config['advisor'], 'codeDir')

    # if users use a relative path, convert it to an absolute path
    root_path = os.path.dirname(config_path)
    if experiment_config.get('searchSpacePath'):
@@ -80,8 +80,8 @@ def parse_path(experiment_config, config_path):
            parse_relative_path(root_path, experiment_config['machineList'][index], 'sshKeyPath')

def validate_search_space_content(experiment_config):
    '''Validate search space content:
    if the search space file is not in JSON format, or its values do not contain the required _type and _value fields,
    it is not a valid search space file'''
    try:
        search_space_content = json.load(open(experiment_config.get('searchSpacePath'), 'r'))
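A minimal sketch of that check, assuming each top-level search space value must be a dict carrying `_type` and `_value` (the helper name is illustrative):

```python
import json

def is_valid_search_space(path):
    '''Hedged sketch: a valid search space is a JSON object whose
    values each carry the required _type and _value fields.'''
    try:
        with open(path, 'r') as f:
            content = json.load(f)
    except ValueError:
        return False  # not valid JSON
    if not isinstance(content, dict):
        return False
    return all(isinstance(value, dict) and '_type' in value and '_value' in value
               for value in content.values())
```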
@@ -110,7 +110,7 @@ def validate_kubeflow_operators(experiment_config):
        if experiment_config.get('trial').get('master') is None:
            print_error('kubeflow with pytorch-operator must set master')
            exit(1)

    if experiment_config.get('kubeflowConfig').get('storage') == 'nfs':
        if experiment_config.get('kubeflowConfig').get('nfs') is None:
            print_error('please set nfs configuration!')
@@ -170,7 +170,7 @@ def validate_common_content(experiment_config):
    else:
        print_error(error)
        exit(1)

    # set default value
    if experiment_config.get('maxExecDuration') is None:
        experiment_config['maxExecDuration'] = '999d'
......
@@ -176,7 +176,7 @@ def parse_args():
    parser_package_subparsers = parser_package.add_subparsers()
    parser_package_install = parser_package_subparsers.add_parser('install', help='install packages')
    parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed')
    parser_package_install.set_defaults(func=package_install)

    parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages')
    parser_package_show.set_defaults(func=package_show)
......
@@ -321,7 +321,7 @@ def log_internal(args, filetype):
    else:
        file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stderr')
    print(check_output_command(file_full_path, head=args.head, tail=args.tail))

def log_stdout(args):
    '''Get stdout log'''
    log_internal(args, 'stdout')
@@ -393,7 +393,7 @@ def experiment_list(args):
        print_warning('There is no experiment running...\nYou can use \'nnictl experiment list all\' to list all stopped experiments!')
    experiment_information = ""
    for key in experiment_id_list:
        experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], experiment_dict[key]['port'],\
            experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))
    print(EXPERIMENT_INFORMATION_FORMAT % experiment_information)
......