Commit ba8dccd6 authored by suiguoxin

Merge branch 'master' of https://github.com/microsoft/nni

parents 56a1575b 150ee83a
@@ -3,5 +3,5 @@
     {
         "_type" : "choice",
         "_value" : [1, 100]
     }
 }
\ No newline at end of file
@@ -3,5 +3,5 @@
     {
         "_type" : "choice",
         "_value" : [1, 100]
     }
 }
\ No newline at end of file
@@ -5,8 +5,8 @@
     "_value" : [{"optimizer": "Adam", "learning_rate": 0.00001},
                 {"optimizer": "Adam", "learning_rate": 0.0001},
                 {"optimizer": "Adam", "learning_rate": 0.001},
                 {"optimizer": "SGD", "learning_rate": 0.01},
                 {"optimizer": "SGD", "learning_rate": 0.005},
                 {"optimizer": "SGD", "learning_rate": 0.0002}]
     }
 }
\ No newline at end of file
@@ -69,7 +69,7 @@ def get_metric_results(metrics):
        elif metric['type'] == 'FINAL':
            final_result.append(metric['data'])
    print(intermediate_result, final_result)
    return [round(float(x), 6) for x in intermediate_result], [round(float(x), 6) for x in final_result]

def get_max_values(config_file):
...
@@ -3,5 +3,5 @@
     {
         "_type" : "choice",
         "_value" : [1, 100]
     }
 }
\ No newline at end of file
@@ -4,7 +4,7 @@ import nni

if __name__ == '__main__':
    hyper_params = nni.get_next_parameter()

    for i in range(10):
        nni.report_intermediate_result(0.1 * (i + 1))
        time.sleep(2)
    nni.report_final_result(1.0)
@@ -15,6 +15,6 @@
* When the experiment finishes, which in this case means it finishes successfully, the message `Experiment done` can be detected in the nni_manager.log file (see the sketch after this list).

## Issues
* Private APIs are used to detect whether the tuner and assessor have terminated successfully.
* The output of the REST server is not tested.
* The remote machine training service is not tested.
\ No newline at end of file
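To make the `Experiment done` check concrete, a polling helper along the following lines would work (a minimal sketch; `wait_experiment_done` and its arguments are hypothetical, not part of the test suite):

```python
import time

def wait_experiment_done(log_path, timeout=300):
    """Poll nni_manager.log until the 'Experiment done' marker appears."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with open(log_path) as log:
                if 'Experiment done' in log.read():
                    return True
        except FileNotFoundError:
            pass  # the log file may not have been created yet
        time.sleep(5)
    return False
```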
@@ -62,7 +62,7 @@ def start_container(image, name, nnimanager_os):
        file.write(str(port))

def stop_container(name):
    '''Stop docker container'''
    stop_cmds = ['docker', 'container', 'stop', name]
    check_call(stop_cmds)
    rm_cmds = ['docker', 'container', 'rm', name]
...
@@ -6,7 +6,7 @@ $CWD = $PWD
echo ""
echo "===========================Testing: nni_annotation==========================="
cd $CWD/../tools/
python -m unittest -v nni_annotation/test_annotation.py

## Export certain environment variables for unittest code to work
$env:NNI_TRIAL_JOB_ID="test_trial_job_id"
...
## NNI CTL
The NNI CTL module is used to control Neural Network Intelligence, including starting a new experiment, stopping an experiment, and updating an experiment.
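For orientation, typical invocations look like the following (a sketch based on commands this module exposes; run `nnictl --help` for the authoritative list):

```bash
nnictl create --config experiment.yml   # start a new experiment from a config file
nnictl stop                             # stop the running experiment
nnictl resume <experiment_id>           # resume a previously stopped experiment
```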
## Environment
```
@@ -9,7 +9,7 @@ python >= 3.5
## Installation
1. Enter the tools directory
1. Use pip to install packages
   * Install for the current user:
@@ -24,17 +24,17 @@ python >= 3.5
    python3 -m pip install -e .
    ```
1. Change the mode of the nnictl file
    ```bash
    chmod +x ./nnictl
    ```
1. Add nnictl to your PATH system environment variable.
   * You could use the `export` command to set the PATH variable temporarily:
        export PATH={your nnictl path}:$PATH
   * Or you could edit your `/etc/profile` file, as sketched below.
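For instance, the same line can be appended to `/etc/profile` to make the change persistent (a sketch; `{your nnictl path}` is a placeholder you must replace, and `sudo` access is assumed):

```bash
# append the PATH update so it applies to future login shells
echo 'export PATH={your nnictl path}:$PATH' | sudo tee -a /etc/profile
# reload the profile in the current shell
source /etc/profile
```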
...
# NNI Annotation

## Overview

To improve user experience and reduce user effort, we designed an annotation grammar. Using NNI annotation, users can adapt their code to NNI just by adding some standalone annotation strings, which do not affect the execution of the original code.

Below is an example:
@@ -28,7 +28,7 @@ In NNI, there are mainly four types of annotation:

**Arguments**

- **sampling_algo**: Sampling algorithm that specifies a search space. The user should replace it with a built-in NNI sampling function whose name consists of an `nni.` prefix and a search space type specified in [SearchSpaceSpec](https://nni.readthedocs.io/en/latest/SearchSpaceSpec.html), such as `choice` or `uniform`.
- **name**: The name of the variable that the selected value will be assigned to. Note that this argument should be the same as the left-hand side of the following assignment statement, as the example below shows.
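As a minimal illustration of these two arguments (mirroring the annotated MNIST trial later in this diff):

```python
'''@nni.variable(nni.choice(0.1, 0.5), name=dropout_rate)'''
dropout_rate = 0.5  # `name` matches the variable on the left-hand side
```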
There are 10 types for expressing your search space, as follows:
...
@@ -33,7 +33,7 @@ __all__ = ['generate_search_space', 'expand_annotations']

slash = '/'
if sys.platform == "win32":
    slash = '\\'

def generate_search_space(code_dir):
    """Generate search space from Python source code.
@@ -41,7 +41,7 @@ def generate_search_space(code_dir):
        code_dir: directory path of source files (str)
    """
    search_space = {}

    if code_dir.endswith(slash):
        code_dir = code_dir[:-1]
@@ -84,7 +84,7 @@ def expand_annotations(src_dir, dst_dir, exp_id='', trial_id=''):
    """
    if src_dir[-1] == slash:
        src_dir = src_dir[:-1]
    if dst_dir[-1] == slash:
        dst_dir = dst_dir[:-1]
...
@@ -230,7 +230,7 @@ def test_variable_equal(node1, node2):
        if len(node1) != len(node2):
            return False
        return all(test_variable_equal(n1, n2) for n1, n2 in zip(node1, node2))

    return node1 == node2
...
@@ -161,7 +161,7 @@ def main():

def generate_default_params():
    params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
              'dropout_rate': 0.5, 'channel_1_num': 32, 'channel_2_num': 64,
              'conv_size': 5, 'pool_size': 2, 'hidden_size': 1024,
              'batch_size': 50, 'batch_num': 200, 'learning_rate': 0.0001}
    return params
...
@@ -44,7 +44,7 @@ class MnistNetwork(object):
        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name = 'input_x')
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name = 'input_y')
        self.keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')

        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
@@ -55,8 +55,8 @@ class MnistNetwork(object):
            #print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
            logger.debug('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
            raise
        x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])

        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            W_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
@@ -68,38 +68,38 @@ class MnistNetwork(object):
        with tf.name_scope('pool1'):
            """@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)"""
            h_pool1 = max_pool(h_conv1, self.pool_size)

        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([self.conv_size, self.conv_size, self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            #"""@nni.dynamic(input={cnn_block:1, concat:2},function_choice={"cnn_block":(x,nni.choice([3,4])),"cnn_block":(x),"concat":(x,y)},limit={"cnn_block.input":[concat,input],"concat.input":[this.depth-1,this.depth-3,this.depth-5],"graph.width":[1]})"""
            h_pool2 = max_pool(h_conv2, self.pool_size)

        # Fully connected layer 1 -- after 2 rounds of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            W_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            W_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = self.y, logits = y_conv))
        with tf.name_scope('adam_optimizer'):
@@ -121,7 +121,7 @@ def max_pool(x, pool_size):
                          strides=[1, pool_size, pool_size, 1], padding='SAME')

def avg_pool(x, pool_size):
    return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')

def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
@@ -163,12 +163,12 @@ def main():
            '''@nni.variable(nni.choice(1,5),name=dropout_rate)'''
            dropout_rate = 0.5
            mnist_network.train_step.run(feed_dict={mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: dropout_rate})

            if i % 100 == 0:
                #train_accuracy = mnist_network.accuracy.eval(feed_dict={
                #    mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: params['dropout_rate']})
                #print('step %d, training accuracy %g' % (i, train_accuracy))
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
                '''@nni.report_intermediate_result(test_acc)'''
@@ -196,7 +196,7 @@ if __name__ == '__main__':
    #FLAGS, unparsed = parse_command()
    #original_params = parse_init_json(FLAGS.init_file_path, {})
    #pipe_interface.set_params_to_env()
    '''@nni.get_next_parameter()'''
    try:
...
@@ -56,6 +56,7 @@ common_schema = {
    Optional('nniManagerIp'): setType('nniManagerIp', str),
    Optional('logDir'): And(os.path.isdir, error=SCHEMA_PATH_ERROR % 'logDir'),
    Optional('debug'): setType('debug', bool),
+   Optional('versionCheck'): setType('versionCheck', bool),
    Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'),
    Optional('logCollection'): setChoice('logCollection', 'http', 'none'),
    'useAnnotation': setType('useAnnotation', bool),
@@ -143,7 +144,7 @@ advisor_schema_dict = {
    'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
    Optional('min_budget'): setNumberRange('min_budget', int, 0, 9999),
    Optional('max_budget'): setNumberRange('max_budget', int, 0, 9999),
    Optional('eta'): setNumberRange('eta', int, 0, 9999),
    Optional('min_points_in_model'): setNumberRange('min_points_in_model', int, 0, 9999),
    Optional('top_n_percent'): setNumberRange('top_n_percent', int, 1, 99),
    Optional('num_samples'): setNumberRange('num_samples', int, 1, 9999),
@@ -250,7 +251,7 @@ kubeflow_trial_schema = {
        'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
        'memoryMB': setType('memoryMB', int),
        'image': setType('image', str)
        }
    }
}
...
@@ -83,7 +83,7 @@ class Experiments:
        self.experiments[id]['fileName'] = file_name
        self.experiments[id]['platform'] = platform
        self.write_file()

    def update_experiment(self, id, key, value):
        '''Update an experiment'''
        if id not in self.experiments:
@@ -91,17 +91,17 @@ class Experiments:
        self.experiments[id][key] = value
        self.write_file()
        return True

    def remove_experiment(self, id):
        '''Remove an experiment by id'''
        if id in self.experiments:
            self.experiments.pop(id)
            self.write_file()

    def get_all_experiments(self):
        '''Return all of the experiments'''
        return self.experiments

    def write_file(self):
        '''Save config to a local file'''
        try:
...
@@ -39,6 +39,7 @@ import site
import time
from pathlib import Path
from .command_utils import check_output_command, kill_command
+from .nnictl_utils import update_experiment

def get_log_path(config_file_name):
    '''generate stdout and stderr log path'''
@@ -102,7 +103,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
        print_error('Port %s is used by another process, please reset the port!\n' \
                    'You could use \'nnictl create --help\' to get help information' % port)
        exit(1)

    if (platform != 'local') and detect_port(int(port) + 1):
        print_error('PAI mode needs an additional adjacent port %d, and the port %d is used by another process!\n' \
                    'You could set another port to start the experiment!\n' \
@@ -110,7 +111,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
        exit(1)

    print_normal('Starting restful server...')
    entry_dir = get_nni_installation_path()
    entry_file = os.path.join(entry_dir, 'main.js')
@@ -220,7 +221,7 @@ def setNNIManagerIp(experiment_config, port, config_file_name):
    return True, None

def set_pai_config(experiment_config, port, config_file_name):
    '''set pai configuration'''
    pai_config_data = dict()
    pai_config_data['pai_config'] = experiment_config['paiConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(pai_config_data), REST_TIME_OUT)
@@ -239,7 +240,7 @@ def set_pai_config(experiment_config, port, config_file_name):
    return set_trial_config(experiment_config, port, config_file_name), err_message

def set_kubeflow_config(experiment_config, port, config_file_name):
    '''set kubeflow configuration'''
    kubeflow_config_data = dict()
    kubeflow_config_data['kubeflow_config'] = experiment_config['kubeflowConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(kubeflow_config_data), REST_TIME_OUT)
@@ -258,7 +259,7 @@ def set_kubeflow_config(experiment_config, port, config_file_name):
    return set_trial_config(experiment_config, port, config_file_name), err_message

def set_frameworkcontroller_config(experiment_config, port, config_file_name):
    '''set frameworkcontroller configuration'''
    frameworkcontroller_config_data = dict()
    frameworkcontroller_config_data['frameworkcontroller_config'] = experiment_config['frameworkcontrollerConfig']
    response = rest_put(cluster_metadata_url(port), json.dumps(frameworkcontroller_config_data), REST_TIME_OUT)
@@ -302,6 +303,9 @@ def set_experiment(experiment_config, mode, port, config_file_name):
    #debug mode should disable version check
    if experiment_config.get('debug') is not None:
        request_data['versionCheck'] = not experiment_config.get('debug')
+   #validate version check
+   if experiment_config.get('versionCheck') is not None:
+       request_data['versionCheck'] = experiment_config.get('versionCheck')
    if experiment_config.get('logCollection'):
        request_data['logCollection'] = experiment_config.get('logCollection')
@@ -318,7 +322,7 @@ def set_experiment(experiment_config, mode, port, config_file_name):
            {'key': 'trial_config', 'value': experiment_config['trial']})
    elif experiment_config['trainingServicePlatform'] == 'pai':
        request_data['clusterMetaData'].append(
            {'key': 'pai_config', 'value': experiment_config['paiConfig']})
        request_data['clusterMetaData'].append(
            {'key': 'trial_config', 'value': experiment_config['trial']})
    elif experiment_config['trainingServicePlatform'] == 'kubeflow':
@@ -346,13 +350,6 @@ def set_experiment(experiment_config, mode, port, config_file_name):

def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
    '''follow steps to start rest server and start experiment'''
    nni_config = Config(config_file_name)
-   # check execution policy in powershell
-   if sys.platform == 'win32':
-       execution_policy = check_output(['powershell.exe', 'Get-ExecutionPolicy']).decode('ascii').strip()
-       if execution_policy == 'Restricted':
-           print_error('PowerShell execution policy error, please run PowerShell as administrator with this command first:\r\n'\
-               + '\'Set-ExecutionPolicy -ExecutionPolicy Unrestricted\'')
-           exit(1)
    # check packages for tuner
    package_name, module_name = None, None
    if experiment_config.get('tuner') and experiment_config['tuner'].get('builtinTunerName'):
@@ -369,7 +366,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
        exit(1)
    log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None
    log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
-   if log_level not in ['trace', 'debug'] and args.debug:
+   if log_level not in ['trace', 'debug'] and (args.debug or experiment_config.get('debug') is True):
        log_level = 'debug'
    # start rest server
    rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id, log_dir, log_level)
@@ -430,7 +427,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
        except Exception:
            raise Exception(ERROR_INFO % 'Rest server stopped!')
        exit(1)

    #set pai config
    if experiment_config['trainingServicePlatform'] == 'pai':
        print_normal('Setting pai config...')
@@ -445,7 +442,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
        except Exception:
            raise Exception(ERROR_INFO % 'Restful server stopped!')
        exit(1)

    #set kubeflow config
    if experiment_config['trainingServicePlatform'] == 'kubeflow':
        print_normal('Setting kubeflow config...')
@@ -461,7 +458,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
            raise Exception(ERROR_INFO % 'Restful server stopped!')
        exit(1)
-   #set kubeflow config
+   #set frameworkcontroller config
    if experiment_config['trainingServicePlatform'] == 'frameworkcontroller':
        print_normal('Setting frameworkcontroller config...')
        config_result, err_msg = set_frameworkcontroller_config(experiment_config, args.port, config_file_name)
@@ -499,7 +496,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
    else:
        web_ui_url_list = get_local_urls(args.port)
    nni_config.set_config('webuiUrl', web_ui_url_list)

    #save experiment information
    nnictl_experiment_config = Experiments()
    nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name, experiment_config['trainingServicePlatform'])
@@ -508,6 +505,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen

def resume_experiment(args):
    '''resume an experiment'''
+   update_experiment()
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    experiment_id = None
...
@@ -21,7 +21,7 @@
import os
import json
from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA, FRAMEWORKCONTROLLER_CONFIG_SCHEMA, \
    tuner_schema_dict, advisor_schema_dict, assessor_schema_dict
from schema import SchemaMissingKeyError, SchemaForbiddenKeyError, SchemaUnexpectedTypeError, SchemaWrongKeyError, SchemaError
from .common_utils import get_json_content, print_error, print_warning, print_normal
from schema import Schema, And, Use, Optional, Regex, Or
@@ -62,7 +62,7 @@ def parse_path(experiment_config, config_path):
        expand_path(experiment_config['assessor'], 'codeDir')
    if experiment_config.get('advisor'):
        expand_path(experiment_config['advisor'], 'codeDir')

    #if users use a relative path, convert it to an absolute path
    root_path = os.path.dirname(config_path)
    if experiment_config.get('searchSpacePath'):
@@ -80,8 +80,8 @@ def parse_path(experiment_config, config_path):
        parse_relative_path(root_path, experiment_config['machineList'][index], 'sshKeyPath')

def validate_search_space_content(experiment_config):
    '''Validate search space content:
    if the search space file is not in JSON format, or its values do not contain the required _type and _value fields,
    it is not a valid search space file'''
    try:
        search_space_content = json.load(open(experiment_config.get('searchSpacePath'), 'r'))
@@ -110,7 +110,7 @@ def validate_kubeflow_operators(experiment_config):
        if experiment_config.get('trial').get('master') is None:
            print_error('kubeflow with pytorch-operator must set master')
            exit(1)

    if experiment_config.get('kubeflowConfig').get('storage') == 'nfs':
        if experiment_config.get('kubeflowConfig').get('nfs') is None:
            print_error('please set nfs configuration!')
@@ -170,7 +170,7 @@ def validate_common_content(experiment_config):
    else:
        print_error(error)
        exit(1)

    #set default value
    if experiment_config.get('maxExecDuration') is None:
        experiment_config['maxExecDuration'] = '999d'
...
@@ -176,7 +176,7 @@ def parse_args():
    parser_package_subparsers = parser_package.add_subparsers()
    parser_package_install = parser_package_subparsers.add_parser('install', help='install packages')
    parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed')
    parser_package_install.set_defaults(func=package_install)

    parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages')
    parser_package_show.set_defaults(func=package_show)
...