Unverified Commit c037a7c1 authored by SparkSnail, committed by GitHub

Merge pull request #213 from microsoft/master

merge master
parents 49972952 901012eb
@@ -11,6 +11,9 @@ pids
*.seed
*.pid.lock
+# Build package
+dist/
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
@@ -54,9 +57,6 @@ typings/
# Yarn Integrity file
.yarn-integrity
-# dotenv environment variables file
-.env
# next.js build output
.next
@@ -67,7 +67,18 @@ typings/
__pycache__
build
*.egg-info
+setup.pye
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# VSCode
.vscode
# In case you place source code in ~/nni/
...
@@ -52,6 +52,7 @@ $(shell mkdir -p $(NNI_DEPENDENCY_FOLDER))
NNI_NODE_TARBALL ?= $(NNI_DEPENDENCY_FOLDER)/nni-node-$(OS_SPEC)-x64.tar.xz
NNI_NODE_FOLDER = $(NNI_DEPENDENCY_FOLDER)/nni-node-$(OS_SPEC)-x64
NNI_NODE ?= $(BIN_FOLDER)/node
+NNI_NPM ?= $(BIN_FOLDER)/npm
NNI_YARN_TARBALL ?= $(NNI_DEPENDENCY_FOLDER)/nni-yarn.tar.gz
NNI_YARN_FOLDER ?= $(NNI_DEPENDENCY_FOLDER)/nni-yarn
NNI_YARN ?= PATH=$(BIN_FOLDER):$${PATH} $(NNI_YARN_FOLDER)/bin/yarn
@@ -149,8 +150,9 @@ install-dependencies: $(NNI_NODE_TARBALL) $(NNI_YARN_TARBALL)
	mkdir $(NNI_NODE_FOLDER)
	tar -xf $(NNI_NODE_TARBALL) -C $(NNI_NODE_FOLDER) --strip-components 1
	mkdir -p $(BIN_FOLDER)
-	rm -f $(NNI_NODE)
-	cp $(NNI_NODE_FOLDER)/bin/node $(NNI_NODE)
+	rm -f $(NNI_NODE) $(NNI_NPM)
+	ln -s $(NNI_NODE_FOLDER)/bin/node $(NNI_NODE)
+	ln -s $(NNI_NODE_FOLDER)/bin/npm $(NNI_NPM)
	#$(_INFO) Extracting Yarn $(_END)
	rm -rf $(NNI_YARN_FOLDER)
...
@@ -8,16 +8,34 @@ jobs:
      PYTHON_VERSION: '3.6'
    steps:
-    - script: python3 -m pip install --upgrade pip setuptools --user
+    - script: |
+        python3 -m pip install --upgrade pip setuptools --user
+        python3 -m pip install pylint==2.3.1 astroid==2.2.5 --user
+        python3 -m pip install coverage --user
+        echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}"
      displayName: 'Install python tools'
+    - script: |
+        source install.sh
+      displayName: 'Install nni toolkit via source code'
    - script: |
        python3 -m pip install torch==0.4.1 --user
        python3 -m pip install torchvision==0.2.1 --user
        python3 -m pip install tensorflow==1.13.1 --user
+        python3 -m pip install keras==2.1.6 --user
+        python3 -m pip install gym onnx --user
+        sudo apt-get install swig -y
+        nnictl package install --name=SMAC
+        nnictl package install --name=BOHB
      displayName: 'Install dependencies'
    - script: |
-        source install.sh
-      displayName: 'Install nni toolkit via source code'
+        set -e
+        python3 -m pylint --rcfile pylintrc nni_annotation
+        python3 -m pylint --rcfile pylintrc nni_cmd
+        python3 -m pylint --rcfile pylintrc nni_gpu_tool
+        python3 -m pylint --rcfile pylintrc nni_trial_tool
+        python3 -m pylint --rcfile pylintrc nni
+        python3 -m pylint --rcfile pylintrc nnicli
+      displayName: 'Run pylint'
    - script: |
        python3 -m pip install flake8 --user
        IGNORE=./tools/nni_annotation/testcase/*:F821,./examples/trials/mnist-nas/*/mnist*.py:F821,./examples/trials/nas_cifar10/src/cifar10/general_child.py:F821
@@ -29,19 +47,19 @@ jobs:
      displayName: 'Unit test'
    - script: |
        cd test
-        PATH=$HOME/.local/bin:$PATH python3 naive_test.py
+        python3 naive_test.py
      displayName: 'Naive test'
    - script: |
        cd test
-        PATH=$HOME/.local/bin:$PATH python3 tuner_test.py
+        python3 tuner_test.py
      displayName: 'Built-in tuners / assessors tests'
    - script: |
        cd test
-        PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
+        python3 metrics_test.py
      displayName: 'Trial job metrics test'
    - script: |
        cd test
-        PATH=$HOME/.local/bin:$PATH python3 cli_test.py
+        python3 cli_test.py
      displayName: 'nnicli test'
  - job: 'basic_test_pr_macOS'
@@ -55,29 +73,34 @@ jobs:
    steps:
    - script: python3 -m pip install --upgrade pip setuptools
      displayName: 'Install python tools'
+    - script: |
+        source install.sh
+        echo "##vso[task.setvariable variable=PATH]${HOME}/Library/Python/3.7/bin:${PATH}"
+      displayName: 'Install nni toolkit via source code'
    - script: |
        python3 -m pip install torch==0.4.1 --user
        python3 -m pip install torchvision==0.2.1 --user
        python3 -m pip install tensorflow==1.13.1 --user
+        ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" < /dev/null 2> /dev/null
+        brew install swig@3
+        ln -s /usr/local/opt/swig\@3/bin/swig /usr/local/bin/swig
+        nnictl package install --name=SMAC
      displayName: 'Install dependencies'
-    - script: |
-        source install.sh
-      displayName: 'Install nni toolkit via source code'
    - script: |
        cd test
-        PATH=$HOME/Library/Python/3.7/bin:$PATH && source unittest.sh
+        source unittest.sh
      displayName: 'Unit test'
    - script: |
        cd test
-        PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py
+        python3 naive_test.py
      displayName: 'Naive test'
    - script: |
        cd test
-        PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py
+        python3 tuner_test.py
      displayName: 'Built-in tuners / assessors tests'
    - script: |
        cd test
-        PATH=$HOME/Library/Python/3.7/bin:$PATH python3 cli_test.py
+        python3 cli_test.py
      displayName: 'nnicli test'
  - job: 'basic_test_pr_Windows'
...
nni/
+nni-yarn/
dist/
build/
*.egg-info/
+nni-yarn.tar.gz
node-*.tar.xz
node-*/
\ No newline at end of file
@@ -9,13 +9,13 @@ You can easily compress a model with NNI compression. Take pruning for example,
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
-pruner = LevelPruner(config_list)
-pruner(model)
+pruner = LevelPruner(model, config_list)
+pruner.compress()
```
The 'default' op_type stands for the module types defined in [default_layers.py](https://github.com/microsoft/nni/blob/master/src/sdk/pynni/nni/compression/torch/default_layers.py) for PyTorch.
-Therefore ```{ 'sparsity': 0.8, 'op_types': ['default'] }```means that **all layers with specified op_types will be compressed with the same 0.8 sparsity**. When ```pruner(model)``` called, the model is compressed with masks and after that you can normally fine tune this model and **pruned weights won't be updated** which have been masked.
+Therefore `{ 'sparsity': 0.8, 'op_types': ['default'] }` means that **all layers with specified op_types will be compressed with the same 0.8 sparsity**. When `pruner.compress()` is called, the model is compressed with masks; after that you can fine-tune the model normally, and **pruned weights won't be updated** since they have been masked.
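To see the pieces together, here is a minimal end-to-end sketch of the new interface; the toy two-layer network and the optimizer are invented for illustration, and only the pruner calls come from the snippet above:

```python
import torch
import torch.nn as nn
from nni.compression.torch import LevelPruner

# stand-in for your real network
model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))

config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
model = pruner.compress()  # inserts masks; forward passes now see masked weights

# fine-tune as usual; weights that were pruned stay masked
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
```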
## Then, make this automatic
@@ -84,9 +84,9 @@ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity,
                   {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity,
                    'start_epoch': 0, 'end_epoch': 3,
                    'frequency': 1, 'op_name': 'conv1' },]
-PRUNERS = {'level': LevelPruner(config_list_level), 'agp': AGP_Pruner(config_list_agp)}
+PRUNERS = {'level': LevelPruner(model, config_list_level), 'agp': AGP_Pruner(model, config_list_agp)}
pruner = PRUNERS[params['prune_method']['_name']]
-pruner(model)
+pruner.compress()
... # fine tuning
acc = evaluate(model) # evaluation
nni.report_final_results(acc)
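For context, `params` in the hunk above comes from the tuner via NNI's standard trial API; a sketch of that glue (the exact search-space keys depend on your experiment config):

```python
import nni

# e.g. {'prune_method': {'_name': 'agp'}} for the PRUNERS dict above
params = nni.get_next_parameter()
```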
...
@@ -25,8 +25,8 @@ Tensorflow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
-pruner = LevelPruner(config_list)
-pruner(tf.get_default_graph())
+pruner = LevelPruner(tf.get_default_graph(), config_list)
+pruner.compress()
```
PyTorch code
@@ -34,13 +34,13 @@ PyTorch code
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
-pruner = LevelPruner(config_list)
-pruner(model)
+pruner = LevelPruner(model, config_list)
+pruner.compress()
```
You can use other compression algorithms in the package of `nni.compression`. The algorithms are implemented in both PyTorch and Tensorflow, under `nni.compression.torch` and `nni.compression.tensorflow` respectively. You can refer to [Pruner](./Pruner.md) and [Quantizer](./Quantizer.md) for detailed descriptions of the supported algorithms.
-The function call `pruner(model)` receives user defined model (in Tensorflow the model can be obtained with `tf.get_default_graph()`, while in PyTorch the model is the defined model class), and the model is modified with masks inserted. Then when you run the model, the masks take effect. The masks can be adjusted at runtime by the algorithms.
+The function call `pruner.compress()` modifies the user-defined model (in Tensorflow the model can be obtained with `tf.get_default_graph()`, while in PyTorch the model is the defined model class); the model is modified with masks inserted. Then when you run the model, the masks take effect. The masks can be adjusted at runtime by the algorithms.
When instantiating a compression algorithm, a `config_list` is passed in. We describe how to write this config below.
@@ -95,7 +95,17 @@ pruner.update_epoch(epoch)
The other is `step`, which can be called with `pruner.step()` after each minibatch. Note that not all algorithms need these two APIs; for those that do not need them, calling them is allowed but has no effect.
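Put together, the two hooks usually sit at these points in a training loop (a sketch; `train_one_batch` is a placeholder for your own forward/backward/optimizer step):

```python
for epoch in range(num_epochs):
    pruner.update_epoch(epoch)         # called at the beginning of each epoch
    for batch in train_loader:
        train_one_batch(model, batch)  # your training step
        pruner.step()                  # per-minibatch hook; a no-op for algorithms that don't need it
```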
-__[TODO]__ The last API is for users to export the compressed model. You will get a compressed model when you finish the training using this API. It also exports another file storing the values of masks.
+You can easily export the compressed model using the following API if you are pruning your model; the `state_dict` of the sparse model weights will be stored in `model.pth`, which can be loaded by `torch.load('model.pth')`:
+```python
+pruner.export_model(model_path='model.pth')
+```
+The `mask_dict`, and the pruned model in `onnx` format (`input_shape` needs to be specified), can also be exported like this:
+```python
+pruner.export_model(model_path='model.pth', mask_path='mask.pth', onnx_path='model.onnx', input_shape=[1, 1, 28, 28])
+```
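The exported weights can then be loaded back like any regular PyTorch `state_dict` (a sketch; `MyModel` stands for your own module class):

```python
import torch

model = MyModel()
model.load_state_dict(torch.load('model.pth'))
```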
## Customize new compression algorithms
@@ -111,20 +121,26 @@ If you want to write a new pruning algorithm, you can write a class that inherits
# nni.compression.tensorflow.Pruner with
# nni.compression.torch.Pruner
class YourPruner(nni.compression.tensorflow.Pruner):
-    def __init__(self, config_list):
-        # suggest you to use the NNI defined spec for config
-        super().__init__(config_list)
-
-    def bind_model(self, model):
-        # this func can be used to remember the model or its weights
-        # in member variables, for getting their values during training
-        pass
-
-    def calc_mask(self, weight, config, **kwargs):
-        # weight is the target weight tensor
-        # config is the selected dict object in config_list for this layer
-        # kwargs contains op, op_types, and op_name
-        # design your mask and return your mask
+    def __init__(self, model, config_list):
+        """
+        It is suggested to use the NNI-defined spec for config.
+        """
+        super().__init__(model, config_list)
+
+    def calc_mask(self, layer, config):
+        """
+        Pruners should overload this method to provide a mask for weight tensors.
+        The mask must have the same shape and type as the weight.
+        It will be applied with a ``mul()`` operation on the weight.
+        This method is effectively hooked to the ``forward()`` method of the model.
+
+        Parameters
+        ----------
+        layer: LayerInfo
+            calculate mask for ``layer``'s weight
+        config: dict
+            the configuration for generating the mask
+        """
        return your_mask

    # note for pytorch version, there is no sess in input arguments
@@ -133,16 +149,18 @@ class YourPruner(nni.compression.tensorflow.Pruner):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
-        # can do some processing based on the model or weights binded
-        # in the func bind_model
+        """
+        Can do some processing based on the model or weights bound
+        in the func bind_model.
+        """
        pass
```
-For the simplest algorithm, you only need to override `calc_mask`. It receives each layer's weight and selected configuration, as well as op information. You generate the mask for this weight in this function and return. Then NNI applies the mask for you.
+For the simplest algorithm, you only need to override ``calc_mask``. It receives the to-be-compressed layers one by one, along with their compression configuration. You generate the mask for the layer's weight in this function and return it. Then NNI applies the mask for you.

-Some algorithms generate mask based on training progress, i.e., epoch number. We provide `update_epoch` for the pruner to be aware of the training progress.
+Some algorithms generate masks based on training progress, i.e., epoch number. We provide `update_epoch` for the pruner to be aware of the training progress. It should be called at the beginning of each epoch.

-Some algorithms may want global information for generating masks, for example, all weights of the model (for statistic information), model optimizer's information. NNI supports this requirement using `bind_model`. `bind_model` receives the complete model, thus, it could record any information (e.g., reference to weights) it cares about. Then `step` can process or update the information according to the algorithm. You can refer to [source code of built-in algorithms](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/compressors) for example implementations.
+Some algorithms may want global information for generating masks, for example, all weights of the model (for statistical information). You can use `self.bound_model` in the Pruner class to access weights. If you also need the optimizer's information (for example in PyTorch), you can override `__init__` to receive more arguments, such as the model's optimizer. Then `step` can process or update the information according to the algorithm. You can refer to the [source code of built-in algorithms](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/compressors) for example implementations.
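For instance, a bare-bones magnitude pruner written against the new `calc_mask(layer, config)` interface might look like the following sketch; it assumes `layer` exposes the wrapped module as `layer.module`, and the thresholding logic is invented for illustration:

```python
import torch
from nni.compression.torch import Pruner

class MagnitudePruner(Pruner):
    def __init__(self, model, config_list):
        super().__init__(model, config_list)

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data
        sparsity = config.get('sparsity', 0.5)
        num_prune = int(weight.numel() * sparsity)
        if num_prune == 0:
            return torch.ones_like(weight)
        # zero out the num_prune smallest-magnitude weights
        threshold = weight.abs().view(-1).kthvalue(num_prune).values
        return (weight.abs() > threshold).type_as(weight)
```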
### Quantization algorithm
@@ -154,20 +172,19 @@ The interface for customizing quantization algorithm is similar to that of pruning
# nni.compression.tensorflow.Quantizer with
# nni.compression.torch.Quantizer
class YourQuantizer(nni.compression.tensorflow.Quantizer):
-    def __init__(self, config_list):
-        # suggest you to use the NNI defined spec for config
-        super().__init__(config_list)
-
-    def bind_model(self, model):
-        # this func can be used to remember the model or its weights
-        # in member variables, for getting their values during training
-        pass
+    def __init__(self, model, config_list):
+        """
+        It is suggested to use the NNI-defined spec for config.
+        """
+        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
-        # weight is the target weight tensor
-        # config is the selected dict object in config_list for this layer
-        # kwargs contains op, op_types, and op_name
-        # design your quantizer and return new weight
+        """
+        weight is the target weight tensor
+        config is the selected dict object in config_list for this layer
+        kwargs contains op, op_types, and op_name
+        design your quantizer and return the new weight
+        """
        return new_weight

    # note for pytorch version, there is no sess in input arguments
@@ -176,8 +193,10 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
-        # can do some processing based on the model or weights binded
-        # in the func bind_model
+        """
+        Can do some processing based on the model or weights bound
+        in the func bind_model.
+        """
        pass
```
...
@@ -13,16 +13,16 @@ Tensorflow code
```
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
-pruner = LevelPruner(config_list)
-pruner(model_graph)
+pruner = LevelPruner(model_graph, config_list)
+pruner.compress()
```
PyTorch code
```
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
-pruner = LevelPruner(config_list)
-pruner(model)
+pruner = LevelPruner(model, config_list)
+pruner.compress()
```
#### User configuration for Level Pruner
@@ -53,8 +53,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
-pruner = AGP_Pruner(config_list)
-pruner(tf.get_default_graph())
+pruner = AGP_Pruner(tf.get_default_graph(), config_list)
+pruner.compress()
```
PyTorch code
```python
@@ -67,8 +67,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
-pruner = AGP_Pruner(config_list)
-pruner(model)
+pruner = AGP_Pruner(model, config_list)
+pruner.compress()
```
Second, you should add the code below to update the epoch number when you finish one epoch in your training code.
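That snippet is elided in this diff; with the new API it is essentially the `update_epoch` call from the overview, placed at each epoch boundary, e.g. for the PyTorch side:

```python
for epoch in range(num_epochs):
    pruner.update_epoch(epoch)
    train(model, device, train_loader, optimizer)
```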
...
@@ -8,11 +8,11 @@ We provide Naive Quantizer to quantizer weight to default 8 bits, you can use it
### Usage
tensorflow
```python
-nni.compressors.tensorflow.NaiveQuantizer()(model_graph)
+nni.compressors.tensorflow.NaiveQuantizer(model_graph).compress()
```
pytorch
```python
-nni.compressors.torch.NaiveQuantizer()(model)
+nni.compressors.torch.NaiveQuantizer(model).compress()
```
***
@@ -32,15 +32,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
-quantizer = QAT_Quantizer(config_list)
-quantizer(tf.get_default_graph())
+quantizer = QAT_Quantizer(tf.get_default_graph(), config_list)
+quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
-quantizer = QAT_Quantizer(config_list)
-quantizer(model)
+quantizer = QAT_Quantizer(model, config_list)
+quantizer.compress()
```
You can view the example for more information.
@@ -61,15 +61,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
-quantizer = DoReFaQuantizer(config_list)
-quantizer(tf.get_default_graph())
+quantizer = DoReFaQuantizer(tf.get_default_graph(), config_list)
+quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
-quantizer = DoReFaQuantizer(config_list)
-quantizer(model)
+quantizer = DoReFaQuantizer(model, config_list)
+quantizer.compress()
```
You can view the example for more information.
...
@@ -82,6 +82,22 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMode.md)
   portNumber: 1
```
+NNI supports two kinds of authorization methods for PAI: password and PAI token; see the [reference](https://github.com/microsoft/pai/blob/b6bd2ab1c8890f91b7ac5859743274d2aa923c22/docs/rest-server/API.md#2-authentication). The authorization is configured in the `paiConfig` field.
+For password authorization, the `paiConfig` schema is:
+```
+paiConfig:
+  userName: your_pai_nni_user
+  passWord: your_pai_password
+  host: 10.1.1.1
+```
+For PAI token authorization, the `paiConfig` schema is:
+```
+paiConfig:
+  userName: your_pai_nni_user
+  token: your_pai_token
+  host: 10.1.1.1
+```
Once you complete the NNI experiment config file and save it (for example, as exp_pai.yml), run the following command
```
nnictl create --config exp_pai.yml
...
@@ -122,7 +122,7 @@ Its requirement of computation resource is relatively high. Specifically, it requires
* **optimize_mode** (*maximize or minimize, optional, default = maximize*) - If 'maximize', the tuner will target to maximize metrics. If 'minimize', the tuner will target to minimize metrics.
-* **population_size** (*int value (should > 0), optional, default = 20*) - the initial size of the population(trial num) in evolution tuner. Suggests `population_size` be much larger than `concurrency`, so users can get the most out of the algorithm (and at least `concurrency`, or the tuner will fail on their first generation of parameters).
+* **population_size** (*int value (should > 0), optional, default = 20*) - the initial size of the population (trial num) in the evolution tuner. We suggest `population_size` be much larger than `concurrency`, so users can get the most out of the algorithm (and at least `concurrency`, or the tuner will fail on its first generation of parameters).
**Usage example**
@@ -143,11 +143,11 @@ tuner:
> Built-in Tuner Name: **SMAC**
-**Please note that SMAC doesn't support running on windows currently. The specific reason can be referred to this [GitHub issue](https://github.com/automl/SMAC3/issues/483).**
+**Please note that SMAC doesn't support running on Windows currently. The specific reason can be found in this [GitHub issue](https://github.com/automl/SMAC3/issues/483).**
**Installation**
-SMAC need to be installed by following command before first use.
+SMAC needs to be installed via the following command before first use. As a reminder, `swig` is required for SMAC: on Ubuntu, `swig` can be installed with `apt`.
```bash
nnictl package install --name=SMAC
...
@@ -21,6 +21,8 @@ To define a search space, users should define the name of variable, the type of
Take the first line as an example. `dropout_rate` is defined as a variable whose prior distribution is a uniform distribution over the range `0.1` to `0.5`.
+Note that the expressiveness of a search space is closely tied to your tuner. We list the supported types for each built-in tuner below. For a customized tuner, you don't have to follow our convention, and you have the flexibility to define any type you want.
## Types
All types of sampling strategies and their parameters are listed here:
@@ -74,6 +76,8 @@ All types of sampling strategies and their parameter are listed here:
* `{"_type": "mutable_layer", "_value": {mutable_layer_infomation}}`
  * Type for [Neural Architecture Search Space][1]. Value is also a dictionary, which contains key-value pairs representing respectively name and search space of each mutable_layer.
  * For now, users can only use this type of search space with annotation, which means that there is no need to define a json file for search space since it will be automatically generated according to the annotation in trial code.
+  * The following HPO tuners can be adapted to tune this search space: TPE, Random, Anneal, Evolution, Grid Search, Hyperband and BOHB.
  * For detailed usage, please refer to [General NAS Interfaces][1].
## Search Space Types Supported by Each Tuner
@@ -86,20 +90,20 @@ All types of sampling strategies and their parameter are listed here:
| Evolution Tuner | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; |
| SMAC Tuner | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | | | | | |
| Batch Tuner | &#10003; | | | | | | | | | |
| Grid Search Tuner | &#10003; | &#10003; | | &#10003; | | | | | | |
| Hyperband Advisor | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; |
| Metis Tuner | &#10003; | &#10003; | &#10003; | &#10003; | | | | | | |
| GP Tuner | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | &#10003; | | | | |

Known Limitations:

-* GP Tuner and Metis Tuner support only **numerical values** in search space(`choice` type values can be no-numeraical with other tuners, e.g. string values). Both GP Tuner and Metis Tuner use Gaussian Process Regressor(GPR). GPR make predictions based on a kernel function and the 'distance' between different points, it's hard to get the true distance between no-numerical values.
+* GP Tuner and Metis Tuner support only **numerical values** in search space (`choice` type values can be non-numerical with other tuners, e.g. string values). Both GP Tuner and Metis Tuner use Gaussian Process Regressor (GPR). GPR makes predictions based on a kernel function and the 'distance' between different points; it's hard to get the true distance between non-numerical values.
* Note that for nested search space:
  * Only Random Search/TPE/Anneal/Evolution tuner supports nested search space
-  * We do not support nested search space "Hyper Parameter" in visualization now, the enhancement is being considered in #1110(https://github.com/microsoft/nni/issues/1110), any suggestions or discussions or contributions are warmly welcomed
+  * We do not support nested search space "Hyper Parameter" in visualization now; the enhancement is being considered in [#1110](https://github.com/microsoft/nni/issues/1110), and any suggestions, discussions or contributions are warmly welcomed

[1]: ../AdvancedFeature/GeneralNasInterfaces.md
@@ -93,15 +93,13 @@ def main():
        'frequency': 1,
        'op_types': ['default']
    }]
-    pruner = AGP_Pruner(configure_list)
+    pruner = AGP_Pruner(tf.get_default_graph(), configure_list)
    # if you want to load from yaml file
    # configure_file = nni.compressors.tf_compressor._nnimc_tf._tf_default_load_configure_file('configure_example.yaml','AGPruner')
    # configure_list = configure_file.get('config',[])
    # pruner.load_configure(configure_list)
    # you can also handle it yourself and input an configure list in json
-    pruner(tf.get_default_graph())
-    # you can also use compress(model) or compress_default_graph() for tensorflow compressor
-    # pruner.compress(tf.get_default_graph())
+    pruner.compress()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
...
@@ -83,8 +83,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(tf.get_default_graph())
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
-    quantizer = QAT_Quantizer(configure_list)
-    quantizer(tf.get_default_graph())
+    quantizer = QAT_Quantizer(tf.get_default_graph(), configure_list)
+    quantizer.compress()
    # you can also use compress(model) or compress_default_graph()
    # method like QATquantizer(q_bits = 8).compress_default_graph()
...
@@ -66,6 +66,7 @@ def main():
        batch_size=1000, shuffle=True)
    model = Mnist()
+    model.to(device)
    '''you can change this to LevelPruner to implement it
    pruner = LevelPruner(configure_list)
@@ -79,8 +80,8 @@ def main():
        'op_types': ['default']
    }]
-    pruner = AGP_Pruner(configure_list)
-    pruner(model)
+    pruner = AGP_Pruner(model, configure_list)
+    model = pruner.compress()
    # you can also use compress(model) method
    # like that pruner.compress(model)
@@ -90,6 +91,7 @@ def main():
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer)
        test(model, device, test_loader)
+    pruner.export_model('model.pth', 'mask.pth', 'model.onnx', [1, 1, 28, 28])
if __name__ == '__main__':
...
@@ -69,8 +69,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(model)
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
-    quantizer = QAT_Quantizer(configure_list)
-    quantizer(model)
+    quantizer = QAT_Quantizer(model, configure_list)
+    quantizer.compress()
    # you can also use compress(model) method
    # like that quantizer.compress(model)
...
@@ -510,4 +510,4 @@ function unixPathJoin(...paths: any[]): string {
export {countFilesRecursively, validateFileNameRecursively, getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getCheckpointDir,
    getLogDir, getExperimentRootDir, getJobCancelStatus, getDefaultDatabaseDir, getIPV4Address, unixPathJoin,
-    mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid, getNewLine };
+    mkDirP, mkDirPSync, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid, getNewLine };
@@ -107,7 +107,8 @@ export namespace ValidationSchemas {
        }),
        pai_config: joi.object({
            userName: joi.string().min(1).required(),
-            passWord: joi.string().min(1).required(),
+            passWord: joi.string().min(1),
+            token: joi.string().min(1),
            host: joi.string().min(1).required()
        }),
        kubeflow_config: joi.object({
...
@@ -30,7 +30,7 @@ import { String } from 'typescript-string-operations';
import * as component from '../../common/component';
import { getBasePort, getExperimentId } from '../../common/experimentStartupInfo';
import { RestServer } from '../../common/restServer';
-import { getLogDir } from '../../common/utils';
+import { getExperimentRootDir, mkDirPSync } from '../../common/utils';
/**
 * Cluster Job Training service Rest server, provides rest API to support Cluster job metrics update
@@ -146,7 +146,9 @@ export abstract class ClusterJobRestServer extends RestServer {
            this.errorMessage = `Version check failed, didn't get version check response from trialKeeper,`
                + ` please check your NNI version in NNIManager and TrialKeeper!`;
        }
-        const trialLogPath: string = path.join(getLogDir(), `trial_${req.params.trialId}.log`);
+        const trialLogDir: string = path.join(getExperimentRootDir(), 'trials', req.params.trialId);
+        mkDirPSync(trialLogDir);
+        const trialLogPath: string = path.join(trialLogDir, 'stdout_log_collection.log');
        try {
            let skipLogging: boolean = false;
            if (req.body.tag === 'trial' && req.body.msg !== undefined) {
...
@@ -107,19 +107,22 @@ export class PAIJobConfig {
 */
export class PAIClusterConfig {
    public readonly userName: string;
-    public readonly passWord: string;
+    public readonly passWord?: string;
    public readonly host: string;
+    public readonly token?: string;

    /**
     * Constructor
     * @param userName User name of PAI Cluster
     * @param passWord password of PAI Cluster
     * @param host Host IP of PAI Cluster
+     * @param token PAI token of PAI Cluster
     */
-    constructor(userName: string, passWord : string, host : string) {
+    constructor(userName: string, host : string, passWord?: string, token?: string) {
        this.userName = userName;
        this.passWord = passWord;
        this.host = host;
+        this.token = token;
    }
}
...
@@ -208,7 +208,7 @@ class PAITrainingService implements TrainingService {
        const stopJobRequest: request.Options = {
            uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
/jobs/${trialJobDetail.paiJobName}/executionType`,
            method: 'PUT',
            json: true,
            body: {value: 'STOP'},
@@ -256,9 +256,15 @@ class PAITrainingService implements TrainingService {
                path: '/webhdfs/api/v1',
                host: this.paiClusterConfig.host
            });
+            if (this.paiClusterConfig.passWord) {
+                // Get PAI authentication token
+                await this.updatePaiToken();
+            } else if (this.paiClusterConfig.token) {
+                this.paiToken = this.paiClusterConfig.token;
+            } else {
+                deferred.reject(new Error('pai cluster config format error, please set password or token!'));
+            }
-            // Get PAI authentication token
-            await this.updatePaiToken();
            deferred.resolve();
            break;
@@ -483,8 +489,7 @@ class PAITrainingService implements TrainingService {
        request(submitJobRequest, (error: Error, response: request.Response, body: any) => {
            if ((error !== undefined && error !== null) || response.statusCode >= 400) {
                const errorMessage : string = (error !== undefined && error !== null) ? error.message :
-                    `Submit trial ${trialJobId} failed, http code:${response.statusCode}, http body: ${response.body}`;
-                this.log.error(errorMessage);
+                    `Submit trial ${trialJobId} failed, http code:${response.statusCode}, http body: ${response.body.message}`;
                trialJobDetail.status = 'FAILED';
                deferred.resolve(true);
            } else {
@@ -498,13 +503,15 @@ class PAITrainingService implements TrainingService {
    private async statusCheckingLoop(): Promise<void> {
        while (!this.stopping) {
-            try {
-                await this.updatePaiToken();
-            } catch (error) {
-                this.log.error(`${error}`);
-                //only throw error when initializing paiToken the first time
-                if (this.paiToken === undefined) {
-                    throw new Error(error);
-                }
-            }
+            if (this.paiClusterConfig && this.paiClusterConfig.passWord) {
+                try {
+                    await this.updatePaiToken();
+                } catch (error) {
+                    this.log.error(`${error}`);
+                    //only throw error when initializing paiToken the first time
+                    if (this.paiToken === undefined) {
+                        throw new Error(error);
+                    }
+                }
+            }
            await this.paiJobCollector.retrieveTrialStatus(this.paiToken, this.paiClusterConfig);
...