"...resnet50_tensorflow.git" did not exist on "ea88084b78e65cc7bd0bd9a1e8f316286342c756"
Unverified commit c037a7c1 authored by SparkSnail, committed by GitHub

Merge pull request #213 from microsoft/master

merge master
parents 49972952 901012eb
@@ -28,6 +28,8 @@ import {
     parseGpuIndices, RemoteMachineMeta, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail, ScheduleResultType, SSHClientManager
 } from './remoteMachineData';
+type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
 /**
  * A simple GPU scheduler implementation
  */
@@ -35,13 +37,18 @@ export class GPUScheduler {
     private readonly machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>;
     private readonly log: Logger = getLogger();
+    private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
+    private roundRobinIndex: number = 0;
+    private configuredRMs: RemoteMachineMeta[] = [];
     /**
      * Constructor
      * @param machineSSHClientMap map from remote machine to sshClient
      */
     constructor(machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>) {
+        assert(machineSSHClientMap.size > 0);
         this.machineSSHClientMap = machineSSHClientMap;
+        this.configuredRMs = Array.from(machineSSHClientMap.keys());
     }
     /**
@@ -189,7 +196,21 @@ export class GPUScheduler {
     private selectMachine(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
         assert(rmMetas !== undefined && rmMetas.length > 0);
-        return randomSelect(rmMetas);
+        if (this.policyName === 'random') {
+            return randomSelect(rmMetas);
+        } else if (this.policyName === 'round-robin') {
+            return this.roundRobinSelect(rmMetas);
+        } else {
+            throw new Error(`Unsupported schedule policy: ${this.policyName}`);
+        }
+    }
+    private roundRobinSelect(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
+        while (!rmMetas.includes(this.configuredRMs[this.roundRobinIndex % this.configuredRMs.length])) {
+            this.roundRobinIndex++;
+        }
+        return this.configuredRMs[this.roundRobinIndex++ % this.configuredRMs.length];
     }
     private selectGPUsForTrial(gpuInfos: GPUInfo[], requiredGPUNum: number): GPUInfo[] {
......
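Note on the scheduling change above: `selectMachine` now dispatches on a fixed policy name, and `roundRobinSelect` walks the full configured machine list, skipping machines that are not in the currently eligible subset and wrapping around via the modulo. A minimal Python sketch of that wrap-around behaviour (illustrative only, not the NNI API; like the TypeScript, it assumes at least one configured machine is eligible):

    # Illustrative sketch of the round-robin selection above (not the NNI API).
    def round_robin_select(eligible, configured, start_index):
        """Return (chosen machine, next index), skipping currently ineligible machines."""
        index = start_index
        while configured[index % len(configured)] not in eligible:
            index += 1  # skip machines that are configured but not eligible right now
        return configured[index % len(configured)], index + 1

    configured = ['rm-a', 'rm-b', 'rm-c']
    choice, next_index = round_robin_select(['rm-b', 'rm-c'], configured, 0)
    print(choice)  # 'rm-b': 'rm-a' is configured but not currently eligible, so it is skipped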
 import setuptools
 setuptools.setup(
-    name = 'nnicli',
-    version = '999.0.0-developing',
-    packages = setuptools.find_packages(),
-    python_requires = '>=3.5',
-    install_requires = [
+    name='nnicli',
+    version='999.0.0-developing',
+    packages=setuptools.find_packages(),
+    python_requires='>=3.5',
+    install_requires=[
         'requests'
     ],
-    author = 'Microsoft NNI Team',
-    author_email = 'nni@microsoft.com',
-    description = 'nnicli for Neural Network Intelligence project',
-    license = 'MIT',
-    url = 'https://github.com/Microsoft/nni',
+    author='Microsoft NNI Team',
+    author_email='nni@microsoft.com',
+    description='nnicli for Neural Network Intelligence project',
+    license='MIT',
+    url='https://github.com/Microsoft/nni',
 )
@@ -53,14 +53,14 @@ class Assessor(Recoverable):
     to tell whether this trial can be early stopped or not.
     This is the abstract base class for all assessors.
-    Early stopping algorithms should derive this class and override :meth:`assess_trial` method,
+    Early stopping algorithms should inherit this class and override :meth:`assess_trial` method,
     which receives intermediate results from trials and give an assessing result.
     If :meth:`assess_trial` returns :obj:`AssessResult.Bad` for a trial,
     it hints NNI framework that the trial is likely to result in a poor final accuracy,
     and therefore should be killed to save resource.
-    If an accessor want's to get notified when a trial ends, it can also override :meth:`trial_end`.
+    If an accessor want's to be notified when a trial ends, it can also override :meth:`trial_end`.
     To write a new assessor, you can reference :class:`~nni.medianstop_assessor.MedianstopAssessor`'s code as an example.
@@ -77,7 +77,7 @@ class Assessor(Recoverable):
         The NNI framework has little guarantee on ``trial_history``.
         This method is not guaranteed to be invoked for each time ``trial_history`` get updated.
-        It is also possible that a trial's history keeps updateing after receiving a bad result.
+        It is also possible that a trial's history keeps updating after receiving a bad result.
         And if the trial failed and retried, ``trial_history`` may be inconsistent with its previous value.
         The only guarantee is that ``trial_history`` is always growing.
@@ -96,9 +96,9 @@ class Assessor(Recoverable):
         Parameters
         ----------
-        trial_job_id: str
+        trial_job_id : str
             Unique identifier of the trial.
-        trial_history: list
+        trial_history : list
            Intermediate results of this trial. The element type is decided by trial code.
         Returns
@@ -114,9 +114,9 @@ class Assessor(Recoverable):
         Parameters
         ----------
-        trial_job_id: str
+        trial_job_id : str
            Unique identifier of the trial.
-        success: bool
+        success : bool
            True if the trial successfully completed; False if failed or terminated.
         """
......
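Reviewer note on the assessor docstrings above: a minimal assessor against this interface might look like the sketch below. It is a hypothetical example, not part of this diff, and assumes the v1-style import path `nni.assessor`; it stops any trial whose latest intermediate result falls below a fixed threshold.

    from nni.assessor import Assessor, AssessResult

    class ThresholdAssessor(Assessor):
        """Hypothetical assessor: stop trials whose latest result drops below a threshold."""
        def __init__(self, threshold=0.5):
            self.threshold = threshold

        def assess_trial(self, trial_job_id, trial_history):
            # trial_history only grows; judge the most recent intermediate result.
            if trial_history and trial_history[-1] < self.threshold:
                return AssessResult.Bad
            return AssessResult.Good

        def trial_end(self, trial_job_id, success):
            # Optional hook, called once when the trial finishes.
            pass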
@@ -8,16 +8,18 @@ _logger = logging.getLogger(__name__)
 class LevelPruner(Pruner):
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - sparsity
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
         self.mask_list = {}
         self.if_init_list = {}
-    def calc_mask(self, weight, config, op_name, **kwargs):
+    def calc_mask(self, layer, config):
+        weight = layer.weight
+        op_name = layer.name
         if self.if_init_list.get(op_name, True):
             threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100)
             mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)
@@ -38,7 +40,7 @@ class AGP_Pruner(Pruner):
     https://arxiv.org/pdf/1710.01878.pdf
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - initial_sparsity
@@ -47,13 +49,15 @@ class AGP_Pruner(Pruner):
             - end_epoch: end epoch number stop update mask
             - frequency: if you want update every 2 epoch, you can set it 2
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
         self.mask_list = {}
         self.if_init_list = {}
         self.now_epoch = tf.Variable(0)
         self.assign_handler = []
-    def calc_mask(self, weight, config, op_name, **kwargs):
+    def calc_mask(self, layer, config):
+        weight = layer.weight
+        op_name = layer.name
         start_epoch = config.get('start_epoch', 0)
         freq = config.get('frequency', 1)
         if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
......
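The `config_list` docstrings above only name the supported keys. Under the new constructor signature, a LevelPruner configuration might look like this hedged example (the sparsity value is arbitrary; `'default'` matches the op types listed in `default_layers`):

    # Illustrative config_list for the TensorFlow LevelPruner above.
    config_list = [{'sparsity': 0.8, 'op_types': 'default'}]
    # With the new signature the model is passed at construction time:
    #     pruner = LevelPruner(model, config_list)
    #     pruner.compress()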
@@ -10,8 +10,8 @@ _logger = logging.getLogger(__name__)
 class NaiveQuantizer(Quantizer):
     """quantize weight to 8 bits
     """
-    def __init__(self, config_list):
-        super().__init__(config_list)
+    def __init__(self, model, config_list):
+        super().__init__(model, config_list)
         self.layer_scale = {}
     def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
     Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
     http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - q_bits
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
     def quantize_weight(self, weight, config, **kwargs):
         a = tf.stop_gradient(tf.reduce_min(weight))
@@ -52,12 +52,12 @@ class DoReFaQuantizer(Quantizer):
     Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
     (https://arxiv.org/abs/1606.06160)
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - q_bits
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
     def quantize_weight(self, weight, config, **kwargs):
         a = tf.math.tanh(weight)
......
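Likewise, a quantizer configuration under the new signature might look like this illustrative example; `q_bits` is the only documented key and the value here is arbitrary.

    # Illustrative config_list for QAT_Quantizer (or DoReFaQuantizer) above.
    config_list = [{'q_bits': 8, 'op_types': 'default'}]
    # quantizer = QAT_Quantizer(model, config_list)
    # quantizer.compress()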
@@ -6,67 +6,85 @@ _logger = logging.getLogger(__name__)
 class LayerInfo:
-    def __init__(self, op):
+    def __init__(self, op, weight, weight_op):
         self.op = op
         self.name = op.name
         self.type = op.type
+        self.weight = weight
+        self.weight_op = weight_op
 class Compressor:
-    """Abstract base TensorFlow compressor"""
+    """
+    Abstract base TensorFlow compressor
+    """
-    def __init__(self, config_list):
-        self._bound_model = None
-        self._config_list = config_list
+    def __init__(self, model, config_list):
+        """
+        Record necessary info in class members
+        Parameters
+        ----------
+        model : pytorch model
+            the model user wants to compress
+        config_list : list
+            the configurations that users specify for compression
+        """
+        self.bound_model = model
+        self.config_list = config_list
+        self.modules_to_compress = []
-    def __call__(self, model):
-        """Compress given graph with algorithm implemented by subclass.
-        The graph will be editted and returned.
-        """
-        self.compress(model)
-        return model
-    def compress(self, model):
-        """Compress given graph with algorithm implemented by subclass.
-        This will edit the graph.
-        """
-        assert self._bound_model is None, "Each NNI compressor instance can only compress one model"
-        self._bound_model = model
-        self.bind_model(model)
-        for op in model.get_operations():
-            layer = LayerInfo(op)
-            config = self._select_config(layer)
+    def compress(self):
+        """
+        Compress the model with algorithm implemented by subclass.
+        The model will be instrumented and user should never edit it after calling this method.
+        `self.modules_to_compress` records all the to-be-compressed layers
+        """
+        for op in self.bound_model.get_operations():
+            weight_index = _detect_weight_index(op)
+            if weight_index is None:
+                _logger.warning('Failed to detect weight for layer %s', op.name)
+                return
+            weight_op = op.inputs[weight_index].op
+            weight = weight_op.inputs[0]
+            layer = LayerInfo(op, weight, weight_op)
+            config = self.select_config(layer)
             if config is not None:
                 self._instrument_layer(layer, config)
+                self.modules_to_compress.append((layer, config))
+        return self.bound_model
-    def compress_default_graph(self):
-        """Compress the default graph with algorithm implemented by subclass.
-        This will edit the default graph.
-        """
-        self.compress(tf.get_default_graph())
-    def bind_model(self, model):
-        """This method is called when a model is bound to the compressor.
-        Compressors can optionally overload this method to do model-specific initialization.
-        It is guaranteed that only one model will be bound to each compressor instance.
-        """
-    def update_epoch(self, epoch, sess):
-        """If user want to update mask every epoch, user can override this method
-        """
-    def step(self, sess):
-        """If user want to update mask every step, user can override this method
-        """
-    def _instrument_layer(self, layer, config):
-        raise NotImplementedError()
-    def _select_config(self, layer):
+    def get_modules_to_compress(self):
+        """
+        To obtain all the to-be-compressed layers.
+        Returns
+        -------
+        self.modules_to_compress : list
+            a list of the layers, each of which is a tuple (`layer`, `config`),
+            `layer` is `LayerInfo`, `config` is a `dict`
+        """
+        return self.modules_to_compress
+    def select_config(self, layer):
+        """
+        Find the configuration for `layer` by parsing `self.config_list`
+        Parameters
+        ----------
+        layer : LayerInfo
+            one layer
+        Returns
+        -------
+        ret : config or None
+            the retrieved configuration for this layer, if None, this layer should
+            not be compressed
+        """
         ret = None
-        for config in self._config_list:
+        for config in self.config_list:
             op_types = config.get('op_types')
             if op_types == 'default':
                 op_types = default_layers.op_weight_index.keys()
@@ -79,35 +97,72 @@ class Compressor:
             return None
         return ret
+    def update_epoch(self, epoch, sess):
+        """
+        If user want to update model every epoch, user can override this method.
+        This method should be called at the beginning of each epoch
+        Parameters
+        ----------
+        epoch : num
+            the current epoch number
+        """
+    def step(self, sess):
+        """
+        If user want to update mask every step, user can override this method
+        """
+    def _instrument_layer(self, layer, config):
+        """
+        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            the configuration for compressing this layer
+        """
+        raise NotImplementedError()
 class Pruner(Compressor):
     """
     Abstract base TensorFlow pruner
     """
-    def calc_mask(self, weight, config, op, op_type, op_name):
-        """Pruners should overload this method to provide mask for weight tensors.
+    def calc_mask(self, layer, config):
+        """
+        Pruners should overload this method to provide mask for weight tensors.
         The mask must have the same shape and type comparing to the weight.
-        It will be applied with `multiply()` operation.
-        This method works as a subgraph which will be inserted into the bound model.
+        It will be applied with `mul()` operation on the weight.
+        This method is effectively hooked to `forward()` method of the model.
+        Parameters
+        ----------
+        layer : LayerInfo
+            calculate mask for `layer`'s weight
+        config : dict
+            the configuration for generating the mask
         """
         raise NotImplementedError("Pruners must overload calc_mask()")
     def _instrument_layer(self, layer, config):
-        # it seems the graph editor can only swap edges of nodes or remove all edges from a node
-        # it cannot remove one edge from a node, nor can it assign a new edge to a node
-        # we assume there is a proxy operation between the weight and the Conv2D layer
-        # this is true as long as the weight is `tf.Value`
-        # not sure what will happen if the weight is calculated from other operations
-        weight_index = _detect_weight_index(layer)
-        if weight_index is None:
-            _logger.warning('Failed to detect weight for layer %s', layer.name)
-            return
-        weight_op = layer.op.inputs[weight_index].op
-        weight = weight_op.inputs[0]
-        mask = self.calc_mask(weight, config, op=layer.op, op_type=layer.type, op_name=layer.name)
-        new_weight = weight * mask
-        tf.contrib.graph_editor.swap_outputs(weight_op, new_weight.op)
+        """
+        Create a wrapper forward function to replace the original one.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the mask
+        config : dict
+            the configuration for generating the mask
+        """
+        mask = self.calc_mask(layer, config)
+        new_weight = layer.weight * mask
+        tf.contrib.graph_editor.swap_outputs(layer.weight_op, new_weight.op)
 class Quantizer(Compressor):
@@ -133,7 +188,7 @@ def _detect_weight_index(layer):
     index = default_layers.op_weight_index.get(layer.type)
     if index is not None:
         return index
-    weight_indices = [i for i, op in enumerate(layer.op.inputs) if op.name.endswith('Variable/read')]
+    weight_indices = [i for i, op in enumerate(layer.inputs) if op.name.endswith('Variable/read')]
     if len(weight_indices) == 1:
         return weight_indices[0]
     return None
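The refactor above removes `__call__`, `bind_model` and `compress_default_graph`: the model (for the TensorFlow compressor, a graph) is now passed to the constructor, and `compress()` instruments the matching operations and returns it. A hedged usage sketch in TF 1.x style; the import path and config values are assumptions, not part of this diff:

    import tensorflow as tf
    from nni.compression.tensorflow import LevelPruner  # assumed import path

    graph = tf.get_default_graph()                       # the "model" for the TF compressor
    config_list = [{'sparsity': 0.8, 'op_types': 'default'}]
    pruner = LevelPruner(graph, config_list)
    graph = pruner.compress()                            # instruments matching ops, returns the graph
    for layer, config in pruner.get_modules_to_compress():
        print(layer.name, config)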
@@ -11,16 +11,17 @@ class LevelPruner(Pruner):
     """Prune to an exact pruning level specification
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - sparsity
         """
-        super().__init__(config_list)
-        self.mask_list = {}
+        super().__init__(model, config_list)
         self.if_init_list = {}
-    def calc_mask(self, weight, config, op_name, **kwargs):
+    def calc_mask(self, layer, config):
+        weight = layer.module.weight.data
+        op_name = layer.name
         if self.if_init_list.get(op_name, True):
             w_abs = weight.abs()
             k = int(weight.numel() * config['sparsity'])
@@ -28,10 +29,10 @@ class LevelPruner(Pruner):
                 return torch.ones(weight.shape).type_as(weight)
             threshold = torch.topk(w_abs.view(-1), k, largest=False).values.max()
             mask = torch.gt(w_abs, threshold).type_as(weight)
-            self.mask_list.update({op_name: mask})
+            self.mask_dict.update({op_name: mask})
             self.if_init_list.update({op_name: False})
         else:
-            mask = self.mask_list[op_name]
+            mask = self.mask_dict[op_name]
         return mask
@@ -45,7 +46,7 @@ class AGP_Pruner(Pruner):
     https://arxiv.org/pdf/1710.01878.pdf
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - initial_sparsity
@@ -54,17 +55,18 @@ class AGP_Pruner(Pruner):
             - end_epoch: end epoch number stop update mask, you should make sure start_epoch <= end_epoch
             - frequency: if you want update every 2 epoch, you can set it 2
         """
-        super().__init__(config_list)
-        self.mask_list = {}
+        super().__init__(model, config_list)
         self.now_epoch = 0
         self.if_init_list = {}
-    def calc_mask(self, weight, config, op_name, **kwargs):
+    def calc_mask(self, layer, config):
+        weight = layer.module.weight.data
+        op_name = layer.name
         start_epoch = config.get('start_epoch', 0)
         freq = config.get('frequency', 1)
         if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
                 self.now_epoch - start_epoch) % freq == 0:
-            mask = self.mask_list.get(op_name, torch.ones(weight.shape).type_as(weight))
+            mask = self.mask_dict.get(op_name, torch.ones(weight.shape).type_as(weight))
             target_sparsity = self.compute_target_sparsity(config)
             k = int(weight.numel() * target_sparsity)
             if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
@@ -73,10 +75,10 @@ class AGP_Pruner(Pruner):
             w_abs = weight.abs() * mask
             threshold = torch.topk(w_abs.view(-1), k, largest=False).values.max()
             new_mask = torch.gt(w_abs, threshold).type_as(weight)
-            self.mask_list.update({op_name: new_mask})
+            self.mask_dict.update({op_name: new_mask})
             self.if_init_list.update({op_name: False})
         else:
-            new_mask = self.mask_list.get(op_name, torch.ones(weight.shape).type_as(weight))
+            new_mask = self.mask_dict.get(op_name, torch.ones(weight.shape).type_as(weight))
         return new_mask
     def compute_target_sparsity(self, config):
......
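With `mask_dict` now owned by the base `Pruner` and `calc_mask` receiving the layer, a typical AGP run constructs the pruner with the model, calls `compress()`, and advances the epoch counter at the start of each epoch, roughly as sketched below. The config keys follow the docstring above, but `final_sparsity` and the import path are assumptions, and the training step is a placeholder.

    import torch
    from nni.compression.torch import AGP_Pruner  # assumed import path

    model = torch.nn.Sequential(torch.nn.Linear(10, 10), torch.nn.ReLU(), torch.nn.Linear(10, 2))
    config_list = [{
        'initial_sparsity': 0.0,
        'final_sparsity': 0.8,   # assumed key, per the AGP schedule in the paper cited above
        'start_epoch': 0,
        'end_epoch': 10,
        'frequency': 1,
        'op_types': 'default',
    }]
    pruner = AGP_Pruner(model, config_list)
    model = pruner.compress()
    for epoch in range(10):
        pruner.update_epoch(epoch)   # advance the sparsity schedule before training this epoch
        # ... one epoch of the user's training loop goes here ...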
@@ -10,8 +10,8 @@ logger = logging.getLogger(__name__)
 class NaiveQuantizer(Quantizer):
     """quantize weight to 8 bits
     """
-    def __init__(self, config_list):
-        super().__init__(config_list)
+    def __init__(self, model, config_list):
+        super().__init__(model, config_list)
         self.layer_scale = {}
     def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
     Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
     http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - q_bits
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
     def quantize_weight(self, weight, config, **kwargs):
         if config['q_bits'] <= 1:
@@ -53,12 +53,12 @@ class DoReFaQuantizer(Quantizer):
     Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
     (https://arxiv.org/abs/1606.06160)
     """
-    def __init__(self, config_list):
+    def __init__(self, model, config_list):
         """
         config_list: supported keys:
             - q_bits
         """
-        super().__init__(config_list)
+        super().__init__(model, config_list)
     def quantize_weight(self, weight, config, **kwargs):
         out = weight.tanh()
......
@@ -15,49 +15,69 @@ class LayerInfo:
 class Compressor:
-    """Abstract base PyTorch compressor"""
+    """
+    Abstract base PyTorch compressor
+    """
-    def __init__(self, config_list):
-        self._bound_model = None
-        self._config_list = config_list
+    def __init__(self, model, config_list):
+        """
+        Record necessary info in class members
+        Parameters
+        ----------
+        model : pytorch model
+            the model user wants to compress
+        config_list : list
+            the configurations that users specify for compression
+        """
+        self.bound_model = model
+        self.config_list = config_list
+        self.modules_to_compress = []
-    def __call__(self, model):
-        self.compress(model)
-        return model
-    def compress(self, model):
-        """Compress the model with algorithm implemented by subclass.
+    def compress(self):
+        """
+        Compress the model with algorithm implemented by subclass.
         The model will be instrumented and user should never edit it after calling this method.
+        `self.modules_to_compress` records all the to-be-compressed layers
         """
-        assert self._bound_model is None, "Each NNI compressor instance can only compress one model"
-        self._bound_model = model
-        self.bind_model(model)
-        for name, module in model.named_modules():
+        for name, module in self.bound_model.named_modules():
             layer = LayerInfo(name, module)
-            config = self._select_config(layer)
+            config = self.select_config(layer)
             if config is not None:
                 self._instrument_layer(layer, config)
+                self.modules_to_compress.append((layer, config))
+        return self.bound_model
-    def bind_model(self, model):
-        """This method is called when a model is bound to the compressor.
-        Users can optionally overload this method to do model-specific initialization.
-        It is guaranteed that only one model will be bound to each compressor instance.
-        """
-    def update_epoch(self, epoch):
-        """if user want to update model every epoch, user can override this method
-        """
-    def step(self):
-        """if user want to update model every step, user can override this method
-        """
-    def _instrument_layer(self, layer, config):
-        raise NotImplementedError()
-    def _select_config(self, layer):
+    def get_modules_to_compress(self):
+        """
+        To obtain all the to-be-compressed layers.
+        Returns
+        -------
+        self.modules_to_compress : list
+            a list of the layers, each of which is a tuple (`layer`, `config`),
+            `layer` is `LayerInfo`, `config` is a `dict`
+        """
+        return self.modules_to_compress
+    def select_config(self, layer):
+        """
+        Find the configuration for `layer` by parsing `self.config_list`
+        Parameters
+        ----------
+        layer : LayerInfo
+            one layer
+        Returns
+        -------
+        ret : config or None
+            the retrieved configuration for this layer, if None, this layer should
+            not be compressed
+        """
         ret = None
-        for config in self._config_list:
+        for config in self.config_list:
             config['op_types'] = self._expand_config_op_types(config)
             if layer.type not in config['op_types']:
                 continue
@@ -68,6 +88,35 @@ class Compressor:
             return None
         return ret
+    def update_epoch(self, epoch):
+        """
+        If user want to update model every epoch, user can override this method.
+        This method should be called at the beginning of each epoch
+        Parameters
+        ----------
+        epoch : num
+            the current epoch number
+        """
+    def step(self):
+        """
+        If user want to update model every step, user can override this method
+        """
+    def _instrument_layer(self, layer, config):
+        """
+        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            the configuration for compressing this layer
+        """
+        raise NotImplementedError()
     def _expand_config_op_types(self, config):
         if config is None:
             return []
@@ -79,22 +128,50 @@ class Compressor:
             expanded_op_types.append(op_type)
         return expanded_op_types
 class Pruner(Compressor):
     """
-    Abstract base PyTorch pruner
+    Prune to an exact pruning level specification
+    Attributes
+    ----------
+    mask_dict : dict
+        Dictionary for saving masks, `key` should be layer name and
+        `value` should be a tensor which has the same shape with layer's weight
     """
-    def calc_mask(self, weight, config, op, op_type, op_name):
-        """Pruners should overload this method to provide mask for weight tensors.
+    def __init__(self, model, config_list):
+        super().__init__(model, config_list)
+        self.mask_dict = {}
+    def calc_mask(self, layer, config):
+        """
+        Pruners should overload this method to provide mask for weight tensors.
         The mask must have the same shape and type comparing to the weight.
-        It will be applied with `mul()` operation.
+        It will be applied with `mul()` operation on the weight.
         This method is effectively hooked to `forward()` method of the model.
+        Parameters
+        ----------
+        layer : LayerInfo
+            calculate mask for `layer`'s weight
+        config : dict
+            the configuration for generating the mask
         """
         raise NotImplementedError("Pruners must overload calc_mask()")
     def _instrument_layer(self, layer, config):
-        # TODO: support multiple weight tensors
-        # create a wrapper forward function to replace the original one
+        """
+        Create a wrapper forward function to replace the original one.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the mask
+        config : dict
+            the configuration for generating the mask
+        """
         assert layer._forward is None, 'Each model can only be compressed once'
         if not _check_weight(layer.module):
             _logger.warning('Module %s does not have parameter "weight"', layer.name)
@@ -104,16 +181,56 @@ class Pruner(Compressor):
         def new_forward(*inputs):
             # apply mask to weight
             old_weight = layer.module.weight.data
-            mask = self.calc_mask(old_weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
+            mask = self.calc_mask(layer, config)
             layer.module.weight.data = old_weight.mul(mask)
             # calculate forward
             ret = layer._forward(*inputs)
-            # recover original weight
-            layer.module.weight.data = old_weight
             return ret
         layer.module.forward = new_forward
+    def export_model(self, model_path, mask_path=None, onnx_path=None, input_shape=None):
+        """
+        Export pruned model weights, masks and onnx model(optional)
+        Parameters
+        ----------
+        model_path : str
+            path to save pruned model state_dict
+        mask_path : str
+            (optional) path to save mask dict
+        onnx_path : str
+            (optional) path to save onnx model
+        input_shape : list or tuple
+            input shape to onnx model
+        """
+        assert model_path is not None, 'model_path must be specified'
+        for name, m in self.bound_model.named_modules():
+            mask = self.mask_dict.get(name)
+            if mask is not None:
+                mask_sum = mask.sum().item()
+                mask_num = mask.numel()
+                _logger.info('Layer: %s Sparsity: %.2f', name, 1 - mask_sum / mask_num)
+                print('Layer: %s Sparsity: %.2f' % (name, 1 - mask_sum / mask_num))
+                m.weight.data = m.weight.data.mul(mask)
+            else:
+                _logger.info('Layer: %s NOT compressed', name)
+                print('Layer: %s NOT compressed' % name)
+        torch.save(self.bound_model.state_dict(), model_path)
+        _logger.info('Model state_dict saved to %s', model_path)
+        print('Model state_dict saved to %s' % model_path)
+        if mask_path is not None:
+            torch.save(self.mask_dict, mask_path)
+            _logger.info('Mask dict saved to %s', mask_path)
+            print('Mask dict saved to %s' % mask_path)
+        if onnx_path is not None:
+            assert input_shape is not None, 'input_shape must be specified to export onnx model'
+            # input info needed
+            input_data = torch.Tensor(*input_shape)
+            torch.onnx.export(self.bound_model, input_data, onnx_path)
+            _logger.info('Model in onnx with input shape %s saved to %s', input_data.shape, onnx_path)
+            print('Model in onnx with input shape %s saved to %s' % (input_data.shape, onnx_path))
 class Quantizer(Compressor):
     """
......
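A hedged example of calling the new `export_model` on a pruner after training; the paths and input shape are illustrative, and `pruner` stands for any `Pruner` constructed and compressed as above.

    # Export the pruned weights, the mask dict, and (optionally) an onnx copy of the model.
    pruner.export_model(
        model_path='pruned_model.pth',
        mask_path='masks.pth',
        onnx_path='pruned_model.onnx',
        input_shape=[1, 3, 224, 224],   # required when onnx_path is given
    )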
@@ -158,11 +158,11 @@ class EvolutionTuner(Tuner):
     EvolutionTuner is tuner using navie evolution algorithm.
     """
-    def __init__(self, optimize_mode, population_size=32):
+    def __init__(self, optimize_mode="maximize", population_size=32):
         """
         Parameters
         ----------
-        optimize_mode : str
+        optimize_mode : str, default 'maximize'
         population_size : int
             initial population size. The larger population size,
             the better evolution performance.
......
@@ -265,6 +265,8 @@ def convert_nas_search_space(search_space):
     param search_space: raw search space
     return: the new search space, mutable_layers will be converted into choice
     """
+    if not isinstance(search_space, dict):
+        return search_space
     ret = dict()
     for k, v in search_space.items():
         if "_type" not in v:
......
@@ -48,7 +48,7 @@ def uniform(low, high, random_state):
    high: an float that represent an upper bound
    random_state: an object of numpy.random.RandomState
    '''
-    assert high > low, 'Upper bound must be larger than lower bound'
+    assert high >= low, 'Upper bound must be larger than lower bound'
    return random_state.uniform(low, high)
......
@@ -143,14 +143,14 @@ class CategoricalPd(Pd):
             re_masked_res = tf.reshape(masked_res, [-1, self.size])
             u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
-            return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(re_masked_res - tf.log(-1*tf.log(u)), axis=-1)
         else:
             u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
-            return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(self.logits - tf.log(-1*tf.log(u)), axis=-1)
     @classmethod
     def fromflat(cls, flat):
-        return cls(flat)
+        return cls(flat) # pylint: disable=no-value-for-parameter
 class CategoricalPdType(PdType):
     """
......
@@ -107,7 +107,7 @@ class PolicyWithValue:
         def sample(logits, mask_npinf):
             new_logits = tf.math.add(logits, mask_npinf)
             u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
-            return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(new_logits - tf.log(-1*tf.log(u)), axis=-1)
         def neglogp(logits, x):
             # return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
......
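The `logits - log(-log(u))` expression in both files above is the Gumbel-max trick for drawing a sample from a categorical distribution defined by unnormalized logits; the `-1*tf.log(u)` rewrite does not change the math. A small NumPy sketch of the same trick:

    import numpy as np

    def gumbel_max_sample(logits, rng):
        """Draw one categorical sample from unnormalized logits via the Gumbel-max trick."""
        u = rng.uniform(size=logits.shape)      # u ~ Uniform(0, 1)
        gumbel = -np.log(-np.log(u))            # standard Gumbel noise
        return int(np.argmax(logits + gumbel))  # logits - log(-log(u)) == logits + gumbel

    rng = np.random.default_rng(0)
    samples = [gumbel_max_sample(np.log([0.1, 0.2, 0.7]), rng) for _ in range(1000)]
    print(np.bincount(samples, minlength=3))    # counts roughly proportional to [0.1, 0.2, 0.7]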
@@ -22,11 +22,9 @@ ppo_tuner.py including:
     class PPOTuner
 """
-import os
 import copy
 import logging
 import numpy as np
-import json_tricks
 from gym import spaces
 import nni
@@ -236,7 +234,8 @@ class PPOModel:
                 nextnonterminal = 1.0 - trials_info.dones[t+1]
                 nextvalues = trials_info.values[t+1]
             delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
-            mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
+            lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
+            mb_advs[t] = lastgaelam # pylint: disable=unsupported-assignment-operation
         mb_returns = mb_advs + trials_info.values
         trials_info.update_rewards(mb_rewards, mb_returns)
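The `delta`/`lastgaelam` recursion above is the standard backward pass of Generalized Advantage Estimation: delta_t = r_t + gamma * V_{t+1} * (1 - done_{t+1}) - V_t, and A_t = delta_t + gamma * lam * (1 - done_{t+1}) * A_{t+1}. A compact NumPy sketch of the same recursion (variable names are illustrative, not the tuner's API):

    import numpy as np

    def gae_advantages(rewards, values, dones, last_value, next_done=0.0, gamma=0.99, lam=0.95):
        """Backward GAE pass over 1-D arrays; returns (advantages, returns)."""
        num_steps = len(rewards)
        advs = np.zeros(num_steps)
        lastgaelam = 0.0
        for t in reversed(range(num_steps)):
            if t == num_steps - 1:
                nextnonterminal, nextvalues = 1.0 - next_done, last_value
            else:
                nextnonterminal, nextvalues = 1.0 - dones[t + 1], values[t + 1]
            delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
            lastgaelam = delta + gamma * lam * nextnonterminal * lastgaelam
            advs[t] = lastgaelam
        return advs, advs + values

    advs, returns = gae_advantages(np.ones(3), np.full(3, 0.5), np.zeros(3), last_value=0.5)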
@@ -536,8 +535,10 @@ class PPOTuner(Tuner):
         # generate new trials
         self.trials_result = [None for _ in range(self.inf_batch_size)]
         mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
-        self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
-                                      mb_dones, last_values, self.inf_batch_size)
+        self.trials_info = TrialsInfo(mb_obs, mb_actions,
+                                      mb_values, mb_neglogpacs,
+                                      mb_dones, last_values,
+                                      self.inf_batch_size)
         # check credit and submit new trials
         for _ in range(self.credit):
             trial_info_idx, actions = self.trials_info.get_next()
@@ -581,8 +582,8 @@ class PPOTuner(Tuner):
         assert trial_info_idx is not None
         # use mean of finished trials as the result of this failed trial
         values = [val for val in self.trials_result if val is not None]
-        logger.warning('zql values: {0}'.format(values))
-        self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
+        logger.warning('zql values: %s', values)
+        self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
         self.finished_trials += 1
         if self.finished_trials == self.inf_batch_size:
             self._next_round_inference()
......
@@ -56,7 +56,7 @@ def seq_to_batch(h, flat=False):
 def lstm(xs, ms, s, scope, nh, init_scale=1.0):
     """lstm cell"""
-    nbatch, nin = [v.value for v in xs[0].get_shape()]
+    _, nin = [v.value for v in xs[0].get_shape()] # the first is nbatch
     with tf.variable_scope(scope):
         wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
         wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
......
-from .smac_tuner import SMACTuner
\ No newline at end of file
+from .smac_tuner import SMACTuner
@@ -39,7 +39,6 @@ from nni.utils import OptimizeMode, extract_scalar_reward
 from .convert_ss_to_scenario import generate_scenario
 class SMACTuner(Tuner):
     """
     Parameters
......
@@ -42,7 +42,7 @@ class Tuner(Recoverable):
     A new trial will run with this configuration.
     This is the abstract base class for all tuners.
-    Tuning algorithms should derive this class and override :meth:`update_search_space`, :meth:`receive_trial_result`,
+    Tuning algorithms should inherit this class and override :meth:`update_search_space`, :meth:`receive_trial_result`,
     as well as :meth:`generate_parameters` or :meth:`generate_multiple_parameters`.
     After initializing, NNI will first call :meth:`update_search_space` to tell tuner the feasible region,
@@ -96,9 +96,9 @@ class Tuner(Recoverable):
         Parameters
         ----------
-        parameter_id: int
+        parameter_id : int
            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
-        **kwargs:
+        **kwargs
            Unstable parameters which should be ignored by normal users.
         Returns
@@ -129,10 +129,10 @@ class Tuner(Recoverable):
         Parameters
         ----------
-        parameter_id_list: list of int
+        parameter_id_list : list of int
            Unique identifiers for each set of requested hyper-parameters.
            These will later be used in :meth:`receive_trial_result`.
-        **kwargs:
+        **kwargs
            Unstable parameters which should be ignored by normal users.
         Returns
@@ -159,13 +159,13 @@ class Tuner(Recoverable):
         Parameters
         ----------
-        parameter_id: int
+        parameter_id : int
            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
         parameters
            Hyper-parameters generated by :meth:`generate_parameters`.
         value
            Result from trial (the return value of :func:`nni.report_final_result`).
-        **kwargs:
+        **kwargs
            Unstable parameters which should be ignored by normal users.
         """
         raise NotImplementedError('Tuner: receive_trial_result not implemented')
@@ -186,11 +186,11 @@ class Tuner(Recoverable):
         Parameters
         ----------
-        parameter_id: int
+        parameter_id : int
            Unique identifier for hyper-parameters used by this trial.
-        success: bool
+        success : bool
            True if the trial successfully completed; False if failed or terminated.
-        **kwargs:
+        **kwargs
            Unstable parameters which should be ignored by normal users.
         """
......
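Against the tuner interface documented above, a minimal implementation could look like the hedged sketch below: a toy random tuner, not part of this diff, which assumes the v1-style import path `nni.tuner` and handles only `uniform` parameters.

    import random
    from nni.tuner import Tuner

    class ToyRandomTuner(Tuner):
        """Hypothetical tuner: samples each 'uniform' parameter independently."""
        def __init__(self):
            self.search_space = {}

        def update_search_space(self, search_space):
            # Called first, and again whenever the search space is updated.
            self.search_space = search_space

        def generate_parameters(self, parameter_id, **kwargs):
            # One configuration per requested parameter_id.
            return {name: random.uniform(*spec['_value'])
                    for name, spec in self.search_space.items()
                    if spec['_type'] == 'uniform'}

        def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
            # A purely random tuner does not learn from results.
            pass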
{
"choice_str": {
"_type": "choice",
"_value": ["cat", "dog", "elephant", "cow", "sheep", "panda"],
"fail": ["metis", "gp"]
},
"choice_int": {
"_type": "choice",
"_value": [42, 43, -1]
},
"choice_mixed": {
"_type": "choice",
"_value": [0.3, "cat", 1, null],
"fail": ["metis", "gp"]
},
"choice_float": {
"_type": "choice",
"_value": [0.3, 1, 2.0]
},
"choice_single": {
"_type": "choice",
"_value": [1]
},
"randint_ok": {
"_type": "randint",
"_value": [-2, 3]
},
"randint_single": {
"_type": "randint",
"_value": [10, 11]
},
"randint_fail_equal": {
"_type": "randint",
"_value": [0, 0]
},
"uniform_ok": {
"_type": "uniform",
"_value": [-1.0, 1.5]
},
"uniform_equal": {
"_type": "uniform",
"_value": [99.9, 99.9]
},
"quniform_ok": {
"_type": "quniform",
"_value": [0.0, 10.0, 2.5]
},
"quniform_clip": {
"_type": "quniform",
"_value": [2.0, 10.0, 5.0]
},
"quniform_clip_2": {
"_type": "quniform",
"_value": [-5.5, -0.5, 6]
},
"loguniform_ok": {
"_type": "loguniform",
"_value": [0.001, 100]
},
"loguniform_equal": {
"_type": "loguniform",
"_value": [1, 1]
},
"qloguniform_ok": {
"_type": "qloguniform",
"_value": [0.001, 100, 1]
},
"qloguniform_equal": {
"_type": "qloguniform",
"_value": [2, 2, 1]
},
"normal_ok": {
"_type": "normal",
"_value": [-1.0, 5.0]
},
"qnormal_ok": {
"_type": "qnormal",
"_value": [-1.5, 5.0, 0.1]
},
"lognormal_ok": {
"_type": "lognormal",
"_value": [-1.0, 5.0]
},
"qlognormal_ok": {
"_type": "qlognormal",
"_value": [-1.5, 5.0, 0.1]
}
}
\ No newline at end of file