"...janus-pro-7b_pytorch.git" did not exist on "cda4a66e9f79387d199379c98de82af2ca13bbb0"
Unverified Commit c037a7c1 authored by SparkSnail, committed by GitHub

Merge pull request #213 from microsoft/master

merge master
parents 49972952 901012eb
......@@ -28,6 +28,8 @@ import {
parseGpuIndices, RemoteMachineMeta, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail, ScheduleResultType, SSHClientManager
} from './remoteMachineData';
type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
/**
* A simple GPU scheduler implementation
*/
......@@ -35,13 +37,18 @@ export class GPUScheduler {
private readonly machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>;
private readonly log: Logger = getLogger();
private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
private roundRobinIndex: number = 0;
private configuredRMs: RemoteMachineMeta[] = [];
/**
* Constructor
* @param machineSSHClientMap map from remote machine to sshClient
*/
constructor(machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>) {
assert(machineSSHClientMap.size > 0);
this.machineSSHClientMap = machineSSHClientMap;
this.configuredRMs = Array.from(machineSSHClientMap.keys());
}
/**
......@@ -189,7 +196,21 @@ export class GPUScheduler {
private selectMachine(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
assert(rmMetas !== undefined && rmMetas.length > 0);
return randomSelect(rmMetas);
if (this.policyName === 'random') {
return randomSelect(rmMetas);
} else if (this.policyName === 'round-robin') {
return this.roundRobinSelect(rmMetas);
} else {
throw new Error(`Unsupported schedule policy: ${this.policyName}`);
}
}
private roundRobinSelect(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
while (!rmMetas.includes(this.configuredRMs[this.roundRobinIndex % this.configuredRMs.length])) {
this.roundRobinIndex++;
}
return this.configuredRMs[this.roundRobinIndex++ % this.configuredRMs.length];
}
private selectGPUsForTrial(gpuInfos: GPUInfo[], requiredGPUNum: number): GPUInfo[] {
......
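The round-robin branch above keeps a persistent index over all configured remote machines (configuredRMs) and skips machines that are not in the currently schedulable subset. A minimal Python sketch of the same selection idea, with invented names and independent of the NNI code base:

# Round-robin selection over a fixed machine list where only a subset is
# currently eligible (assumption: the eligible subset is never empty,
# mirroring the assert in selectMachine above).
class RoundRobinSelector:
    def __init__(self, configured_machines):
        self.configured = list(configured_machines)
        self.index = 0

    def select(self, eligible):
        # advance past machines that are configured but not eligible right now
        while self.configured[self.index % len(self.configured)] not in eligible:
            self.index += 1
        chosen = self.configured[self.index % len(self.configured)]
        self.index += 1
        return chosen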
import setuptools
setuptools.setup(
name = 'nnicli',
version = '999.0.0-developing',
packages = setuptools.find_packages(),
name='nnicli',
version='999.0.0-developing',
packages=setuptools.find_packages(),
python_requires = '>=3.5',
install_requires = [
python_requires='>=3.5',
install_requires=[
'requests'
],
author = 'Microsoft NNI Team',
author_email = 'nni@microsoft.com',
description = 'nnicli for Neural Network Intelligence project',
license = 'MIT',
url = 'https://github.com/Microsoft/nni',
author='Microsoft NNI Team',
author_email='nni@microsoft.com',
description='nnicli for Neural Network Intelligence project',
license='MIT',
url='https://github.com/Microsoft/nni',
)
......@@ -53,14 +53,14 @@ class Assessor(Recoverable):
to tell whether this trial can be early stopped or not.
This is the abstract base class for all assessors.
Early stopping algorithms should derive this class and override :meth:`assess_trial` method,
Early stopping algorithms should inherit this class and override :meth:`assess_trial` method,
which receives intermediate results from trials and give an assessing result.
If :meth:`assess_trial` returns :obj:`AssessResult.Bad` for a trial,
it hints NNI framework that the trial is likely to result in a poor final accuracy,
and therefore should be killed to save resource.
If an accessor want's to get notified when a trial ends, it can also override :meth:`trial_end`.
If an assessor wants to be notified when a trial ends, it can also override :meth:`trial_end`.
To write a new assessor, you can reference :class:`~nni.medianstop_assessor.MedianstopAssessor`'s code as an example.
......@@ -77,7 +77,7 @@ class Assessor(Recoverable):
The NNI framework has little guarantee on ``trial_history``.
This method is not guaranteed to be invoked each time ``trial_history`` is updated.
It is also possible that a trial's history keeps updateing after receiving a bad result.
It is also possible that a trial's history keeps updating after receiving a bad result.
And if the trial failed and retried, ``trial_history`` may be inconsistent with its previous value.
The only guarantee is that ``trial_history`` is always growing.
......@@ -96,9 +96,9 @@ class Assessor(Recoverable):
Parameters
----------
trial_job_id: str
trial_job_id : str
Unique identifier of the trial.
trial_history: list
trial_history : list
Intermediate results of this trial. The element type is decided by trial code.
Returns
......@@ -114,9 +114,9 @@ class Assessor(Recoverable):
Parameters
----------
trial_job_id: str
trial_job_id : str
Unique identifier of the trial.
success: bool
success : bool
True if the trial successfully completed; False if failed or terminated.
"""
......
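The Assessor docstring above defines the extension contract: early-stopping algorithms inherit Assessor, override assess_trial(trial_job_id, trial_history), and may also override trial_end(trial_job_id, success). A minimal sketch of a custom assessor following that contract (the threshold rule is invented for illustration, the import path is assumed to be nni.assessor as in NNI 1.x, and scalar intermediate results are assumed):

from nni.assessor import Assessor, AssessResult   # assumed import path

class ThresholdAssessor(Assessor):
    """Illustrative assessor: early-stop a trial whose latest result is below a threshold."""

    def __init__(self, threshold=0.1):
        super().__init__()
        self.threshold = threshold

    def assess_trial(self, trial_job_id, trial_history):
        # trial_history holds the intermediate results reported so far (assumed scalar here)
        if trial_history and trial_history[-1] < self.threshold:
            return AssessResult.Bad    # hint NNI that the trial is unlikely to end well
        return AssessResult.Good

    def trial_end(self, trial_job_id, success):
        # optional hook, called once when the trial completes or is killed
        pass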
......@@ -8,16 +8,18 @@ _logger = logging.getLogger(__name__)
class LevelPruner(Pruner):
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- sparsity
"""
super().__init__(config_list)
super().__init__(model, config_list)
self.mask_list = {}
self.if_init_list = {}
def calc_mask(self, weight, config, op_name, **kwargs):
def calc_mask(self, layer, config):
weight = layer.weight
op_name = layer.name
if self.if_init_list.get(op_name, True):
threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100)
mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)
......@@ -38,7 +40,7 @@ class AGP_Pruner(Pruner):
https://arxiv.org/pdf/1710.01878.pdf
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- initial_sparsity
......@@ -47,13 +49,15 @@ class AGP_Pruner(Pruner):
- end_epoch: end epoch number stop update mask
- frequency: if you want update every 2 epoch, you can set it 2
"""
super().__init__(config_list)
super().__init__(model, config_list)
self.mask_list = {}
self.if_init_list = {}
self.now_epoch = tf.Variable(0)
self.assign_handler = []
def calc_mask(self, weight, config, op_name, **kwargs):
def calc_mask(self, layer, config):
weight = layer.weight
op_name = layer.name
start_epoch = config.get('start_epoch', 0)
freq = config.get('frequency', 1)
if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
......
......@@ -10,8 +10,8 @@ _logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
"""quantize weight to 8 bits
"""
def __init__(self, config_list):
super().__init__(config_list)
def __init__(self, model, config_list):
super().__init__(model, config_list)
self.layer_scale = {}
def quantize_weight(self, weight, config, op_name, **kwargs):
......@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- q_bits
"""
super().__init__(config_list)
super().__init__(model, config_list)
def quantize_weight(self, weight, config, **kwargs):
a = tf.stop_gradient(tf.reduce_min(weight))
......@@ -52,12 +52,12 @@ class DoReFaQuantizer(Quantizer):
Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
(https://arxiv.org/abs/1606.06160)
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- q_bits
"""
super().__init__(config_list)
super().__init__(model, config_list)
def quantize_weight(self, weight, config, **kwargs):
a = tf.math.tanh(weight)
......
......@@ -6,67 +6,85 @@ _logger = logging.getLogger(__name__)
class LayerInfo:
def __init__(self, op):
def __init__(self, op, weight, weight_op):
self.op = op
self.name = op.name
self.type = op.type
self.weight = weight
self.weight_op = weight_op
class Compressor:
"""Abstract base TensorFlow compressor"""
"""
Abstract base TensorFlow compressor
"""
def __init__(self, config_list):
self._bound_model = None
self._config_list = config_list
def __init__(self, model, config_list):
"""
Record necessary info in class members
Parameters
----------
model : tensorflow model
the model user wants to compress
config_list : list
the configurations that users specify for compression
"""
self.bound_model = model
self.config_list = config_list
self.modules_to_compress = []
def __call__(self, model):
"""Compress given graph with algorithm implemented by subclass.
The graph will be editted and returned.
def compress(self):
"""
self.compress(model)
return model
Compress the model with algorithm implemented by subclass.
def compress(self, model):
"""Compress given graph with algorithm implemented by subclass.
This will edit the graph.
The model will be instrumented and user should never edit it after calling this method.
`self.modules_to_compress` records all the to-be-compressed layers
"""
assert self._bound_model is None, "Each NNI compressor instance can only compress one model"
self._bound_model = model
self.bind_model(model)
for op in model.get_operations():
layer = LayerInfo(op)
config = self._select_config(layer)
for op in self.bound_model.get_operations():
weight_index = _detect_weight_index(op)
if weight_index is None:
_logger.warning('Failed to detect weight for layer %s', op.name)
return
weight_op = op.inputs[weight_index].op
weight = weight_op.inputs[0]
layer = LayerInfo(op, weight, weight_op)
config = self.select_config(layer)
if config is not None:
self._instrument_layer(layer, config)
self.modules_to_compress.append((layer, config))
return self.bound_model
def compress_default_graph(self):
"""Compress the default graph with algorithm implemented by subclass.
This will edit the default graph.
def get_modules_to_compress(self):
"""
self.compress(tf.get_default_graph())
To obtain all the to-be-compressed layers.
def bind_model(self, model):
"""This method is called when a model is bound to the compressor.
Compressors can optionally overload this method to do model-specific initialization.
It is guaranteed that only one model will be bound to each compressor instance.
Returns
-------
self.modules_to_compress : list
a list of the layers, each of which is a tuple (`layer`, `config`),
`layer` is `LayerInfo`, `config` is a `dict`
"""
return self.modules_to_compress
def update_epoch(self, epoch, sess):
"""If user want to update mask every epoch, user can override this method
def select_config(self, layer):
"""
def step(self, sess):
"""If user want to update mask every step, user can override this method
Find the configuration for `layer` by parsing `self.config_list`
Parameters
----------
layer : LayerInfo
one layer
Returns
-------
ret : config or None
the retrieved configuration for this layer, if None, this layer should
not be compressed
"""
def _instrument_layer(self, layer, config):
raise NotImplementedError()
def _select_config(self, layer):
ret = None
for config in self._config_list:
for config in self.config_list:
op_types = config.get('op_types')
if op_types == 'default':
op_types = default_layers.op_weight_index.keys()
......@@ -79,35 +97,72 @@ class Compressor:
return None
return ret
def update_epoch(self, epoch, sess):
"""
If users want to update the model every epoch, they can override this method.
This method should be called at the beginning of each epoch.
Parameters
----------
epoch : num
the current epoch number
"""
def step(self, sess):
"""
If users want to update the mask every step, they can override this method
"""
def _instrument_layer(self, layer, config):
"""
This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
Parameters
----------
layer : LayerInfo
the layer to instrument the compression operation
config : dict
the configuration for compressing this layer
"""
raise NotImplementedError()
class Pruner(Compressor):
"""
Abstract base TensorFlow pruner
"""
def calc_mask(self, weight, config, op, op_type, op_name):
"""Pruners should overload this method to provide mask for weight tensors.
def calc_mask(self, layer, config):
"""
Pruners should overload this method to provide mask for weight tensors.
The mask must have the same shape and type comparing to the weight.
It will be applied with `multiply()` operation.
This method works as a subgraph which will be inserted into the bound model.
It will be applied with `mul()` operation on the weight.
This method is effectively hooked to `forward()` method of the model.
Parameters
----------
layer : LayerInfo
calculate mask for `layer`'s weight
config : dict
the configuration for generating the mask
"""
raise NotImplementedError("Pruners must overload calc_mask()")
def _instrument_layer(self, layer, config):
# it seems the graph editor can only swap edges of nodes or remove all edges from a node
# it cannot remove one edge from a node, nor can it assign a new edge to a node
# we assume there is a proxy operation between the weight and the Conv2D layer
# this is true as long as the weight is `tf.Value`
# not sure what will happen if the weight is calculated from other operations
weight_index = _detect_weight_index(layer)
if weight_index is None:
_logger.warning('Failed to detect weight for layer %s', layer.name)
return
weight_op = layer.op.inputs[weight_index].op
weight = weight_op.inputs[0]
mask = self.calc_mask(weight, config, op=layer.op, op_type=layer.type, op_name=layer.name)
new_weight = weight * mask
tf.contrib.graph_editor.swap_outputs(weight_op, new_weight.op)
"""
Create a wrapper forward function to replace the original one.
Parameters
----------
layer : LayerInfo
the layer to instrument the mask
config : dict
the configuration for generating the mask
"""
mask = self.calc_mask(layer, config)
new_weight = layer.weight * mask
tf.contrib.graph_editor.swap_outputs(layer.weight_op, new_weight.op)
class Quantizer(Compressor):
......@@ -133,7 +188,7 @@ def _detect_weight_index(layer):
index = default_layers.op_weight_index.get(layer.type)
if index is not None:
return index
weight_indices = [i for i, op in enumerate(layer.op.inputs) if op.name.endswith('Variable/read')]
weight_indices = [i for i, op in enumerate(layer.inputs) if op.name.endswith('Variable/read')]
if len(weight_indices) == 1:
return weight_indices[0]
return None
......@@ -11,16 +11,17 @@ class LevelPruner(Pruner):
"""Prune to an exact pruning level specification
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- sparsity
"""
super().__init__(config_list)
self.mask_list = {}
super().__init__(model, config_list)
self.if_init_list = {}
def calc_mask(self, weight, config, op_name, **kwargs):
def calc_mask(self, layer, config):
weight = layer.module.weight.data
op_name = layer.name
if self.if_init_list.get(op_name, True):
w_abs = weight.abs()
k = int(weight.numel() * config['sparsity'])
......@@ -28,10 +29,10 @@ class LevelPruner(Pruner):
return torch.ones(weight.shape).type_as(weight)
threshold = torch.topk(w_abs.view(-1), k, largest=False).values.max()
mask = torch.gt(w_abs, threshold).type_as(weight)
self.mask_list.update({op_name: mask})
self.mask_dict.update({op_name: mask})
self.if_init_list.update({op_name: False})
else:
mask = self.mask_list[op_name]
mask = self.mask_dict[op_name]
return mask
......@@ -45,7 +46,7 @@ class AGP_Pruner(Pruner):
https://arxiv.org/pdf/1710.01878.pdf
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- initial_sparsity
......@@ -54,17 +55,18 @@ class AGP_Pruner(Pruner):
- end_epoch: end epoch number stop update mask, you should make sure start_epoch <= end_epoch
- frequency: if you want update every 2 epoch, you can set it 2
"""
super().__init__(config_list)
self.mask_list = {}
super().__init__(model, config_list)
self.now_epoch = 0
self.if_init_list = {}
def calc_mask(self, weight, config, op_name, **kwargs):
def calc_mask(self, layer, config):
weight = layer.module.weight.data
op_name = layer.name
start_epoch = config.get('start_epoch', 0)
freq = config.get('frequency', 1)
if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
self.now_epoch - start_epoch) % freq == 0:
mask = self.mask_list.get(op_name, torch.ones(weight.shape).type_as(weight))
mask = self.mask_dict.get(op_name, torch.ones(weight.shape).type_as(weight))
target_sparsity = self.compute_target_sparsity(config)
k = int(weight.numel() * target_sparsity)
if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
......@@ -73,10 +75,10 @@ class AGP_Pruner(Pruner):
w_abs = weight.abs() * mask
threshold = torch.topk(w_abs.view(-1), k, largest=False).values.max()
new_mask = torch.gt(w_abs, threshold).type_as(weight)
self.mask_list.update({op_name: new_mask})
self.mask_dict.update({op_name: new_mask})
self.if_init_list.update({op_name: False})
else:
new_mask = self.mask_list.get(op_name, torch.ones(weight.shape).type_as(weight))
new_mask = self.mask_dict.get(op_name, torch.ones(weight.shape).type_as(weight))
return new_mask
def compute_target_sparsity(self, config):
......
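Under the reworked PyTorch interface shown above, a pruner is constructed with the model plus config_list, and calc_mask receives a LayerInfo (exposing layer.module and layer.name) together with the matched config; masks are cached in self.mask_dict provided by the Pruner base class. A hedged sketch of a custom pruner against that interface (the import path nni.compression.torch is an assumption; the magnitude rule simply mirrors LevelPruner):

import torch
from nni.compression.torch import Pruner   # assumed import path for the PyTorch Pruner base

class MyMagnitudePruner(Pruner):
    """Illustrative pruner: masks the smallest-magnitude weights of each configured layer."""

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data            # layer.module is the torch module
        k = int(weight.numel() * config['sparsity'])
        if k == 0:
            return torch.ones_like(weight)
        threshold = torch.topk(weight.abs().view(-1), k, largest=False).values.max()
        mask = torch.gt(weight.abs(), threshold).type_as(weight)
        self.mask_dict[layer.name] = mask            # mask_dict comes from the Pruner base class
        return mask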
......@@ -10,8 +10,8 @@ logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
"""quantize weight to 8 bits
"""
def __init__(self, config_list):
super().__init__(config_list)
def __init__(self, model, config_list):
super().__init__(model, config_list)
self.layer_scale = {}
def quantize_weight(self, weight, config, op_name, **kwargs):
......@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- q_bits
"""
super().__init__(config_list)
super().__init__(model, config_list)
def quantize_weight(self, weight, config, **kwargs):
if config['q_bits'] <= 1:
......@@ -53,12 +53,12 @@ class DoReFaQuantizer(Quantizer):
Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
(https://arxiv.org/abs/1606.06160)
"""
def __init__(self, config_list):
def __init__(self, model, config_list):
"""
config_list: supported keys:
- q_bits
"""
super().__init__(config_list)
super().__init__(model, config_list)
def quantize_weight(self, weight, config, **kwargs):
out = weight.tanh()
......
......@@ -15,49 +15,69 @@ class LayerInfo:
class Compressor:
"""Abstract base PyTorch compressor"""
"""
Abstract base PyTorch compressor
"""
def __init__(self, config_list):
self._bound_model = None
self._config_list = config_list
def __init__(self, model, config_list):
"""
Record necessary info in class members
Parameters
----------
model : pytorch model
the model user wants to compress
config_list : list
the configurations that users specify for compression
"""
self.bound_model = model
self.config_list = config_list
self.modules_to_compress = []
def __call__(self, model):
self.compress(model)
return model
def compress(self):
"""
Compress the model with algorithm implemented by subclass.
def compress(self, model):
"""Compress the model with algorithm implemented by subclass.
The model will be instrumented and user should never edit it after calling this method.
`self.modules_to_compress` records all the to-be-compressed layers
"""
assert self._bound_model is None, "Each NNI compressor instance can only compress one model"
self._bound_model = model
self.bind_model(model)
for name, module in model.named_modules():
for name, module in self.bound_model.named_modules():
layer = LayerInfo(name, module)
config = self._select_config(layer)
config = self.select_config(layer)
if config is not None:
self._instrument_layer(layer, config)
self.modules_to_compress.append((layer, config))
return self.bound_model
def bind_model(self, model):
"""This method is called when a model is bound to the compressor.
Users can optionally overload this method to do model-specific initialization.
It is guaranteed that only one model will be bound to each compressor instance.
def get_modules_to_compress(self):
"""
To obtain all the to-be-compressed layers.
def update_epoch(self, epoch):
"""if user want to update model every epoch, user can override this method
Returns
-------
self.modules_to_compress : list
a list of the layers, each of which is a tuple (`layer`, `config`),
`layer` is `LayerInfo`, `config` is a `dict`
"""
return self.modules_to_compress
def step(self):
"""if user want to update model every step, user can override this method
def select_config(self, layer):
"""
Find the configuration for `layer` by parsing `self.config_list`
def _instrument_layer(self, layer, config):
raise NotImplementedError()
Parameters
----------
layer : LayerInfo
one layer
def _select_config(self, layer):
Returns
-------
ret : config or None
the retrieved configuration for this layer, if None, this layer should
not be compressed
"""
ret = None
for config in self._config_list:
for config in self.config_list:
config['op_types'] = self._expand_config_op_types(config)
if layer.type not in config['op_types']:
continue
......@@ -68,6 +88,35 @@ class Compressor:
return None
return ret
def update_epoch(self, epoch):
"""
If users want to update the model every epoch, they can override this method.
This method should be called at the beginning of each epoch.
Parameters
----------
epoch : num
the current epoch number
"""
def step(self):
"""
If users want to update the model every step, they can override this method
"""
def _instrument_layer(self, layer, config):
"""
This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
Parameters
----------
layer : LayerInfo
the layer to instrument the compression operation
config : dict
the configuration for compressing this layer
"""
raise NotImplementedError()
def _expand_config_op_types(self, config):
if config is None:
return []
......@@ -79,22 +128,50 @@ class Compressor:
expanded_op_types.append(op_type)
return expanded_op_types
class Pruner(Compressor):
"""
Abstract base PyTorch pruner
Prune to an exact pruning level specification
Attributes
----------
mask_dict : dict
Dictionary for saving masks, `key` should be layer name and
`value` should be a tensor which has the same shape with layer's weight
"""
def calc_mask(self, weight, config, op, op_type, op_name):
"""Pruners should overload this method to provide mask for weight tensors.
def __init__(self, model, config_list):
super().__init__(model, config_list)
self.mask_dict = {}
def calc_mask(self, layer, config):
"""
Pruners should overload this method to provide mask for weight tensors.
The mask must have the same shape and type comparing to the weight.
It will be applied with `mul()` operation.
It will be applied with `mul()` operation on the weight.
This method is effectively hooked to `forward()` method of the model.
Parameters
----------
layer : LayerInfo
calculate mask for `layer`'s weight
config : dict
the configuration for generating the mask
"""
raise NotImplementedError("Pruners must overload calc_mask()")
def _instrument_layer(self, layer, config):
# TODO: support multiple weight tensors
# create a wrapper forward function to replace the original one
"""
Create a wrapper forward function to replace the original one.
Parameters
----------
layer : LayerInfo
the layer to instrument the mask
config : dict
the configuration for generating the mask
"""
assert layer._forward is None, 'Each model can only be compressed once'
if not _check_weight(layer.module):
_logger.warning('Module %s does not have parameter "weight"', layer.name)
......@@ -104,16 +181,56 @@ class Pruner(Compressor):
def new_forward(*inputs):
# apply mask to weight
old_weight = layer.module.weight.data
mask = self.calc_mask(old_weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
mask = self.calc_mask(layer, config)
layer.module.weight.data = old_weight.mul(mask)
# calculate forward
ret = layer._forward(*inputs)
# recover original weight
layer.module.weight.data = old_weight
return ret
layer.module.forward = new_forward
def export_model(self, model_path, mask_path=None, onnx_path=None, input_shape=None):
"""
Export pruned model weights, masks and onnx model(optional)
Parameters
----------
model_path : str
path to save pruned model state_dict
mask_path : str
(optional) path to save mask dict
onnx_path : str
(optional) path to save onnx model
input_shape : list or tuple
input shape to onnx model
"""
assert model_path is not None, 'model_path must be specified'
for name, m in self.bound_model.named_modules():
mask = self.mask_dict.get(name)
if mask is not None:
mask_sum = mask.sum().item()
mask_num = mask.numel()
_logger.info('Layer: %s Sparsity: %.2f', name, 1 - mask_sum / mask_num)
print('Layer: %s Sparsity: %.2f' % (name, 1 - mask_sum / mask_num))
m.weight.data = m.weight.data.mul(mask)
else:
_logger.info('Layer: %s NOT compressed', name)
print('Layer: %s NOT compressed' % name)
torch.save(self.bound_model.state_dict(), model_path)
_logger.info('Model state_dict saved to %s', model_path)
print('Model state_dict saved to %s' % model_path)
if mask_path is not None:
torch.save(self.mask_dict, mask_path)
_logger.info('Mask dict saved to %s', mask_path)
print('Mask dict saved to %s' % mask_path)
if onnx_path is not None:
assert input_shape is not None, 'input_shape must be specified to export onnx model'
# input info needed
input_data = torch.Tensor(*input_shape)
torch.onnx.export(self.bound_model, input_data, onnx_path)
_logger.info('Model in onnx with input shape %s saved to %s', input_data.shape, onnx_path)
print('Model in onnx with input shape %s saved to %s' % (input_data.shape, onnx_path))
class Quantizer(Compressor):
"""
......
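Taken together, the reworked PyTorch API is: construct the compressor with the model and config_list, call compress() (which instruments forward() and returns the model), optionally call update_epoch()/step() during training, and finally export_model() to save the pruned state_dict, the mask dict and, optionally, an ONNX file. A usage sketch under those assumptions (the LevelPruner import path is assumed; model, num_epochs and train_one_epoch are placeholders):

from nni.compression.torch import LevelPruner    # assumed import path

config_list = [{'sparsity': 0.5, 'op_types': 'default'}]
pruner = LevelPruner(model, config_list)          # model: an existing torch.nn.Module
model = pruner.compress()                         # wraps forward() with mask multiplication

for epoch in range(num_epochs):
    pruner.update_epoch(epoch)                    # no-op here; epoch-aware pruners such as AGP_Pruner rely on it
    train_one_epoch(model)                        # user-defined training loop

pruner.export_model(model_path='pruned.pth',
                    mask_path='masks.pth',
                    onnx_path='pruned.onnx',
                    input_shape=[1, 1, 28, 28])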
......@@ -158,11 +158,11 @@ class EvolutionTuner(Tuner):
EvolutionTuner is a tuner using a naive evolution algorithm.
"""
def __init__(self, optimize_mode, population_size=32):
def __init__(self, optimize_mode="maximize", population_size=32):
"""
Parameters
----------
optimize_mode : str
optimize_mode : str, default 'maximize'
population_size : int
initial population size. The larger population size,
the better evolution performance.
......
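With the new default shown above, optimize_mode no longer needs to be passed explicitly. A construction sketch (the module path nni.evolution_tuner.evolution_tuner is an assumption; built-in tuners are normally selected through the experiment config rather than instantiated directly):

from nni.evolution_tuner.evolution_tuner import EvolutionTuner   # assumed module path

tuner = EvolutionTuner()                                          # now defaults to optimize_mode='maximize'
tuner_min = EvolutionTuner(optimize_mode='minimize', population_size=64)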
......@@ -265,6 +265,8 @@ def convert_nas_search_space(search_space):
param search_space: raw search space
return: the new search space, mutable_layers will be converted into choice
"""
if not isinstance(search_space, dict):
return search_space
ret = dict()
for k, v in search_space.items():
if "_type" not in v:
......
......@@ -48,7 +48,7 @@ def uniform(low, high, random_state):
high: an float that represent an upper bound
random_state: an object of numpy.random.RandomState
'''
assert high > low, 'Upper bound must be larger than lower bound'
assert high >= low, 'Upper bound must not be less than lower bound'
return random_state.uniform(low, high)
......
......@@ -143,14 +143,14 @@ class CategoricalPd(Pd):
re_masked_res = tf.reshape(masked_res, [-1, self.size])
u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
return tf.argmax(re_masked_res - tf.log(-1*tf.log(u)), axis=-1)
else:
u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
return tf.argmax(self.logits - tf.log(-1*tf.log(u)), axis=-1)
@classmethod
def fromflat(cls, flat):
return cls(flat)
return cls(flat) # pylint: disable=no-value-for-parameter
class CategoricalPdType(PdType):
"""
......
......@@ -107,7 +107,7 @@ class PolicyWithValue:
def sample(logits, mask_npinf):
new_logits = tf.math.add(logits, mask_npinf)
u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)
return tf.argmax(new_logits - tf.log(-1*tf.log(u)), axis=-1)
def neglogp(logits, x):
# return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
......
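The changed sampling lines in the two hunks above are the Gumbel-max trick: with u drawn uniformly from (0, 1), -log(-log(u)) is Gumbel(0, 1) noise, and taking the argmax of the perturbed logits yields a sample from the categorical distribution defined by softmax(logits); rewriting tf.log(-tf.log(u)) as tf.log(-1*tf.log(u)) leaves the math unchanged. A standalone NumPy sketch of the trick:

import numpy as np

def gumbel_max_sample(logits, rng=None):
    # argmax over logits perturbed by Gumbel(0, 1) noise ~ categorical(softmax(logits))
    rng = np.random.default_rng() if rng is None else rng
    u = rng.uniform(size=np.shape(logits))
    return np.argmax(np.asarray(logits) - np.log(-np.log(u)), axis=-1)

# Example: repeated draws approximate softmax([1.0, 2.0, 0.5])
samples = [gumbel_max_sample([1.0, 2.0, 0.5]) for _ in range(1000)]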
......@@ -22,11 +22,9 @@ ppo_tuner.py including:
class PPOTuner
"""
import os
import copy
import logging
import numpy as np
import json_tricks
from gym import spaces
import nni
......@@ -236,7 +234,8 @@ class PPOModel:
nextnonterminal = 1.0 - trials_info.dones[t+1]
nextvalues = trials_info.values[t+1]
delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
mb_advs[t] = lastgaelam # pylint: disable=unsupported-assignment-operation
mb_returns = mb_advs + trials_info.values
trials_info.update_rewards(mb_rewards, mb_returns)
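The rewritten loop above is the backward recursion of Generalized Advantage Estimation (GAE): delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t) and A_t = delta_t + gamma * lambda * (1 - done_{t+1}) * A_{t+1}; splitting the chained assignment onto two lines only works around the pylint warning. A self-contained sketch of the same recursion (function and argument names are illustrative):

import numpy as np

def compute_gae(rewards, values, dones, last_value, gamma=0.99, lam=0.95):
    """Backward GAE recursion mirroring the delta / lastgaelam loop above."""
    values = np.asarray(values, dtype=np.float64)
    n = len(rewards)
    advs = np.zeros(n, dtype=np.float64)
    lastgaelam = 0.0
    for t in reversed(range(n)):
        if t == n - 1:
            nextnonterminal = 1.0 - dones[-1]   # assumption: dones[-1] flags the final step
            nextvalues = last_value
        else:
            nextnonterminal = 1.0 - dones[t + 1]
            nextvalues = values[t + 1]
        delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
        lastgaelam = delta + gamma * lam * nextnonterminal * lastgaelam
        advs[t] = lastgaelam
    returns = advs + values
    return advs, returns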
......@@ -536,8 +535,10 @@ class PPOTuner(Tuner):
# generate new trials
self.trials_result = [None for _ in range(self.inf_batch_size)]
mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
mb_dones, last_values, self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions,
mb_values, mb_neglogpacs,
mb_dones, last_values,
self.inf_batch_size)
# check credit and submit new trials
for _ in range(self.credit):
trial_info_idx, actions = self.trials_info.get_next()
......@@ -581,8 +582,8 @@ class PPOTuner(Tuner):
assert trial_info_idx is not None
# use mean of finished trials as the result of this failed trial
values = [val for val in self.trials_result if val is not None]
logger.warning('zql values: {0}'.format(values))
self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
logger.warning('zql values: %s', values)
self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
self.finished_trials += 1
if self.finished_trials == self.inf_batch_size:
self._next_round_inference()
......
......@@ -56,7 +56,7 @@ def seq_to_batch(h, flat=False):
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
"""lstm cell"""
nbatch, nin = [v.value for v in xs[0].get_shape()]
_, nin = [v.value for v in xs[0].get_shape()] # the first is nbatch
with tf.variable_scope(scope):
wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
......
from .smac_tuner import SMACTuner
\ No newline at end of file
from .smac_tuner import SMACTuner
......@@ -39,7 +39,6 @@ from nni.utils import OptimizeMode, extract_scalar_reward
from .convert_ss_to_scenario import generate_scenario
class SMACTuner(Tuner):
"""
Parameters
......
......@@ -42,7 +42,7 @@ class Tuner(Recoverable):
A new trial will run with this configuration.
This is the abstract base class for all tuners.
Tuning algorithms should derive this class and override :meth:`update_search_space`, :meth:`receive_trial_result`,
Tuning algorithms should inherit this class and override :meth:`update_search_space`, :meth:`receive_trial_result`,
as well as :meth:`generate_parameters` or :meth:`generate_multiple_parameters`.
After initializing, NNI will first call :meth:`update_search_space` to tell tuner the feasible region,
......@@ -96,9 +96,9 @@ class Tuner(Recoverable):
Parameters
----------
parameter_id: int
parameter_id : int
Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
**kwargs:
**kwargs
Unstable parameters which should be ignored by normal users.
Returns
......@@ -129,10 +129,10 @@ class Tuner(Recoverable):
Parameters
----------
parameter_id_list: list of int
parameter_id_list : list of int
Unique identifiers for each set of requested hyper-parameters.
These will later be used in :meth:`receive_trial_result`.
**kwargs:
**kwargs
Unstable parameters which should be ignored by normal users.
Returns
......@@ -159,13 +159,13 @@ class Tuner(Recoverable):
Parameters
----------
parameter_id: int
parameter_id : int
Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
parameters
Hyper-parameters generated by :meth:`generate_parameters`.
value
Result from trial (the return value of :func:`nni.report_final_result`).
**kwargs:
**kwargs
Unstable parameters which should be ignored by normal users.
"""
raise NotImplementedError('Tuner: receive_trial_result not implemented')
......@@ -186,11 +186,11 @@ class Tuner(Recoverable):
Parameters
----------
parameter_id: int
parameter_id : int
Unique identifier for hyper-parameters used by this trial.
success: bool
success : bool
True if the trial successfully completed; False if failed or terminated.
**kwargs:
**kwargs
Unstable parameters which should be ignored by normal users.
"""
......
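The Tuner docstring above fixes the numpydoc spacing and restates the contract: tuning algorithms inherit Tuner and override update_search_space, receive_trial_result, and generate_parameters (or generate_multiple_parameters). A minimal random-choice sketch of that contract (sampling logic invented for illustration; only 'choice' parameters are handled; the import path nni.tuner is assumed):

import random
from nni.tuner import Tuner   # assumed import path

class RandomChoiceTuner(Tuner):
    """Illustrative tuner: samples every 'choice' parameter uniformly at random."""

    def __init__(self):
        super().__init__()
        self.search_space = {}

    def update_search_space(self, search_space):
        # NNI calls this first with the feasible region
        self.search_space = search_space

    def generate_parameters(self, parameter_id, **kwargs):
        return {name: random.choice(spec['_value'])
                for name, spec in self.search_space.items()
                if spec['_type'] == 'choice'}

    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        # a real tuner would use the reported value to guide future samples
        pass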
{
"choice_str": {
"_type": "choice",
"_value": ["cat", "dog", "elephant", "cow", "sheep", "panda"],
"fail": ["metis", "gp"]
},
"choice_int": {
"_type": "choice",
"_value": [42, 43, -1]
},
"choice_mixed": {
"_type": "choice",
"_value": [0.3, "cat", 1, null],
"fail": ["metis", "gp"]
},
"choice_float": {
"_type": "choice",
"_value": [0.3, 1, 2.0]
},
"choice_single": {
"_type": "choice",
"_value": [1]
},
"randint_ok": {
"_type": "randint",
"_value": [-2, 3]
},
"randint_single": {
"_type": "randint",
"_value": [10, 11]
},
"randint_fail_equal": {
"_type": "randint",
"_value": [0, 0]
},
"uniform_ok": {
"_type": "uniform",
"_value": [-1.0, 1.5]
},
"uniform_equal": {
"_type": "uniform",
"_value": [99.9, 99.9]
},
"quniform_ok": {
"_type": "quniform",
"_value": [0.0, 10.0, 2.5]
},
"quniform_clip": {
"_type": "quniform",
"_value": [2.0, 10.0, 5.0]
},
"quniform_clip_2": {
"_type": "quniform",
"_value": [-5.5, -0.5, 6]
},
"loguniform_ok": {
"_type": "loguniform",
"_value": [0.001, 100]
},
"loguniform_equal": {
"_type": "loguniform",
"_value": [1, 1]
},
"qloguniform_ok": {
"_type": "qloguniform",
"_value": [0.001, 100, 1]
},
"qloguniform_equal": {
"_type": "qloguniform",
"_value": [2, 2, 1]
},
"normal_ok": {
"_type": "normal",
"_value": [-1.0, 5.0]
},
"qnormal_ok": {
"_type": "qnormal",
"_value": [-1.5, 5.0, 0.1]
},
"lognormal_ok": {
"_type": "lognormal",
"_value": [-1.0, 5.0]
},
"qlognormal_ok": {
"_type": "qlognormal",
"_value": [-1.5, 5.0, 0.1]
}
}
\ No newline at end of file