Unverified Commit 070df4a0 authored by liuzhe-lz, committed by GitHub

Merge pull request #4291 from microsoft/v2.5

merge v2.5 back to master
parents 821706b8 6a082fe9
...@@ -83,16 +83,16 @@ def quantization_aware_training_example(train_loader, test_loader, device):
    model = NaiveModel()
    configure_list = [{
-        'quant_types': ['weight', 'output'],
-        'quant_bits': {'weight':8, 'output':8},
+        'quant_types': ['input', 'weight'],
+        'quant_bits': {'input':8, 'weight':8},
        'op_names': ['conv1']
    }, {
        'quant_types': ['output'],
        'quant_bits': {'output':8},
        'op_names': ['relu1']
    }, {
-        'quant_types': ['weight', 'output'],
-        'quant_bits': {'weight':8, 'output':8},
+        'quant_types': ['input', 'weight'],
+        'quant_bits': {'input':8, 'weight':8},
        'op_names': ['conv2']
    }, {
        'quant_types': ['output'],
......
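As a usage sketch only (the surrounding model, optimizer and training loop are assumed from the rest of this example file), the updated configure_list is handed to QAT_Quantizer in the usual way:

    # Hedged sketch: wire the new config into QAT_Quantizer; names other than
    # configure_list are assumptions taken from the example's context.
    from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

    quantizer = QAT_Quantizer(model, configure_list, optimizer)
    quantizer.compress()                      # wraps conv1/relu1/conv2 with fake-quant logic
    # ... train as usual so scales and zero points are learned ...
    calibration_config = quantizer.export_model('qat_model.pth', 'qat_calibration.pth')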
...@@ -2,7 +2,7 @@
# download automlbenchmark repository
if [ ! -d './automlbenchmark' ] ; then
-    git clone https://github.com/openml/automlbenchmark.git --branch stable --depth 1
+    git clone https://github.com/openml/automlbenchmark.git --branch v1.6 --depth 1
fi
# install dependencies
......
...@@ -384,6 +384,7 @@ class ADMMPruner(IterativePruner):
        for i, wrapper in enumerate(self.get_modules_wrapper()):
            z = wrapper.module.weight.data + self.U[i]
            self.Z[i] = self._projection(z, wrapper.config['sparsity'], wrapper)
+            torch.cuda.empty_cache()
            self.U[i] = self.U[i] + wrapper.module.weight.data - self.Z[i]
        # apply prune
......
...@@ -26,7 +26,6 @@ __all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer', 'BNNQuantizer',
logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """
...@@ -676,17 +675,20 @@ class QAT_Quantizer(Quantizer):
        for layer, _ in modules_to_compress:
            name, module = layer.name, layer.module
            if name not in calibration_config:
-                if hasattr(module, 'weight_bits') or hasattr(module, 'output_bits') or hasattr(module, 'input_bits'):
+                if module.layer_quant_setting.weight or module.layer_quant_setting.input or module.layer_quant_setting.output:
                    logger.warning(f"Can not find module {name}'s parameter in input config.")
                continue
-            if hasattr(module, 'weight_bits'):
-                assert calibration_config[name]['weight_bits'] == module.weight_bits, f"weight bits of module {name} fail to match"
-            if hasattr(module, 'input_bits'):
-                assert calibration_config[name]['input_bits'] == module.input_bits, f"input bits of module {name} fail to match"
+            if module.layer_quant_setting.weight:
+                assert calibration_config[name]['weight_bits'] == module.layer_quant_setting.weight.bits, \
+                    f"weight bits of module {name} fail to match"
+            if module.layer_quant_setting.input:
+                assert calibration_config[name]['input_bits'] == module.layer_quant_setting.input.bits, \
+                    f"input bits of module {name} fail to match"
                module.tracked_min_input.data = torch.tensor([calibration_config[name]['tracked_min_input']])
                module.tracked_max_input.data = torch.tensor([calibration_config[name]['tracked_max_input']])
-            if hasattr(module, 'output_bits'):
-                assert calibration_config[name]['output_bits'] == module.output_bits, f"output bits of module {name} fail to match"
+            if module.layer_quant_setting.output:
+                assert calibration_config[name]['output_bits'] == module.layer_quant_setting.output.bits, \
+                    f"output bits of module {name} fail to match"
                module.tracked_min_output.data = torch.tensor([calibration_config[name]['tracked_min_output']])
                module.tracked_max_output.data = torch.tensor([calibration_config[name]['tracked_max_output']])
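For orientation, the calibration_config consumed by this method is a plain dict keyed by layer name; a minimal hand-written example (values purely illustrative) that satisfies the checks above would be:

    # Illustrative only: keys mirror those read by load_calibration_config above.
    calibration_config = {
        'conv1': {
            'weight_bits': 8,
            'input_bits': 8,
            'tracked_min_input': -0.5,
            'tracked_max_input': 2.3,
            'output_bits': 8,
            'tracked_min_output': -1.1,
            'tracked_max_output': 3.6,
        },
    }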
...@@ -716,11 +718,13 @@ class QAT_Quantizer(Quantizer):
        self._unwrap_model()
        calibration_config = {}
-        for name, module in self.bound_model.named_modules():
-            if hasattr(module, 'weight_bits') or hasattr(module, 'output_bits'):
+        modules_to_compress = self.get_modules_to_compress()
+        for layer, _ in modules_to_compress:
+            name, module = layer.name, layer.module
+            if hasattr(module.layer_quant_setting, 'weight') or hasattr(module.layer_quant_setting, 'output'):
                calibration_config[name] = {}
-            if hasattr(module, 'weight_bits'):
-                calibration_config[name]['weight_bits'] = int(module.weight_bits)
+            if module.layer_quant_setting.weight:
+                calibration_config[name]['weight_bits'] = int(module.layer_quant_setting.weight.bits)
                calibration_config[name]['weight_scale'] = module.weight_scale
                calibration_config[name]['weight_zero_point'] = module.weight_zero_point
...@@ -738,13 +742,14 @@ class QAT_Quantizer(Quantizer):
                module.register_parameter('bias', actual_bias)
            else:
                setattr(module, 'bias', None)
-            if hasattr(module, 'input_bits'):
-                calibration_config[name]['input_bits'] = int(module.input_bits)
+            if module.layer_quant_setting.input:
+                calibration_config[name]['input_bits'] = int(module.layer_quant_setting.input.bits)
                calibration_config[name]['tracked_min_input'] = float(module.tracked_min_input)
                calibration_config[name]['tracked_max_input'] = float(module.tracked_max_input)
-            if hasattr(module, 'output_bits'):
-                calibration_config[name]['output_bits'] = int(module.output_bits)
+            if module.layer_quant_setting.output:
+                calibration_config[name]['output_bits'] = int(module.layer_quant_setting.output.bits)
                calibration_config[name]['tracked_min_output'] = float(module.tracked_min_output)
                calibration_config[name]['tracked_max_output'] = float(module.tracked_max_output)
            self._del_simulated_attr(module)
...@@ -1157,7 +1162,7 @@ class LsqQuantizer(Quantizer):
        calibration_config = {}
        for name, module in self.bound_model.named_modules():
-            if hasattr(module, 'input_bits') or hasattr(module, 'output_bits'):
+            if hasattr(module, 'input_bits') or hasattr(module, 'weight_bits') or hasattr(module, 'output_bits'):
                calibration_config[name] = {}
                if hasattr(module, 'weight_bits'):
                    calibration_config[name]['weight_bits'] = int(module.weight_bits)
...@@ -1177,6 +1182,11 @@ class LsqQuantizer(Quantizer):
                module.register_parameter('bias', actual_bias)
            else:
                setattr(module, 'bias', None)
+            if hasattr(module, 'input_bits'):
+                calibration_config[name]['input_bits'] = int(module.input_bits)
+                abs_max_input = float(module.input_scale * module.input_qmax)
+                calibration_config[name]['tracked_min_input'] = -abs_max_input
+                calibration_config[name]['tracked_max_input'] = abs_max_input
            if hasattr(module, 'output_bits'):
                calibration_config[name]['output_bits'] = int(module.output_bits)
                abs_max_output = float(module.output_scale * module.output_qmax)
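The exported input range above is symmetric by construction (min = -max). As a small worked example with illustrative numbers, assuming a signed 8-bit setting where qmax = 127 and a learned scale of 0.02:

    # Arithmetic mirroring the export code above; the scale value is made up.
    input_qmax = 127                              # 2 ** (8 - 1) - 1
    input_scale = 0.02
    abs_max_input = input_scale * input_qmax      # 2.54
    tracked_min_input, tracked_max_input = -abs_max_input, abs_max_input   # (-2.54, 2.54)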
......
...@@ -123,21 +123,21 @@ class BasicPruner(Pruner):
class LevelPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Operation types to prune.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
"""
    def __init__(self, model: Module, config_list: List[Dict]):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Operation types to prune.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
"""
        super().__init__(model, config_list)
    def _validate_config_before_canonical(self, model: Module, config_list: List[Dict]):
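A minimal usage sketch of the config keys documented above (model is any torch.nn.Module; the import path follows the v2 pruning package this file lives in):

    # Sketch: prune 50% of the weights of every Linear layer with LevelPruner.
    from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner

    config_list = [{'sparsity': 0.5, 'op_types': ['Linear']}]
    pruner = LevelPruner(model, config_list)
    masked_model, masks = pruner.compress()   # returns the wrapped model and a dict of masks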
...@@ -157,36 +157,36 @@ class LevelPruner(BasicPruner):
class NormPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in NormPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
p : int
The order of norm.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict], p: int,
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in NormPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
p
The order of norm.
mode
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        self.p = p
        self.mode = mode
        self.dummy_input = dummy_input
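The 'dependency_aware' mode documented above needs a dummy input on the model's device; a hedged sketch (input shape and device are assumptions):

    # Sketch: dependency-aware pruning with the L1 norm (p=1).
    import torch

    dummy_input = torch.rand(8, 3, 224, 224).to(device)          # shape/device assumed
    config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]
    pruner = NormPruner(model, config_list, p=1,
                        mode='dependency_aware', dummy_input=dummy_input)
    masked_model, masks = pruner.compress()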
...@@ -217,98 +217,98 @@ class NormPruner(BasicPruner):
class L1NormPruner(NormPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L1NormPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the l1-norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict],
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L1NormPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the l1-norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        super().__init__(model, config_list, 1, mode, dummy_input)
class L2NormPruner(NormPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L1NormPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the l2-norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict],
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L2NormPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the l2-norm of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        super().__init__(model, config_list, 2, mode, dummy_input)
class FPGMPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in FPGMPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the FPGM of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict],
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in FPGMPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
mode
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the FPGM of weights and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        self.mode = mode
        self.dummy_input = dummy_input
        super().__init__(model, config_list)
...@@ -338,57 +338,57 @@ class FPGMPruner(BasicPruner):
class SlimPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- total_sparsity : This is to specify the total sparsity for all layers in this config,
each layer may have different sparsity.
- max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
- op_types : Only BatchNorm2d is supported in SlimPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer : torch.optim.Optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs : int
The epoch number for training model to sparsify the BN weight.
mode : str
'normal' or 'global'.
If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting.
"""
    def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
                 optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor],
                 training_epochs: int, scale: float = 0.0001, mode='global'):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- total_sparsity : This is to specify the total sparsity for all layers in this config,
each layer may have different sparsity.
- max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
- op_types : Only BatchNorm2d is supported in SlimPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs
The epoch number for training model to sparsify the BN weight.
mode
'normal' or 'global'.
If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting.
"""
        self.mode = mode
        self.trainer = trainer
        self.optimizer = optimizer
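Putting the SlimPruner arguments documented above together, with the trainer callable taken from the docstring example (the optimizer and criterion below are assumptions):

    # Sketch: sparsify BatchNorm scaling factors for one epoch, then compute masks globally.
    import torch
    import torch.nn.functional as F

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    config_list = [{'total_sparsity': 0.5, 'op_types': ['BatchNorm2d']}]
    pruner = SlimPruner(model, config_list, trainer, optimizer, F.cross_entropy,
                        training_epochs=1, scale=0.0001, mode='global')
    masked_model, masks = pruner.compress()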
...@@ -435,61 +435,61 @@ class SlimPruner(BasicPruner):
class ActivationPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer : torch.optim.Optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
                 optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor], training_batches: int, activation: str = 'relu',
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        self.mode = mode
        self.dummy_input = dummy_input
        self.trainer = trainer
...@@ -553,69 +553,69 @@ class ActivationMeanRankPruner(ActivationPruner):
class TaylorFOWeightPruner(BasicPruner):
"""
Parameters
----------
model : torch.nn.Module
Model to be pruned
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- total_sparsity : This is to specify the total sparsity for all layers in this config,
each layer may have different sparsity.
- max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
- op_types : Conv2d and Linear are supported in TaylorFOWeightPruner.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer : torch.optim.Optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches : int
The batch number used to collect activations.
mode : str
'normal', 'dependency_aware' or 'global'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the taylorFO and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
    def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
                 optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor], training_batches: int,
                 mode: str = 'normal', dummy_input: Optional[Tensor] = None):
"""
Parameters
----------
model
Model to be pruned
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- total_sparsity : This is to specify the total sparsity for all layers in this config,
each layer may have different sparsity.
- max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
- op_types : Conv2d and Linear are supported in TaylorFOWeightPruner.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode
'normal', 'dependency_aware' or 'global'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the taylorFO and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting.
dummy_input
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
"""
        self.mode = mode
        self.dummy_input = dummy_input
        self.trainer = trainer
...@@ -674,53 +674,51 @@ class ADMMPruner(BasicPruner):
    Only in the final iteration, the mask will be generated and apply to model wrapper.
    The original paper refer to: https://arxiv.org/abs/1804.03294.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- rho : Penalty parameters in ADMM algorithm.
- op_types : Operation types to prune.
- op_names : Operation names to prune.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer : torch.optim.Optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
iterations : int
The total iteration number in admm pruning algorithm.
training_epochs : int
The epoch number for training model in each iteration.
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor], iterations: int, training_epochs: int): optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor], iterations: int, training_epochs: int):
"""
Parameters
----------
model
Model to be pruned.
config_list
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- rho : Penalty parameters in ADMM algorithm. Default: 1e-4.
- op_types : Operation types to prune.
- op_names : Operation names to prune.
- op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
optimizer
The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
so do not use this optimizer in other places.
criterion
The criterion function used in trainer. Take model output and target value as input, and return the loss.
iterations
The total iteration number in admm pruning algorithm.
training_epochs
The epoch number for training model in each iteration.
"""
        self.trainer = trainer
        self.optimizer = optimizer
        self.criterion = criterion
......
...@@ -14,29 +14,29 @@ from .tools import TaskGenerator
class PruningScheduler(BasePruningScheduler):
"""
Parameters
----------
pruner
The pruner used in pruner scheduler.
The scheduler will use `Pruner.reset(model, config_list)` to reset it in each iteration.
task_generator
Used to generate task for each iteration.
finetuner
The finetuner handled all finetune logic, use a pytorch module as input.
speed_up
If set True, speed up the model in each iteration.
dummy_input
If `speed_up` is True, `dummy_input` is required for trace the model in speed up.
evaluator
Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result.
reset_weight
If set True, the model weight will reset to the origin model weight at the end of each iteration step.
"""
    def __init__(self, pruner: Pruner, task_generator: TaskGenerator, finetuner: Callable[[Module], None] = None,
                 speed_up: bool = False, dummy_input: Tensor = None, evaluator: Optional[Callable[[Module], float]] = None,
                 reset_weight: bool = False):
"""
Parameters
----------
pruner
The pruner used in pruner scheduler.
The scheduler will use `Pruner.reset(model, config_list)` to reset it in each iteration.
task_generator
Used to generate task for each iteration.
finetuner
The finetuner handled all finetune logic, use a pytorch module as input.
speed_up
If set True, speed up the model in each iteration.
dummy_input
If `speed_up` is True, `dummy_input` is required for trace the model in speed up.
evaluator
Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result.
reset_weight
If set True, the model weight will reset to the origin model weight at the end of each iteration step.
"""
        self.pruner = pruner
        self.task_generator = task_generator
        self.finetuner = finetuner
......
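A rough sketch of how these pieces fit together (pruner, task_generator, finetuner, evaluator and dummy_input are all assumed to be constructed elsewhere; the concrete task generator classes live in .tools):

    # Sketch only: iterative pruning driven by a task generator, with speed-up per iteration.
    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner,
                                 speed_up=True, dummy_input=dummy_input,
                                 evaluator=evaluator, reset_weight=False)
    scheduler.compress()                      # run all iterations
    best = scheduler.get_best_result()        # best (or latest, if evaluator is None) result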
...@@ -80,7 +80,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
            stay_metric = torch.topk(metric.view(-1), stay_metric_num, largest=False)[0]
            sub_thresholds[name] = stay_metric.max()
            if expend_times > 1:
-                stay_metric = stay_metric.expand(stay_metric_num, int(layer_weight_num / metric.numel())).view(-1)
+                stay_metric = stay_metric.expand(int(layer_weight_num / metric.numel()), stay_metric_num).contiguous().view(-1)
            metric_list.append(stay_metric)
        total_prune_num = int(total_sparsity * total_weight_num)
......
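The argument-order fix above matters because Tensor.expand can only add or broadcast leading dimensions; a tiny illustration with made-up shapes:

    # Each remaining channel's score must be repeated once per weight in that channel
    # before the global threshold is taken.
    import torch

    stay_metric = torch.tensor([0.3, 0.7, 0.1])   # one score per channel
    expend_times = 4                               # weights per channel (illustrative)
    # the old order, expand(3, 4), fails: the trailing dim 4 does not match the tensor's size 3;
    # the new order adds a leading dim of size 4 and then flattens:
    flat = stay_metric.expand(expend_times, stay_metric.numel()).contiguous().view(-1)
    assert flat.numel() == 12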
...@@ -79,5 +79,5 @@ def get_quant_shape(shape, quant_type, quant_scheme):
    if is_per_channel(quant_scheme):
        quant_shape = [1 if idx != default_idx else s for idx, s in enumerate(shape)]
    else:
-        quant_shape = []
+        quant_shape = [1]
    return quant_shape
...@@ -110,6 +110,8 @@ def replace_prelu(prelu, masks):
    in_mask = in_masks[0]
    weight_mask = weight_mask['weight']
+    if weight_mask.size(0) == 1:
+        return prelu
    pruned_in, remained_in = convert_to_coarse_mask(in_mask, 1)
    pruned_out, remained_out = convert_to_coarse_mask(output_mask, 1)
    n_remained_in = weight_mask.size(0) - pruned_in.size(0)
...@@ -221,8 +223,9 @@ def replace_batchnorm1d(norm, masks):
                              affine=norm.affine,
                              track_running_stats=norm.track_running_stats)
    # assign weights
-    new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
-    new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
+    if norm.affine:
+        new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
+        new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
    new_norm.running_mean.data = torch.index_select(
        norm.running_mean.data, 0, remained_in)
...@@ -264,8 +267,9 @@ def replace_batchnorm2d(norm, masks):
                              affine=norm.affine,
                              track_running_stats=norm.track_running_stats)
    # assign weights
-    new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
-    new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
+    if norm.affine:
+        new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
+        new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
    new_norm.running_mean.data = torch.index_select(
        norm.running_mean.data, 0, remained_in)
......
...@@ -23,28 +23,27 @@ _logger.setLevel(logging.INFO)
class ModelSpeedup:
    """
    This class is to speedup the model with provided weight mask.
Parameters
----------
model : pytorch model
The model user wants to speed up
dummy_input : pytorch tensor, tuple of tensor, list of tensor
Note: The first dimension of the dummy_input should be the batchsize.
The dummy input for ```jit.trace```, users should put it on the right
device.
masks_file : str/dict
The path of user provided mask file, or the mask object
map_location : str
the device on which masks are placed, same to map_location in ```torch.load```
batch_dim : int
the index of batch dimension in the dummy_input
confidence: the confidence coefficient of the sparsity inference. This value is
actually used as the batchsize of the dummy_input.
""" """
def __init__(self, model, dummy_input, masks_file, map_location=None, def __init__(self, model, dummy_input, masks_file, map_location=None,
batch_dim=0, confidence=8): batch_dim=0, confidence=8):
"""
Parameters
----------
model : pytorch model
The model user wants to speed up
dummy_input : pytorch tensor, tuple of tensor, list of tensor
Note: The first dimension of the dummy_input should be the batchsize.
The dummy input for ```jit.trace```, users should put it on the right
device.
masks_file : str/dict
The path of user provided mask file, or the mask object
map_location : str
the device on which masks are placed, same to map_location in ```torch.load```
batch_dim : int
the index of batch dimension in the dummy_input
confidence: the confidence coefficient of the sparsity inference. This value is
actually used as the batchsize of the dummy_input.
"""
        assert confidence > 1
        # The auto inference will change the values of the parameters in the model
        # so we need make a copy before the mask inference
......
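A hedged usage sketch of the constructor documented above (the mask file path, input shape and device are placeholders):

    # Sketch: apply exported masks and physically shrink the pruned model.
    import torch

    dummy_input = torch.rand(8, 3, 224, 224).to(device)   # confidence=8 is used as this batch size
    ModelSpeedup(model, dummy_input, 'masks.pth', confidence=8).speedup_model()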
import os
+ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR'
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
...@@ -10,7 +11,7 @@ def _get_nasbenchmark_dir():
    nni_home = os.path.expanduser(
        os.getenv(ENV_NNI_HOME,
                  os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
-    return os.path.join(nni_home, 'nasbenchmark')
+    return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark'))
DATABASE_DIR = _get_nasbenchmark_dir()
......
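Because DATABASE_DIR is resolved at import time by _get_nasbenchmark_dir(), the new NASBENCHMARK_DIR environment variable must be set before the benchmark modules are imported; a minimal sketch (the path is a placeholder):

    # Sketch: point the NAS benchmark database at a custom location.
    import os
    os.environ['NASBENCHMARK_DIR'] = '/data/nasbenchmark'
    # import the nni.nas benchmark modules only after the variable is set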
import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser('NAS benchmark downloader')
parser.add_argument('benchmark_name', choices=['nasbench101', 'nasbench201', 'nds'])
args = parser.parse_args()
from .utils import download_benchmark
download_benchmark(args.benchmark_name)
...@@ -381,17 +381,8 @@ class GraphConverter:
        # step #1: generate graph ir for this method
        method_ir_graph = Graph(model=ir_model, graph_id=-100, name='temp_graph', _internal=True)
-        method_node_index = self.handle_graph_nodes(script_module, script_method.graph, module,
+        self.handle_graph_nodes(script_module, script_method.graph, module,
                                 module_name, ir_model, method_ir_graph, shared_module_index)
for _output in script_method.graph.outputs():
method_ir_graph._add_output(_convert_name(_output.debugName()))
predecessor_node_outputs = [o for o in _output.node().outputs()]
if len(predecessor_node_outputs) == 1:
src_node_idx = None
else:
src_node_idx = predecessor_node_outputs.index(_output)
method_ir_graph.add_edge(head=(method_node_index[_output.node()], src_node_idx),
tail=(method_ir_graph.output_node, None))
        self.refine_graph(method_ir_graph)
        # step #2: merge this graph to its module graph
...@@ -491,18 +482,24 @@ class GraphConverter:
        for node in sm_graph.nodes():
            handle_single_node(node)
-        if node_index == {}:
-            # here is an example that the ir_graph is empty
+        if node_index != {}:
+            for _output in sm_graph.outputs():
+                ir_graph._add_output(_convert_name(_output.debugName()))
+                predecessor_node_outputs = [o for o in _output.node().outputs()]
+                if len(predecessor_node_outputs) == 1:
+                    src_node_idx = None
+                else:
+                    src_node_idx = predecessor_node_outputs.index(_output)
+                ir_graph.add_edge(head=(node_index[_output.node()], src_node_idx),
+                                  tail=(ir_graph.output_node, None))
+        else:
+            # here is an example that the ir_graph and node_index is empty
            # graph(%self : __torch__.torchmodels.googlenet.GoogLeNet,
            #       %x.1 : Tensor): return (%x.1)
-            # add a noop_identity node to handle this situation
-            self.global_seq += 1
-            ni_node = ir_graph.add_node(build_full_name(module_name, 'noop_identity', self.global_seq), 'noop_identity')
-            ir_graph.add_edge(head=(ir_graph.input_node, 0), tail=(ni_node, None))
-            ir_graph.add_edge(head=(ni_node, None), tail=(ir_graph.output_node, None))
-            for _output in sm_graph.outputs():
-                node_index[_output.node()] = ni_node
-        return node_index
+            # add an edge from head to tail to handle this situation
+            ir_graph.add_edge(head=(ir_graph.input_node, 0), tail=(ir_graph.output_node, None))
    def merge_aten_slices(self, ir_graph):
        """
...@@ -625,20 +622,8 @@ class GraphConverter:
        ir_graph = Graph(model=ir_model, graph_id=self.global_graph_id, name=module_name, _internal=True)
        # handle graph nodes
-        node_index = self.handle_graph_nodes(script_module, sm_graph, module,
+        self.handle_graph_nodes(script_module, sm_graph, module,
                                 module_name, ir_model, ir_graph)
# handle graph outputs
for _output in sm_graph.outputs():
ir_graph._add_output(_convert_name(_output.debugName()))
predecessor_node_outputs = [o for o in _output.node().outputs()]
if len(predecessor_node_outputs) == 1:
src_node_idx = None
else:
src_node_idx = predecessor_node_outputs.index(_output)
ir_graph.add_edge(head=(node_index[_output.node()], src_node_idx),
tail=(ir_graph.output_node, None))
        self.refine_graph(ir_graph)
        ir_graph._register()
...@@ -690,7 +675,7 @@ class GraphConverterWithShape(GraphConverter):
    Known issues
    ------------
    1. `InputChoice` and `ValueChoice` not supported yet.
-    2. Currently random inputs are feeded while tracing layerchoice.
+    2. Currently random inputs are fed while tracing layerchoice.
       If forward path of candidates depends on input data, then wrong path will be traced.
       This will result in incomplete shape info.
    """
......
from typing import Any, Union, Optional, List # Copyright (c) Microsoft Corporation.
import torch # Licensed under the MIT license.
from typing import Any, List, Optional, Union
import torch
from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.plugins.environments import ClusterEnvironment
from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin
from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector
from pytorch_lightning.trainer import Trainer from pytorch_lightning.trainer import Trainer
from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector
from pytorch_lightning.plugins import Plugin
from pytorch_lightning.plugins.environments import ClusterEnvironment
from ....serializer import serialize_cls from ....serializer import serialize_cls
...@@ -69,9 +70,8 @@ class BypassPlugin(TrainingTypePlugin): ...@@ -69,9 +70,8 @@ class BypassPlugin(TrainingTypePlugin):
# bypass device placement from pytorch lightning # bypass device placement from pytorch lightning
pass pass
def setup(self, model: torch.nn.Module) -> torch.nn.Module: def setup(self) -> None:
self.model_to_device() pass
return self.model
@property @property
def is_global_zero(self) -> bool: def is_global_zero(self) -> bool:
...@@ -100,8 +100,9 @@ def get_accelerator_connector( ...@@ -100,8 +100,9 @@ def get_accelerator_connector(
deterministic: bool = False, deterministic: bool = False,
precision: int = 32, precision: int = 32,
amp_backend: str = 'native', amp_backend: str = 'native',
amp_level: str = 'O2', amp_level: Optional[str] = None,
plugins: Optional[Union[List[Union[Plugin, ClusterEnvironment, str]], Plugin, ClusterEnvironment, str]] = None, plugins: Optional[Union[List[Union[TrainingTypePlugin, ClusterEnvironment, str]],
TrainingTypePlugin, ClusterEnvironment, str]] = None,
**other_trainier_kwargs) -> AcceleratorConnector: **other_trainier_kwargs) -> AcceleratorConnector:
gpu_ids = Trainer()._parse_devices(gpus, auto_select_gpus, tpu_cores) gpu_ids = Trainer()._parse_devices(gpus, auto_select_gpus, tpu_cores)
return AcceleratorConnector( return AcceleratorConnector(
......
...@@ -7,7 +7,7 @@ from typing import Dict, List, Optional, Union ...@@ -7,7 +7,7 @@ from typing import Dict, List, Optional, Union
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
import pytorch_lightning as pl import torchmetrics
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
import nni import nni
...@@ -19,7 +19,7 @@ from ....serializer import serialize_cls ...@@ -19,7 +19,7 @@ from ....serializer import serialize_cls
@serialize_cls @serialize_cls
class _MultiModelSupervisedLearningModule(LightningModule): class _MultiModelSupervisedLearningModule(LightningModule):
def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric], def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
n_models: int = 0, n_models: int = 0,
learning_rate: float = 0.001, learning_rate: float = 0.001,
weight_decay: float = 0., weight_decay: float = 0.,
...@@ -119,7 +119,7 @@ class MultiModelSupervisedLearningModule(_MultiModelSupervisedLearningModule): ...@@ -119,7 +119,7 @@ class MultiModelSupervisedLearningModule(_MultiModelSupervisedLearningModule):
Class for optimizer (not an instance). default: ``Adam`` Class for optimizer (not an instance). default: ``Adam``
""" """
def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric], def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
learning_rate: float = 0.001, learning_rate: float = 0.001,
weight_decay: float = 0., weight_decay: float = 0.,
optimizer: optim.Optimizer = optim.Adam): optimizer: optim.Optimizer = optim.Adam):
...@@ -180,7 +180,7 @@ class _RegressionModule(MultiModelSupervisedLearningModule): ...@@ -180,7 +180,7 @@ class _RegressionModule(MultiModelSupervisedLearningModule):
learning_rate: float = 0.001, learning_rate: float = 0.001,
weight_decay: float = 0., weight_decay: float = 0.,
optimizer: optim.Optimizer = optim.Adam): optimizer: optim.Optimizer = optim.Adam):
super().__init__(criterion, {'mse': pl.metrics.MeanSquaredError}, super().__init__(criterion, {'mse': torchmetrics.MeanSquaredError},
learning_rate=learning_rate, weight_decay=weight_decay, optimizer=optimizer) learning_rate=learning_rate, weight_decay=weight_decay, optimizer=optimizer)
......
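The change above (and the matching one in the next file) is a straight rename: the metric classes that used to live under pl.metrics now come from the standalone torchmetrics package under the same class names. A hedged sketch of the replacement API; constructor arguments vary across torchmetrics versions (newer releases may require e.g. Accuracy(task='binary')):

    # Drop-in replacement sketch for the pl.metrics -> torchmetrics rename above.
    import torch
    import torchmetrics

    accuracy = torchmetrics.Accuracy()        # was pl.metrics.Accuracy
    mse = torchmetrics.MeanSquaredError()     # was pl.metrics.MeanSquaredError

    preds = torch.tensor([1, 0, 1])
    target = torch.tensor([1, 0, 0])
    accuracy.update(preds, target)
    mse.update(preds.float(), target.float())
    print(accuracy.compute(), mse.compute())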
...@@ -9,6 +9,7 @@ from typing import Dict, NoReturn, Union, Optional, List, Type ...@@ -9,6 +9,7 @@ from typing import Dict, NoReturn, Union, Optional, List, Type
import pytorch_lightning as pl import pytorch_lightning as pl
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
import torchmetrics
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
import nni import nni
...@@ -140,7 +141,7 @@ def _check_dataloader(dataloader): ...@@ -140,7 +141,7 @@ def _check_dataloader(dataloader):
### The following are some commonly used Lightning modules ### ### The following are some commonly used Lightning modules ###
class _SupervisedLearningModule(LightningModule): class _SupervisedLearningModule(LightningModule):
def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric], def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
learning_rate: float = 0.001, learning_rate: float = 0.001,
weight_decay: float = 0., weight_decay: float = 0.,
optimizer: optim.Optimizer = optim.Adam, optimizer: optim.Optimizer = optim.Adam,
...@@ -213,7 +214,7 @@ class _SupervisedLearningModule(LightningModule): ...@@ -213,7 +214,7 @@ class _SupervisedLearningModule(LightningModule):
return {name: self.trainer.callback_metrics['val_' + name].item() for name in self.metrics} return {name: self.trainer.callback_metrics['val_' + name].item() for name in self.metrics}
class _AccuracyWithLogits(pl.metrics.Accuracy): class _AccuracyWithLogits(torchmetrics.Accuracy):
def update(self, pred, target): def update(self, pred, target):
return super().update(nn.functional.softmax(pred), target) return super().update(nn.functional.softmax(pred), target)
...@@ -278,7 +279,7 @@ class _RegressionModule(_SupervisedLearningModule): ...@@ -278,7 +279,7 @@ class _RegressionModule(_SupervisedLearningModule):
weight_decay: float = 0., weight_decay: float = 0.,
optimizer: optim.Optimizer = optim.Adam, optimizer: optim.Optimizer = optim.Adam,
export_onnx: bool = True): export_onnx: bool = True):
super().__init__(criterion, {'mse': pl.metrics.MeanSquaredError}, super().__init__(criterion, {'mse': torchmetrics.MeanSquaredError},
learning_rate=learning_rate, weight_decay=weight_decay, optimizer=optimizer, learning_rate=learning_rate, weight_decay=weight_decay, optimizer=optimizer,
export_onnx=export_onnx) export_onnx=export_onnx)
......
...@@ -219,7 +219,8 @@ class RetiariiExperiment(Experiment): ...@@ -219,7 +219,8 @@ class RetiariiExperiment(Experiment):
elif self.config.execution_engine == 'cgo': elif self.config.execution_engine == 'cgo':
from ..execution.cgo_engine import CGOExecutionEngine from ..execution.cgo_engine import CGOExecutionEngine
# assert self.config.trial_gpu_number==1, "trial_gpu_number must be 1 to use CGOExecutionEngine" assert self.config.training_service.platform == 'remote', \
"CGO execution engine currently only supports remote training service"
assert self.config.batch_waiting_time is not None assert self.config.batch_waiting_time is not None
devices = self._construct_devices() devices = self._construct_devices()
engine = CGOExecutionEngine(devices, engine = CGOExecutionEngine(devices,
...@@ -273,11 +274,10 @@ class RetiariiExperiment(Experiment): ...@@ -273,11 +274,10 @@ class RetiariiExperiment(Experiment):
devices = [] devices = []
if hasattr(self.config.training_service, 'machine_list'): if hasattr(self.config.training_service, 'machine_list'):
for machine in self.config.training_service.machine_list: for machine in self.config.training_service.machine_list:
assert machine.gpu_indices is not None, \
'gpu_indices must be set in RemoteMachineConfig for CGO execution engine'
for gpu_idx in machine.gpu_indices: for gpu_idx in machine.gpu_indices:
devices.append(GPUDevice(machine.host, gpu_idx)) devices.append(GPUDevice(machine.host, gpu_idx))
else:
for gpu_idx in self.config.training_service.gpu_indices:
devices.append(GPUDevice('local', gpu_idx))
return devices return devices
def _create_dispatcher(self): def _create_dispatcher(self):
......
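The new asserts above mean a CGO experiment must run on the remote training service, and every machine must declare its GPUs explicitly so _construct_devices can build one GPUDevice per index. A hedged configuration sketch; the class and field names are assumptions based on NNI's remote training-service config, not copied from this diff:

    # Hedged sketch of a training-service config that satisfies the CGO checks above:
    # platform is 'remote' and gpu_indices is set on every machine.
    from nni.experiment import RemoteConfig, RemoteMachineConfig

    machine = RemoteMachineConfig(
        host='192.0.2.10',       # placeholder address
        user='nni',
        gpu_indices=[0, 1],      # required: one GPUDevice is created per listed index
    )
    training_service = RemoteConfig(machine_list=[machine])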
...@@ -254,6 +254,13 @@ class AtenFloordiv(PyTorchOperation): ...@@ -254,6 +254,13 @@ class AtenFloordiv(PyTorchOperation):
return f'{output} = {inputs[0]} // {inputs[1]}' return f'{output} = {inputs[0]} // {inputs[1]}'
class AtenMul(PyTorchOperation):
_ori_type_name = ['aten::mul']
def to_forward_code(self, field: str, output: str, inputs: List[str], inputs_value: List[Any] = None) -> str:
return f'{output} = {inputs[0]} * {inputs[1]}'
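The new AtenMul op simply emits a multiply between its two inputs when the IR graph is turned back into Python source. A standalone mimic of the emitted line (not importing NNI; the helper name is illustrative):

    # Standalone mimic of AtenMul.to_forward_code above: given symbolic input names and
    # an output name, emit one line of the synthesized forward().
    from typing import List

    def aten_mul_forward_code(output: str, inputs: List[str]) -> str:
        return f'{output} = {inputs[0]} * {inputs[1]}'

    print(aten_mul_forward_code('out1', ['x', 'y']))   # out1 = x * y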
class AtenLen(PyTorchOperation): class AtenLen(PyTorchOperation):
_ori_type_name = ['aten::len'] _ori_type_name = ['aten::len']
...@@ -491,7 +498,8 @@ class AtenAvgpool2d(PyTorchOperation): ...@@ -491,7 +498,8 @@ class AtenAvgpool2d(PyTorchOperation):
class ToDevice(PyTorchOperation): class ToDevice(PyTorchOperation):
_artificial_op_name = "ToDevice" _artificial_op_name = "ToDevice"
def __init__(self, type_name: str, parameters: Dict[str, Any], _internal: bool = False): def __init__(self, type_name: str, parameters: Dict[str, Any], _internal: bool = False,
attributes: Dict[str, Any] = None):
self.type = "ToDevice" self.type = "ToDevice"
self.device = parameters['device'] self.device = parameters['device']
self.overridden_device_repr = None self.overridden_device_repr = None
......
...@@ -57,6 +57,8 @@ def parse_path(experiment_config, config_path): ...@@ -57,6 +57,8 @@ def parse_path(experiment_config, config_path):
expand_path(experiment_config['assessor'], 'codeDir') expand_path(experiment_config['assessor'], 'codeDir')
if experiment_config.get('advisor'): if experiment_config.get('advisor'):
expand_path(experiment_config['advisor'], 'codeDir') expand_path(experiment_config['advisor'], 'codeDir')
if experiment_config['advisor'].get('classArgs') and experiment_config['advisor']['classArgs'].get('config_space'):
expand_path(experiment_config['advisor']['classArgs'], 'config_space')
if experiment_config.get('machineList'): if experiment_config.get('machineList'):
for index in range(len(experiment_config['machineList'])): for index in range(len(experiment_config['machineList'])):
expand_path(experiment_config['machineList'][index], 'sshKeyPath') expand_path(experiment_config['machineList'][index], 'sshKeyPath')
...@@ -95,8 +97,8 @@ def parse_path(experiment_config, config_path): ...@@ -95,8 +97,8 @@ def parse_path(experiment_config, config_path):
if experiment_config.get('advisor'): if experiment_config.get('advisor'):
parse_relative_path(root_path, experiment_config['advisor'], 'codeDir') parse_relative_path(root_path, experiment_config['advisor'], 'codeDir')
# for BOHB when delivering a ConfigSpace file directly # for BOHB when delivering a ConfigSpace file directly
if experiment_config.get('advisor').get('classArgs') and experiment_config.get('advisor').get('classArgs').get('config_space'): if experiment_config['advisor'].get('classArgs') and experiment_config['advisor']['classArgs'].get('config_space'):
parse_relative_path(root_path, experiment_config.get('advisor').get('classArgs'), 'config_space') parse_relative_path(root_path, experiment_config['advisor']['classArgs'], 'config_space')
if experiment_config.get('machineList'): if experiment_config.get('machineList'):
for index in range(len(experiment_config['machineList'])): for index in range(len(experiment_config['machineList'])):
......
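The added branch mirrors the existing BOHB handling further down: when the advisor's classArgs carries a ConfigSpace file, its path is now expanded (and later resolved relative to the YAML file) the same way codeDir is. A hedged sketch of the legacy-schema fragment this targets; keys follow the V1 experiment config that parse_path consumes, and the file name is a placeholder:

    # Hedged sketch of the experiment_config fragment the new expand_path call handles.
    experiment_config = {
        'advisor': {
            'builtinAdvisorName': 'BOHB',
            'classArgs': {
                'optimize_mode': 'maximize',
                'config_space': '~/experiments/search_space.pcs',  # expanded like codeDir
            },
        },
    }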
...@@ -97,10 +97,10 @@ class QuantizationSpeedupTestCase(TestCase): ...@@ -97,10 +97,10 @@ class QuantizationSpeedupTestCase(TestCase):
model = BackboneModel() model = BackboneModel()
configure_list = { configure_list = {
'conv1':{'weight_bit':8, 'activation_bit':8}, 'conv1':{'weight_bits':8, 'output_bits':8},
'conv2':{'weight_bit':32, 'activation_bit':32}, 'conv2':{'weight_bits':32, 'output_bits':32},
'fc1':{'weight_bit':16, 'activation_bit':16}, 'fc1':{'weight_bits':16, 'output_bits':16},
'fc2':{'weight_bit':8, 'activation_bit':8} 'fc2':{'weight_bits':8, 'output_bits':8}
} }
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5) optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
...@@ -126,16 +126,16 @@ class QuantizationSpeedupTestCase(TestCase): ...@@ -126,16 +126,16 @@ class QuantizationSpeedupTestCase(TestCase):
model = BackboneModel() model = BackboneModel()
configure_list = [{ configure_list = [{
'quant_types': ['weight', 'output'], 'quant_types': ['input', 'weight'],
'quant_bits': {'weight':8, 'output':8}, 'quant_bits': {'input':8, 'weight':8},
'op_names': ['conv1'] 'op_names': ['conv1']
}, { }, {
'quant_types': ['output'], 'quant_types': ['output'],
'quant_bits': {'output':8}, 'quant_bits': {'output':8},
'op_names': ['relu1'] 'op_names': ['relu1']
}, { }, {
'quant_types': ['weight', 'output'], 'quant_types': ['input', 'weight'],
'quant_bits': {'weight':8, 'output':8}, 'quant_bits': {'input':8, 'weight':8},
'op_names': ['conv2'] 'op_names': ['conv2']
}, { }, {
'quant_types': ['output'], 'quant_types': ['output'],
...@@ -145,8 +145,9 @@ class QuantizationSpeedupTestCase(TestCase): ...@@ -145,8 +145,9 @@ class QuantizationSpeedupTestCase(TestCase):
] ]
# finetune the model by using QAT # finetune the model by using QAT
dummy_input = torch.randn(1, 1, 28, 28)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5) optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
quantizer = QAT_Quantizer(model, configure_list, optimizer) quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
quantizer.compress() quantizer.compress()
model.to(self.device) model.to(self.device)
...@@ -178,13 +179,13 @@ class QuantizationSpeedupTestCase(TestCase): ...@@ -178,13 +179,13 @@ class QuantizationSpeedupTestCase(TestCase):
model = vgg16() model = vgg16()
configure_list = { configure_list = {
'features.0':{'weight_bit':8, 'activation_bit':8}, 'features.0':{'weight_bits':8, 'output_bits':8},
'features.1':{'weight_bit':32, 'activation_bit':32}, 'features.1':{'weight_bits':32, 'output_bits':32},
'features.2':{'weight_bit':16, 'activation_bit':16}, 'features.2':{'weight_bits':16, 'output_bits':16},
'features.4':{'weight_bit':8, 'activation_bit':8}, 'features.4':{'weight_bits':8, 'output_bits':8},
'features.7':{'weight_bit':8, 'activation_bit':8}, 'features.7':{'weight_bits':8, 'output_bits':8},
'features.8':{'weight_bit':8, 'activation_bit':8}, 'features.8':{'weight_bits':8, 'output_bits':8},
'features.11':{'weight_bit':8, 'activation_bit':8} 'features.11':{'weight_bits':8, 'output_bits':8}
} }
model.to(self.device) model.to(self.device)
......
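Taken together, the test changes above show the updated QAT usage: per-layer calibration settings use the *_bits key names, and the quantizer now receives a dummy_input. A hedged sketch of that flow, reusing BackboneModel and configure_list from the test above; the export call is an assumption based on the quantizer's usual API rather than something shown in this diff:

    # Hedged sketch of the updated QAT flow the tests above exercise.
    import torch
    from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

    model = BackboneModel()                               # defined in the test file above
    dummy_input = torch.randn(1, 1, 28, 28)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
    quantizer.compress()
    # ... fine-tune the wrapped model here ...
    calibration_config = quantizer.export_model('qat_model.pth', 'qat_calibration.pth')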