"device_operation/include/device_reduce_common.hpp" did not exist on "12dfba3d03f402c051e2129fa21f33264f4d26e5"
Unverified commit e8b88a79, authored by J-shang, committed by GitHub

unify name speed up and speedup to speedup (#4689)

parent c5066cda
@@ -217,9 +217,9 @@ def parse_args():
     parser.add_argument('--agp_n_epochs_per_iter', type=int, default=1,
                         help='number of epochs per iteration for agp')
-    # speed-up
-    parser.add_argument('--speed_up', action='store_true', default=False,
-                        help='Whether to speed-up the pruned model')
+    # speedup
+    parser.add_argument('--speedup', action='store_true', default=False,
+                        help='Whether to speedup the pruned model')
     # finetuning parameters
     parser.add_argument('--n_workers', type=int, default=16,
@@ -336,7 +336,7 @@ def run_pruning(args):
     # model speedup
     pruner._unwrap_model()
-    if args.speed_up:
+    if args.speedup:
         dummy_input = torch.rand(1,3,224,224).to(device)
         ms = ModelSpeedup(model, dummy_input, args.experiment_dir + './mask_temp.pth')
         ms.speedup_model()
...
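For orientation, the pattern these first hunks touch is the usual NNI pruning-then-speedup flow, now gated by the renamed --speedup flag. A minimal, self-contained sketch of that flow (the toy model, sparsity value, and input size below are illustrative only, not taken from this commit):

    import argparse
    import torch
    import torch.nn as nn
    from nni.compression.pytorch.pruning import L1NormPruner
    from nni.compression.pytorch.speedup import ModelSpeedup

    parser = argparse.ArgumentParser()
    parser.add_argument('--speedup', action='store_true', default=False,
                        help='Whether to speedup the pruned model')
    args = parser.parse_args()

    # a toy network standing in for the example script's model
    model = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(16, 8, 3, padding=1))
    pruner = L1NormPruner(model, [{'sparsity': 0.5, 'op_types': ['Conv2d']}])
    _, masks = pruner.compress()        # masks only simulate pruning at this point

    if args.speedup:
        pruner._unwrap_model()          # ModelSpeedup expects the unwrapped model
        dummy_input = torch.rand(1, 3, 224, 224)
        ModelSpeedup(model, dummy_input, masks).speedup_model()
        print(model)                    # the masked Conv2d layers are now physically smaller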
@@ -29,7 +29,7 @@ if [[ ${TASK_LIST[*]} =~ (^|[[:space:]])$TASK_NAME($|[[:space:]]) ]]; then
         --ranking_criterion $RANKING_CRITERION \
         --num_iterations $NUM_ITERATIONS \
         --epochs_per_iteration $EPOCHS_PER_ITERATION \
-        --speed_up \
+        --speedup \
         --model_name $PRETRAINED_MODEL \
         --task_name $TASK_NAME \
         --max_length $MAX_LENGTH \
...
@@ -53,8 +53,8 @@ def parse_args():
     parser.add_argument("--epochs_per_iteration", type=int, default=1,
                         help="Epochs to finetune before the next pruning iteration "
                              "(only effective if num_iterations > 1).")
-    parser.add_argument("--speed_up", action="store_true", default=False,
-                        help="Whether to speed-up the pruned model")
+    parser.add_argument("--speedup", action="store_true", default=False,
+                        help="Whether to speedup the pruned model")
     # parameters for model training; no need to change them for running examples
     parser.add_argument("--max_length", type=int, default=128,
@@ -338,8 +338,8 @@ def main():
     # Currently, speeding up Transformers through NNI ModelSpeedup is not supported because of shape inference issues.
     # However, if you are using the transformers library, you can use the following workaround:
     # The following code gets the head pruning decisions from the pruner and calls the _prune_heads() function
-    # implemented in models from the transformers library to speed up the model.
-    if args.speed_up:
+    # implemented in models from the transformers library to speedup the model.
+    if args.speedup:
         speedup_rules = {}
         for group_idx, group in enumerate(pruner.attention_name_groups):
             # get the layer index
...
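The workaround sketched in those comments ultimately relies on the head-pruning API that transformers models expose. A minimal hedged illustration of that call (the checkpoint name and head indices are made up; the real example derives them from the pruner's masks):

    from transformers import BertModel

    model = BertModel.from_pretrained('bert-base-uncased')
    # physically remove attention heads: {layer index: [head indices to drop]}
    model.prune_heads({0: [0, 2], 3: [5]})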
@@ -4,7 +4,7 @@
 '''
 NNI example for supported ActivationAPoZRank and ActivationMeanRank pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -4,7 +4,7 @@
 '''
 NNI example for supported ADMM pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -4,7 +4,7 @@
 '''
 NNI example for supported fpgm pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -94,8 +94,8 @@ if __name__ == '__main__':
                         choices=['level', 'l1', 'l2', 'fpgm', 'slim', 'apoz',
                                  'mean_activation', 'taylorfo', 'admm'],
                         help='algorithm to evaluate weights to prune')
-    parser.add_argument('--speed-up', type=bool, default=False,
-                        help='Whether to speed-up the pruned model')
+    parser.add_argument('--speedup', type=bool, default=False,
+                        help='Whether to speedup the pruned model')
     parser.add_argument('--reset-weight', type=bool, default=True,
                         help='Whether to reset weight during each iteration')
@@ -120,8 +120,8 @@ if __name__ == '__main__':
                'evaluator': None,
                'finetuner': finetuner}
-    if args.speed_up:
-        kw_args['speed_up'] = args.speed_up
+    if args.speedup:
+        kw_args['speedup'] = args.speedup
         kw_args['dummy_input'] = torch.rand(10, 3, 32, 32).to(device)
     if args.pruner == 'linear':
...
@@ -4,7 +4,7 @@
 '''
 NNI example for supported level pruning algorithm.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -4,7 +4,7 @@
 '''
 NNI example for supported l1norm and l2norm pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -92,8 +92,8 @@ if __name__ == '__main__':
     # if you just want to keep the final result as the best result, you can pass evaluator as None.
     # or the result with the highest score (given by evaluator) will be the best result.
-    # scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=evaluator)
-    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
+    # scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=evaluator)
+    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
     scheduler.compress()
...
@@ -77,12 +77,12 @@ if __name__ == '__main__':
     pruner._unwrap_model()
     ModelSpeedup(model, dummy_input=torch.rand(10, 3, 32, 32).to(device), masks_file=masks).speedup_model()
-    print('\nThe accuracy after speed up:')
+    print('\nThe accuracy after speedup:')
     evaluator(model)
     # Need a new optimizer due to the modules in model will be replaced during speedup.
     optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
-    print('\nFinetune the model after speed up:')
+    print('\nFinetune the model after speedup:')
     for i in range(5):
         trainer(model, optimizer, criterion, i)
         evaluator(model)
@@ -4,7 +4,7 @@
 '''
 NNI example for supported slim pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> speedup -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -4,7 +4,7 @@
 '''
 NNI example for supported TaylorFOWeight pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse
...
@@ -82,27 +82,27 @@ for name, mask in masks.items():
     print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
 # %%
-# Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
-# The model becomes smaller after speed-up,
+# Speedup the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
+# The model becomes smaller after speedup,
 # and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the masks across layers.
-# need to unwrap the model, if the model is wrapped before speed up
+# need to unwrap the model, if the model is wrapped before speedup
 pruner._unwrap_model()
-# speed up the model
+# speedup the model
 from nni.compression.pytorch.speedup import ModelSpeedup
 ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks).speedup_model()
 # %%
-# the model will become real smaller after speed up
+# the model will become real smaller after speedup
 print(model)
 # %%
 # Fine-tuning Compacted Model
 # ---------------------------
 # Note that if the model has been sped up, you need to re-initialize a new optimizer for fine-tuning.
-# Because speed up will replace the masked big layers with dense small ones.
+# Because speedup will replace the masked big layers with dense small ones.
 optimizer = SGD(model.parameters(), 1e-2)
 for epoch in range(3):
...
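The note about re-initializing the optimizer follows from the fact that speedup swaps out module objects, so an optimizer created earlier still holds references to the old, pre-replacement parameters. A tiny hedged illustration of that pitfall (plain PyTorch, no NNI involved; the manual layer swap stands in for what speedup does):

    import torch
    import torch.nn as nn

    model = nn.Linear(8, 8)
    opt_before = torch.optim.SGD(model.parameters(), lr=0.1)
    model = nn.Linear(8, 4)    # stands in for the layer replacement done by speedup
    opt_after = torch.optim.SGD(model.parameters(), lr=0.1)
    # opt_before still tracks the old 8x8 weight, which is no longer part of the model,
    # so stepping it would not train the replaced layer at all.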
""" """
Speed Up Model with Mask Speedup Model with Mask
======================== ========================
Introduction Introduction
...@@ -12,16 +12,16 @@ to convert a model to a smaller one based on user provided masks (the masks come ...@@ -12,16 +12,16 @@ to convert a model to a smaller one based on user provided masks (the masks come
pruning algorithms). pruning algorithms).
There are two types of pruning. One is fine-grained pruning, it does not change the shape of weights, There are two types of pruning. One is fine-grained pruning, it does not change the shape of weights,
and input/output tensors. Sparse kernel is required to speed up a fine-grained pruned layer. and input/output tensors. Sparse kernel is required to speedup a fine-grained pruned layer.
The other is coarse-grained pruning (e.g., channels), shape of weights and input/output tensors usually change due to such pruning. The other is coarse-grained pruning (e.g., channels), shape of weights and input/output tensors usually change due to such pruning.
To speed up this kind of pruning, there is no need to use sparse kernel, just replace the pruned layer with smaller one. To speedup this kind of pruning, there is no need to use sparse kernel, just replace the pruned layer with smaller one.
Since the support of sparse kernels in community is limited, Since the support of sparse kernels in community is limited,
we only support the speedup of coarse-grained pruning and leave the support of fine-grained pruning in future. we only support the speedup of coarse-grained pruning and leave the support of fine-grained pruning in future.
Design and Implementation Design and Implementation
------------------------- -------------------------
To speed up a model, the pruned layers should be replaced, either replaced with smaller layer for coarse-grained mask, To speedup a model, the pruned layers should be replaced, either replaced with smaller layer for coarse-grained mask,
or replaced with sparse kernel for fine-grained mask. Coarse-grained mask usually changes the shape of weights or input/output tensors, or replaced with sparse kernel for fine-grained mask. Coarse-grained mask usually changes the shape of weights or input/output tensors,
thus, we should do shape inference to check are there other unpruned layers should be replaced as well due to shape change. thus, we should do shape inference to check are there other unpruned layers should be replaced as well due to shape change.
Therefore, in our design, there are two main steps: first, do shape inference to find out all the modules that should be replaced; Therefore, in our design, there are two main steps: first, do shape inference to find out all the modules that should be replaced;
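The shape-inference step described here is easiest to see on a two-layer example: masking output channels of one convolution forces the next convolution, which was never pruned itself, to be rebuilt with fewer input channels. A self-contained hedged sketch (layer sizes and sparsity are illustrative, not from this doc):

    import torch
    import torch.nn as nn
    from nni.compression.pytorch.pruning import L1NormPruner
    from nni.compression.pytorch.speedup import ModelSpeedup

    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
            self.conv2 = nn.Conv2d(16, 32, 3, padding=1)   # not listed for pruning
        def forward(self, x):
            return self.conv2(torch.relu(self.conv1(x)))

    model = Net()
    pruner = L1NormPruner(model, [{'sparsity': 0.5, 'op_names': ['conv1']}])
    _, masks = pruner.compress()
    pruner._unwrap_model()
    ModelSpeedup(model, torch.rand(1, 1, 28, 28), masks).speedup_model()
    print(model.conv1)   # out_channels reduced by the mask
    print(model.conv2)   # in_channels reduced as well, via shape inference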
@@ -64,13 +64,13 @@ model(torch.rand(128, 1, 28, 28).to(device))
 print('Original Model - Elapsed Time : ', time.time() - start)
 # %%
-# Speed up the model and show the model structure after speed up.
+# Speedup the model and show the model structure after speedup.
 from nni.compression.pytorch import ModelSpeedup
 ModelSpeedup(model, torch.rand(10, 1, 28, 28).to(device), masks).speedup_model()
 print(model)
 # %%
-# Roughly test the model after speed-up inference speed.
+# Roughly test the model after speedup inference speed.
 start = time.time()
 model(torch.rand(128, 1, 28, 28).to(device))
 print('Speedup Model - Elapsed Time : ', time.time() - start)
@@ -87,7 +87,7 @@ print('Speedup Model - Elapsed Time : ', time.time() - start)
 # For PyTorch we can only replace modules, if functions in ``forward`` should be replaced,
 # our current implementation does not work. One workaround is make the function a PyTorch module.
 #
-# If you want to speed up your own model which cannot supported by the current implementation,
+# If you want to speedup your own model which cannot supported by the current implementation,
 # you need implement the replace function for module replacement, welcome to contribute.
 #
 # Speedup Results of Examples
...
""" """
Speed Up Model with Calibration Config SpeedUp Model with Calibration Config
====================================== ======================================
...@@ -8,10 +8,10 @@ Introduction ...@@ -8,10 +8,10 @@ Introduction
Deep learning network has been computational intensive and memory intensive Deep learning network has been computational intensive and memory intensive
which increases the difficulty of deploying deep neural network model. Quantization is a which increases the difficulty of deploying deep neural network model. Quantization is a
fundamental technology which is widely used to reduce memory footprint and speed up inference fundamental technology which is widely used to reduce memory footprint and speedup inference
process. Many frameworks begin to support quantization, but few of them support mixed precision process. Many frameworks begin to support quantization, but few of them support mixed precision
quantization and get real speedup. Frameworks like `HAQ: Hardware-Aware Automated Quantization with Mixed Precision <https://arxiv.org/pdf/1811.08886.pdf>`__\, only support simulated mixed precision quantization which will quantization and get real speedup. Frameworks like `HAQ: Hardware-Aware Automated Quantization with Mixed Precision <https://arxiv.org/pdf/1811.08886.pdf>`__\, only support simulated mixed precision quantization which will
not speed up the inference process. To get real speedup of mixed precision quantization and not speedup the inference process. To get real speedup of mixed precision quantization and
help people get the real feedback from hardware, we design a general framework with simple interface to allow NNI quantization algorithms to connect different help people get the real feedback from hardware, we design a general framework with simple interface to allow NNI quantization algorithms to connect different
DL model optimization backends (e.g., TensorRT, NNFusion), which gives users an end-to-end experience that after quantizing their model DL model optimization backends (e.g., TensorRT, NNFusion), which gives users an end-to-end experience that after quantizing their model
with quantization algorithms, the quantized model can be directly speeded up with the connected optimization backend. NNI connects with quantization algorithms, the quantized model can be directly speeded up with the connected optimization backend. NNI connects
...@@ -108,7 +108,7 @@ calibration_config = quantizer.export_model(model_path, calibration_path) ...@@ -108,7 +108,7 @@ calibration_config = quantizer.export_model(model_path, calibration_path)
print("calibration_config: ", calibration_config) print("calibration_config: ", calibration_config)
# %% # %%
# build tensorRT engine to make a real speed up # build tensorRT engine to make a real speedup
# from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT # from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
# input_shape = (32, 1, 28, 28) # input_shape = (32, 1, 28, 28)
......
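The commented-out engine-building code is cut off by the collapsed diff view. For orientation, building and running such an engine with NNI's TensorRT backend typically looks roughly like the following; the keyword names and the engine.compress() / engine.inference() calls reflect my recollection of the quantization-speedup API and should be read as an assumption, not as the lines elided from this file:

    # hedged sketch: assumes the ModelSpeedupTensorRT API and a TensorRT installation;
    # `model`, `calibration_config`, and `test_data` stand for objects from the surrounding tutorial.
    from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT

    input_shape = (32, 1, 28, 28)
    engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
    engine.compress()                        # builds the mixed-precision TensorRT engine
    output, latency = engine.inference(test_data)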
@@ -201,7 +201,7 @@ class AutoCompressPruner(Pruner):
             ADMMpruner.export_model(os.path.join(self._experiment_data_dir, 'model_admm_masked.pth'), os.path.join(
                 self._experiment_data_dir, 'mask.pth'))
-            # use speed up to prune the model before next iteration,
+            # use speedup to prune the model before next iteration,
             # because SimulatedAnnealingPruner & ADMMPruner don't take masked models
             self._model_to_prune.load_state_dict(torch.load(os.path.join(
                 self._experiment_data_dir, 'model_admm_masked.pth')))
...
@@ -20,7 +20,7 @@ class Task:
     _reference_counter = {}
     def __init__(self, task_id: int, model_path: str, masks_path: str, config_list_path: str,
-                 speed_up: Optional[bool] = True, finetune: Optional[bool] = True, evaluate: Optional[bool] = True):
+                 speedup: Optional[bool] = True, finetune: Optional[bool] = True, evaluate: Optional[bool] = True):
         """
         Parameters
         ----------
@@ -32,8 +32,8 @@ class Task:
             The path of the masks that applied on the model before pruning.
         config_list_path
             The path of the config list that used in this task.
-        speed_up
-            Control if this task needs speed up, True means use scheduler default value, False means no speed up.
+        speedup
+            Control if this task needs speedup, True means use scheduler default value, False means no speedup.
         finetune
             Control if this task needs finetune, True means use scheduler default value, False means no finetune.
         evaluate
@@ -44,7 +44,7 @@ class Task:
         self.masks_path = masks_path
         self.config_list_path = config_list_path
-        self.speed_up = speed_up
+        self.speedup = speedup
         self.finetune = finetune
         self.evaluate = evaluate
@@ -65,7 +65,7 @@ class Task:
             'model_path': str(self.model_path),
             'masks_path': str(self.masks_path),
             'config_list_path': str(self.config_list_path),
-            'speed_up': self.speed_up,
+            'speedup': self.speedup,
             'finetune': self.finetune,
             'evaluate': self.evaluate,
             'status': self.status,
...
@@ -185,7 +185,7 @@ class AMCPruner(IterativePruner):
             - op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
             - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
     dummy_input : torch.Tensor
-        `dummy_input` is required for speed-up and tracing the model in RL environment.
+        `dummy_input` is required for speedup and tracing the model in RL environment.
     evaluator : Callable[[Module], float]
         Evaluate the pruned model and give a score.
     pruning_algorithm : str
@@ -249,5 +249,5 @@ class AMCPruner(IterativePruner):
                                              ddpg_params=ddpg_params,
                                              target=target)
         pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
-        super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input,
+        super().__init__(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input,
                          evaluator=evaluator, reset_weight=False)
@@ -112,10 +112,10 @@ class AutoCompressPruner(IterativePruner):
     finetuner : Optional[Callable[[Module], None]]
         The finetuner handles all finetune logic, takes a pytorch module as input.
         It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration.
-    speed_up : bool
-        If set True, speed up the model at the end of each iteration to make the pruned model compact.
+    speedup : bool
+        If set True, speedup the model at the end of each iteration to make the pruned model compact.
     dummy_input : Optional[torch.Tensor]
-        If `speed_up` is True, `dummy_input` is required for tracing the model in speed up.
+        If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
     Examples
     --------
@@ -148,7 +148,7 @@ class AutoCompressPruner(IterativePruner):
     def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict,
                  sa_params: Dict, log_dir: str = '.', keep_intermediate_result: bool = False,
-                 finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False,
+                 finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False,
                  dummy_input: Optional[Tensor] = None, evaluator: Callable[[Module], float] = None):
         task_generator = AutoCompressTaskGenerator(total_iteration=total_iteration,
                                                    origin_model=model,
@@ -159,5 +159,5 @@ class AutoCompressPruner(IterativePruner):
         if 'traced_optimizer' in admm_params:
             admm_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, admm_params['traced_optimizer'])
         pruner = ADMMPruner(None, None, **admm_params)
-        super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input,
+        super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
                          evaluator=evaluator, reset_weight=False)