unify name speed up and speedup to speedup (#4689)

e8b88a79 · J-shang · GitHub · c5066cda · e8b88a79 · e8b88a79
Unverified Commit e8b88a79 authored Mar 28, 2022 by J-shang Committed by GitHub Mar 28, 2022
20 changed files
--- a/examples/model_compress/pruning/mobilenetv2_end2end/pruning_experiments.py
+++ b/examples/model_compress/pruning/mobilenetv2_end2end/pruning_experiments.py
@@ -217,9 +217,9 @@ def parse_args():
    parser.add_argument('--agp_n_epochs_per_iter', type=int, default=1,
                        help='number of epochs per iteration for agp')
-    # speed-up
+    # speedup
-    parser.add_argument('--speed_up', action='store_true', default=False,
+    parser.add_argument('--speedup', action='store_true', default=False,
-                        help='Whether to speed-up the pruned model')
+                        help='Whether to speedup the pruned model')
    # finetuning parameters
    parser.add_argument('--n_workers', type=int, default=16,
@@ -336,7 +336,7 @@ def run_pruning(args):
    # model speedup
    pruner._unwrap_model()
-    if args.speed_up:
+    if args.speedup:
        dummy_input = torch.rand(1,3,224,224).to(device)
        ms = ModelSpeedup(model, dummy_input, args.experiment_dir + './mask_temp.pth')
        ms.speedup_model()

--- a/examples/model_compress/pruning/transformers/run.sh
+++ b/examples/model_compress/pruning/transformers/run.sh
@@ -29,7 +29,7 @@ if [[ ${TASK_LIST[*]} =~ (^|[[:space:]])$TASK_NAME($|[[:space:]]) ]]; then
 	   --ranking_criterion $RANKING_CRITERION \
 	   --num_iterations $NUM_ITERATIONS \
 	   --epochs_per_iteration $EPOCHS_PER_ITERATION \
-	   --speed_up \
+	   --speedup \
 	   --model_name $PRETRAINED_MODEL \
 	   --task_name $TASK_NAME \
 	   --max_length $MAX_LENGTH \

--- a/examples/model_compress/pruning/transformers/transformer_pruning.py
+++ b/examples/model_compress/pruning/transformers/transformer_pruning.py
@@ -53,8 +53,8 @@ def parse_args():
    parser.add_argument("--epochs_per_iteration", type=int, default=1,
                        help="Epochs to finetune before the next pruning iteration "
                             "(only effective if num_iterations > 1).")
-    parser.add_argument("--speed_up", action="store_true", default=False,
+    parser.add_argument("--speedup", action="store_true", default=False,
-                        help="Whether to speed-up the pruned model")
+                        help="Whether to speedup the pruned model")
    # parameters for model training; no need to change them for running examples
    parser.add_argument("--max_length", type=int, default=128,
@@ -338,8 +338,8 @@ def main():
    # Currently, speeding up Transformers through NNI ModelSpeedup is not supported because of shape inference issues.
    # However, if you are using the transformers library, you can use the following workaround:
    # The following code gets the head pruning decisions from the pruner and calls the _prune_heads() function
-    # implemented in models from the transformers library to speed up the model.
+    # implemented in models from the transformers library to speedup the model.
-    if args.speed_up:
+    if args.speedup:
        speedup_rules = {}
        for group_idx, group in enumerate(pruner.attention_name_groups):
            # get the layer index

--- a/examples/model_compress/pruning/v2/activation_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/activation_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported ActivationAPoZRank and ActivationMeanRank pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/admm_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/admm_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported ADMM pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/fpgm_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/fpgm_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported fpgm pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/iterative_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/iterative_pruning_torch.py
@@ -94,8 +94,8 @@ if __name__ == '__main__':
                        choices=['level', 'l1', 'l2', 'fpgm', 'slim', 'apoz',
                                 'mean_activation', 'taylorfo', 'admm'],
                        help='algorithm to evaluate weights to prune')
-    parser.add_argument('--speed-up', type=bool, default=False,
+    parser.add_argument('--speedup', type=bool, default=False,
-                        help='Whether to speed-up the pruned model')
+                        help='Whether to speedup the pruned model')
    parser.add_argument('--reset-weight', type=bool, default=True,
                        help='Whether to reset weight during each iteration')
@@ -120,8 +120,8 @@ if __name__ == '__main__':
               'evaluator': None,
               'finetuner': finetuner}
-    if args.speed_up:
+    if args.speedup:
-        kw_args['speed_up'] = args.speed_up
+        kw_args['speedup'] = args.speedup
        kw_args['dummy_input'] = torch.rand(10, 3, 32, 32).to(device)
    if args.pruner == 'linear':

--- a/examples/model_compress/pruning/v2/level_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/level_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported level pruning algorithm.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/norm_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/norm_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported l1norm and l2norm pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/scheduler_torch.py
+++ b/examples/model_compress/pruning/v2/scheduler_torch.py
@@ -92,8 +92,8 @@ if __name__ == '__main__':
    # if you just want to keep the final result as the best result, you can pass evaluator as None.
    # or the result with the highest score (given by evaluator) will be the best result.
-    # scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=evaluator)
+    # scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=evaluator)
-    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
+    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
    scheduler.compress()

--- a/examples/model_compress/pruning/v2/simple_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/simple_pruning_torch.py
@@ -77,12 +77,12 @@ if __name__ == '__main__':
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand(10, 3, 32, 32).to(device), masks_file=masks).speedup_model()
-    print('\nThe accuracy after speed up:')
+    print('\nThe accuracy after speedup:')
    evaluator(model)
    # A new optimizer is needed because the modules in the model will be replaced during speedup.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
-    print('\nFinetune the model after speed up:')
+    print('\nFinetune the model after speedup:')
    for i in range(5):
        trainer(model, optimizer, criterion, i)
        evaluator(model)
--- a/examples/model_compress/pruning/v2/slim_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/slim_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported slim pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> speedup -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/model_compress/pruning/v2/taylorfo_pruning_torch.py
+++ b/examples/model_compress/pruning/v2/taylorfo_pruning_torch.py
@@ -4,7 +4,7 @@
 '''
 NNI example for supported TaylorFOWeight pruning algorithms.
 In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
-Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
+Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
 '''
 import argparse

--- a/examples/tutorials/pruning_quick_start_mnist.py
+++ b/examples/tutorials/pruning_quick_start_mnist.py
@@ -82,27 +82,27 @@ for name, mask in masks.items():
    print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
 # %%
-# Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
+# Speedup the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
-# The model becomes smaller after speed-up,
+# The model becomes smaller after speedup,
 # and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the masks across layers.
-# need to unwrap the model, if the model is wrapped before speed up
+# need to unwrap the model, if the model is wrapped before speedup
 pruner._unwrap_model()
-# speed up the model
+# speedup the model
 from nni.compression.pytorch.speedup import ModelSpeedup
 ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks).speedup_model()
 # %%
-# the model will become real smaller after speed up
+# the model will actually become smaller after speedup
 print(model)
 # %%
 # Fine-tuning Compacted Model
 # ---------------------------
 # Note that if the model has been sped up, you need to re-initialize a new optimizer for fine-tuning.
-# Because speed up will replace the masked big layers with dense small ones.
+# Because speedup will replace the masked big layers with dense small ones.
 optimizer = SGD(model.parameters(), 1e-2)
 for epoch in range(3):

--- a/docs/source/tutorials/pruning_speed_up.py
+++ b/docs/source/tutorials/pruning_speed_up.py
 """
-Speed Up Model with Mask
+Speedup Model with Mask
 ========================
 Introduction
@@ -12,16 +12,16 @@ to convert a model to a smaller one based on user provided masks (the masks come
 pruning algorithms).
 There are two types of pruning. One is fine-grained pruning, it does not change the shape of weights,
-and input/output tensors. Sparse kernel is required to speed up a fine-grained pruned layer.
+and input/output tensors. Sparse kernel is required to speedup a fine-grained pruned layer.
 The other is coarse-grained pruning (e.g., channels), shape of weights and input/output tensors usually change due to such pruning.
-To speed up this kind of pruning, there is no need to use sparse kernel, just replace the pruned layer with smaller one.
+To speedup this kind of pruning, there is no need to use sparse kernel, just replace the pruned layer with smaller one.
 Since the support of sparse kernels in community is limited,
 we only support the speedup of coarse-grained pruning and leave the support of fine-grained pruning in future.
 Design and Implementation
 -------------------------
-To speed up a model, the pruned layers should be replaced, either replaced with smaller layer for coarse-grained mask,
+To speedup a model, the pruned layers should be replaced, either replaced with smaller layer for coarse-grained mask,
 or replaced with sparse kernel for fine-grained mask. Coarse-grained mask usually changes the shape of weights or input/output tensors,
 thus, we should do shape inference to check whether other unpruned layers should be replaced as well due to the shape change.
 Therefore, in our design, there are two main steps: first, do shape inference to find out all the modules that should be replaced;
@@ -64,13 +64,13 @@ model(torch.rand(128, 1, 28, 28).to(device))
 print('Original Model - Elapsed Time : ', time.time() - start)
 # %%
-# Speed up the model and show the model structure after speed up.
+# Speedup the model and show the model structure after speedup.
 from nni.compression.pytorch import ModelSpeedup
 ModelSpeedup(model, torch.rand(10, 1, 28, 28).to(device), masks).speedup_model()
 print(model)
 # %%
-# Roughly test the model after speed-up inference speed.
+# Roughly test the model after speedup inference speed.
 start = time.time()
 model(torch.rand(128, 1, 28, 28).to(device))
 print('Speedup Model - Elapsed Time : ', time.time() - start)
@@ -87,7 +87,7 @@ print('Speedup Model - Elapsed Time : ', time.time() - start)
 # For PyTorch we can only replace modules, if functions in ``forward`` should be replaced,
 # our current implementation does not work. One workaround is to make the function a PyTorch module.
 #
-# If you want to speed up your own model which cannot supported by the current implementation,
+# If you want to speedup your own model which cannot be supported by the current implementation,
 # you need to implement the replace function for module replacement; contributions are welcome.
 #
 # Speedup Results of Examples

--- a/examples/tutorials/quantization_speed_up.py
+++ b/examples/tutorials/quantization_speed_up.py
 """
-Speed Up Model with Calibration Config
+SpeedUp Model with Calibration Config
 ======================================
@@ -8,10 +8,10 @@ Introduction
 Deep learning networks are computationally intensive and memory intensive, 
 which increases the difficulty of deploying deep neural network model. Quantization is a 
-fundamental technology which is widely used to reduce memory footprint and speed up inference 
+fundamental technology which is widely used to reduce memory footprint and speedup inference 
 process. Many frameworks begin to support quantization, but few of them support mixed precision 
 quantization and get real speedup. Frameworks like `HAQ: Hardware-Aware Automated Quantization with Mixed Precision <https://arxiv.org/pdf/1811.08886.pdf>`__\, only support simulated mixed precision quantization which will 
-not speed up the inference process. To get real speedup of mixed precision quantization and 
+not speedup the inference process. To get real speedup of mixed precision quantization and 
 help people get the real feedback from hardware, we design a general framework with simple interface to allow NNI quantization algorithms to connect different 
 DL model optimization backends (e.g., TensorRT, NNFusion), which gives users an end-to-end experience that after quantizing their model 
 with quantization algorithms, the quantized model can be directly sped up with the connected optimization backend. NNI connects 
@@ -108,7 +108,7 @@ calibration_config = quantizer.export_model(model_path, calibration_path)
 print("calibration_config: ", calibration_config)
 # %%
-# build tensorRT engine to make a real speed up
+# build tensorRT engine to make a real speedup
 # from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
 # input_shape = (32, 1, 28, 28)

--- a/nni/algorithms/compression/pytorch/pruning/auto_compress_pruner.py
+++ b/nni/algorithms/compression/pytorch/pruning/auto_compress_pruner.py
@@ -201,7 +201,7 @@ class AutoCompressPruner(Pruner):
            ADMMpruner.export_model(os.path.join(self._experiment_data_dir, 'model_admm_masked.pth'), os.path.join(
                self._experiment_data_dir, 'mask.pth'))
-            # use speed up to prune the model before next iteration,
+            # use speedup to prune the model before next iteration,
            # because SimulatedAnnealingPruner & ADMMPruner don't take masked models
            self._model_to_prune.load_state_dict(torch.load(os.path.join(
                self._experiment_data_dir, 'model_admm_masked.pth')))

--- a/nni/algorithms/compression/v2/pytorch/base/scheduler.py
+++ b/nni/algorithms/compression/v2/pytorch/base/scheduler.py
@@ -20,7 +20,7 @@ class Task:
    _reference_counter = {}
    def __init__(self, task_id: int, model_path: str, masks_path: str, config_list_path: str,
-                 speed_up: Optional[bool] = True, finetune: Optional[bool] = True, evaluate: Optional[bool] = True):
+                 speedup: Optional[bool] = True, finetune: Optional[bool] = True, evaluate: Optional[bool] = True):
        """
        Parameters
        ----------
@@ -32,8 +32,8 @@ class Task:
            The path of the masks that applied on the model before pruning.
        config_list_path
            The path of the config list that used in this task.
-        speed_up
+        speedup
-            Control if this task needs speed up, True means use scheduler default value, False means no speed up.
+            Control if this task needs speedup, True means use scheduler default value, False means no speedup.
        finetune
            Control if this task needs finetune, True means use scheduler default value, False means no finetune.
        evaluate
@@ -44,7 +44,7 @@ class Task:
        self.masks_path = masks_path
        self.config_list_path = config_list_path
-        self.speed_up = speed_up
+        self.speedup = speedup
        self.finetune = finetune
        self.evaluate = evaluate
@@ -65,7 +65,7 @@ class Task:
            'model_path': str(self.model_path),
            'masks_path': str(self.masks_path),
            'config_list_path': str(self.config_list_path),
-            'speed_up': self.speed_up,
+            'speedup': self.speedup,
            'finetune': self.finetune,
            'evaluate': self.evaluate,
            'status': self.status,

--- a/nni/algorithms/compression/v2/pytorch/pruning/amc_pruner.py
+++ b/nni/algorithms/compression/v2/pytorch/pruning/amc_pruner.py
@@ -185,7 +185,7 @@ class AMCPruner(IterativePruner):
            - op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
            - exclude  : Set True then the layers setting by op_types and op_names will be excluded from pruning.
    dummy_input : torch.Tensor
-        `dummy_input` is required for speed-up and tracing the model in RL environment.
+        `dummy_input` is required for speedup and tracing the model in RL environment.
    evaluator : Callable[[Module], float]
        Evaluate the pruned model and give a score.
    pruning_algorithm : str
@@ -249,5 +249,5 @@ class AMCPruner(IterativePruner):
                                          ddpg_params=ddpg_params,
                                          target=target)
        pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
-        super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input,
+        super().__init__(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input,
                         evaluator=evaluator, reset_weight=False)
--- a/nni/algorithms/compression/v2/pytorch/pruning/auto_compress_pruner.py
+++ b/nni/algorithms/compression/v2/pytorch/pruning/auto_compress_pruner.py
@@ -112,10 +112,10 @@ class AutoCompressPruner(IterativePruner):
    finetuner : Optional[Callable[[Module], None]]
        The finetuner handles all finetune logic, takes a pytorch module as input.
        It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration.
-    speed_up : bool
+    speedup : bool
-        If set True, speed up the model at the end of each iteration to make the pruned model compact.
+        If set True, speedup the model at the end of each iteration to make the pruned model compact.
    dummy_input : Optional[torch.Tensor]
-        If `speed_up` is True, `dummy_input` is required for tracing the model in speed up.
+        If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
    Examples
    --------
@@ -148,7 +148,7 @@ class AutoCompressPruner(IterativePruner):
    def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict,
                 sa_params: Dict, log_dir: str = '.', keep_intermediate_result: bool = False,
-                 finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False,
+                 finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False,
                 dummy_input: Optional[Tensor] = None, evaluator: Callable[[Module], float] = None):
        task_generator = AutoCompressTaskGenerator(total_iteration=total_iteration,
                                                   origin_model=model,
@@ -159,5 +159,5 @@ class AutoCompressPruner(IterativePruner):
        if 'traced_optimizer' in admm_params:
            admm_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, admm_params['traced_optimizer'])
        pruner = ADMMPruner(None, None, **admm_params)
-        super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input,
+        super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
                         evaluator=evaluator, reset_weight=False)