Unverified commit a911b856, authored by Yuge Zhang, committed by GitHub

Resolve conflicts for #4760 (#4762)

parent 14d2966b
......@@ -7,10 +7,10 @@ import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.compression.pytorch.utils import count_flops_params
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[3] / 'models'))
from mobilenet import MobileNet
from mobilenet_v2 import MobileNetV2
......
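For reference, the relocated helper appears unchanged apart from its import path; a minimal sketch of the new-style call, assuming an ImageNet-sized input for the MobileNet imported above:

    import torch
    from nni.compression.pytorch.utils import count_flops_params  # new import path

    model = MobileNet()                         # model class imported in this example
    dummy_input = torch.randn(1, 3, 224, 224)   # assumed ImageNet-style input shape
    # Returns total FLOPs, total parameter count, and a per-layer breakdown.
    flops, params, results = count_flops_params(model, dummy_input)
    print(f'FLOPs: {flops}, Params: {params}')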
......@@ -5,7 +5,7 @@ import time
import torch
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[3] / 'models'))
from cifar10.vgg import VGG
from mnist.lenet import LeNet
......
......@@ -29,7 +29,7 @@ if [[ ${TASK_LIST[*]} =~ (^|[[:space:]])$TASK_NAME($|[[:space:]]) ]]; then
--ranking_criterion $RANKING_CRITERION \
--num_iterations $NUM_ITERATIONS \
--epochs_per_iteration $EPOCHS_PER_ITERATION \
--speed_up \
--speedup \
--model_name $PRETRAINED_MODEL \
--task_name $TASK_NAME \
--max_length $MAX_LENGTH \
......
......@@ -9,7 +9,7 @@ import torch
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.compression.pytorch.utils import count_flops_params
from nni.algorithms.compression.pytorch.pruning import TransformerHeadPruner
import datasets
......@@ -53,8 +53,8 @@ def parse_args():
parser.add_argument("--epochs_per_iteration", type=int, default=1,
help="Epochs to finetune before the next pruning iteration "
"(only effective if num_iterations > 1).")
parser.add_argument("--speed_up", action="store_true", default=False,
help="Whether to speed-up the pruned model")
parser.add_argument("--speedup", action="store_true", default=False,
help="Whether to speedup the pruned model")
# parameters for model training; no need to change them for running examples
parser.add_argument("--max_length", type=int, default=128,
......@@ -338,8 +338,8 @@ def main():
# Currently, speeding up Transformers through NNI ModelSpeedup is not supported because of shape inference issues.
# However, if you are using the transformers library, you can use the following workaround:
# The following code gets the head pruning decisions from the pruner and calls the _prune_heads() function
# implemented in models from the transformers library to speed up the model.
if args.speed_up:
# implemented in models from the transformers library to speedup the model.
if args.speedup:
speedup_rules = {}
for group_idx, group in enumerate(pruner.attention_name_groups):
# get the layer index
......
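For context on the flag renamed above, a minimal sketch of the transformers-side call that the workaround ultimately relies on, i.e. the public wrapper around the _prune_heads() mentioned in the comment (the layer and head indices are hypothetical):

    # PreTrainedModel.prune_heads takes {layer_index: [head indices to remove]}
    # and physically shrinks those attention layers; the speedup_rules dict
    # above is being assembled into this structure.
    heads_to_prune = {0: [2, 5], 3: [0]}  # hypothetical pruning decisions
    model.prune_heads(heads_to_prune)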
......@@ -4,7 +4,7 @@
'''
NNI example for supported level pruning algorithm.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
......@@ -14,11 +14,11 @@ import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import LevelPruner
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import LevelPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
......
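The import moves above reflect NNI's consolidated nni.compression.pytorch.pruning namespace; a minimal sketch of the new-style pruner call (the config_list values are illustrative):

    from nni.compression.pytorch.pruning import LevelPruner  # new import path

    # Illustrative config: prune 50% of the weights in every Linear layer.
    config_list = [{'sparsity': 0.5, 'op_types': ['Linear']}]
    pruner = LevelPruner(model, config_list)
    # Masks only simulate pruning; model speedup is needed for a real compressed model.
    _, masks = pruner.compress()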
import functools
import time
from tqdm import tqdm
import torch
......@@ -14,7 +15,7 @@ from transformers import (
)
import nni
from nni.algorithms.compression.v2.pytorch.pruning import MovementPruner
from nni.compression.pytorch.pruning import MovementPruner
task_to_keys = {
......@@ -31,7 +32,7 @@ task_to_keys = {
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gradient_accumulation_steps = 16
gradient_accumulation_steps = 8
# a fake criterion, because the HuggingFace output already contains the loss
def criterion(input, target):
......@@ -40,7 +41,7 @@ def criterion(input, target):
def trainer(model, optimizer, criterion, train_dataloader):
model.train()
counter = 0
for batch in tqdm(train_dataloader):
for batch in (train_dataloader):
counter += 1
batch.to(device)
optimizer.zero_grad()
......@@ -51,12 +52,14 @@ def trainer(model, optimizer, criterion, train_dataloader):
loss.backward()
if counter % gradient_accumulation_steps == 0 or counter == len(train_dataloader):
optimizer.step()
if counter % 16000 == 0:
if counter % 800 == 0:
print('[{}]: {}'.format(time.asctime(time.localtime(time.time())), counter))
if counter % 8000 == 0:
print('Step {}: {}'.format(counter // gradient_accumulation_steps, evaluator(model, metric, is_regression, validate_dataloader)))
def evaluator(model, metric, is_regression, eval_dataloader):
model.eval()
for batch in tqdm(eval_dataloader):
for batch in (eval_dataloader):
batch.to(device)
outputs = model(**batch)
predictions = outputs.logits.argmax(dim=-1) if not is_regression else outputs.logits.squeeze()
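A note on the retuned cadence in the trainer above: with gradient_accumulation_steps = 8, printing every 800 batches corresponds to every 100 optimizer steps, and evaluating every 8,000 batches to every 1,000 optimizer steps, which is why the printed step counter is counter // gradient_accumulation_steps.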
......@@ -70,8 +73,8 @@ if __name__ == '__main__':
task_name = 'mnli'
is_regression = False
num_labels = 1 if is_regression else (3 if task_name == 'mnli' else 2)
train_batch_size = 8
eval_batch_size = 8
train_batch_size = 4
eval_batch_size = 4
set_seed(1024)
......@@ -113,7 +116,7 @@ if __name__ == '__main__':
# make sure you have used nni.trace to wrap the optimizer class before initializing it
traced_optimizer = nni.trace(Adam)(model.parameters(), lr=2e-5)
pruner = MovementPruner(model, config_list, p_trainer, traced_optimizer, criterion, training_epochs=10,
warm_up_step=3000, cool_down_beginning_step=27000)
warm_up_step=12272, cool_down_beginning_step=110448)
_, masks = pruner.compress()
pruner.show_pruned_weights()
......
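The retuned schedule appears to track the smaller effective batch size: assuming MNLI's 392,702 training examples, one epoch is 392,702 / 4 ≈ 98,176 batches, i.e. 98,176 / 8 ≈ 12,272 optimizer steps under gradient accumulation, so warm_up_step = 12272 covers roughly the first epoch and cool_down_beginning_step = 110448 (9 × 12,272) begins at roughly epoch nine of the ten training epochs.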
......@@ -4,7 +4,7 @@
'''
NNI example for supported l1norm and l2norm pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
......@@ -15,11 +15,11 @@ from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import L1NormPruner, L2NormPruner
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import L1NormPruner, L2NormPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
......
......@@ -4,12 +4,12 @@ from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
from nni.compression.pytorch.pruning import L1NormPruner
from nni.algorithms.compression.v2.pytorch.pruning.tools import AGPTaskGenerator
from nni.algorithms.compression.v2.pytorch.pruning.basic_scheduler import PruningScheduler
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
......@@ -92,8 +92,8 @@ if __name__ == '__main__':
# If you just want to keep the final result as the best result, pass evaluator as None;
# otherwise, the result with the highest score (given by the evaluator) will be kept as the best result.
# scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=evaluator)
scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
# scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=evaluator)
scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input, evaluator=None, reset_weight=False)
scheduler.compress()
......
......@@ -4,11 +4,11 @@ from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
from nni.compression.pytorch.pruning import L1NormPruner
from nni.compression.pytorch.speedup import ModelSpeedup
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
......@@ -77,12 +77,12 @@ if __name__ == '__main__':
pruner._unwrap_model()
ModelSpeedup(model, dummy_input=torch.rand(10, 3, 32, 32).to(device), masks_file=masks).speedup_model()
print('\nThe accuracy after speed up:')
print('\nThe accuracy after speedup:')
evaluator(model)
# A new optimizer is needed because the modules in the model are replaced during speedup.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
print('\nFinetune the model after speed up:')
print('\nFinetune the model after speedup:')
for i in range(5):
trainer(model, optimizer, criterion, i)
evaluator(model)
......@@ -13,10 +13,10 @@ from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from nni.algorithms.compression.v2.pytorch.pruning import SimulatedAnnealingPruner
from nni.compression.pytorch.pruning import SimulatedAnnealingPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
......
......@@ -4,7 +4,7 @@
'''
NNI example for supported slim pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> speedup -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
......@@ -16,11 +16,11 @@ from torch.optim.lr_scheduler import MultiStepLR
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import SlimPruner
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import SlimPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
......
......@@ -4,7 +4,7 @@
'''
NNI example for supported TaylorFOWeight pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
......@@ -16,11 +16,11 @@ from torch.optim.lr_scheduler import MultiStepLR
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import TaylorFOWeightPruner
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import TaylorFOWeightPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
......
......@@ -8,3 +8,4 @@ _generated_model_*.py
_generated_model
generated
lightning_logs
model.onnx
......@@ -4,8 +4,8 @@ import warnings
import torch
import torch.nn as torch_nn
from torchvision.models.utils import load_state_dict_from_url
import torch.nn.functional as F
from nni.retiarii import model_wrapper
import sys
from pathlib import Path
......@@ -111,7 +111,7 @@ def _get_depths(depths, alpha):
rather than down. """
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
@model_wrapper
class MNASNet(nn.Module):
""" MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
implements the B1 variant of the model.
......@@ -180,7 +180,7 @@ class MNASNet(nn.Module):
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
self.classifier = nn.Sequential(nn.Dropout(p=dropout),
nn.Linear(1280, num_classes))
self._initialize_weights()
#self.for_test = 10
......
......@@ -107,6 +107,7 @@ def evaluate_model(model_cls):
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
for epoch in range(3):
# train the model for one epoch
train_epoch(model, device, train_loader, optimizer, epoch)
......@@ -138,7 +139,7 @@ if __name__ == '__main__':
# exp_config.execution_engine = 'base'
# export_formatter = 'code'
exp.run(exp_config, 8081 + random.randint(0, 100))
exp.run(exp_config, 8080)
print('Final model:')
for model_code in exp.export_top_models(formatter=export_formatter):
print(model_code)