Unverified commit 4784cc6c, authored by liuzhe-lz, committed by GitHub

Merge pull request #3302 from microsoft/v2.0-merge

Merge branch v2.0 into master (no squash)
Parents: 25db55ca, 349ead41
@@ -3,7 +3,7 @@
You can run these examples easily like this; take torch pruning, for example:
```bash
-python main_torch_pruner.py
+python model_prune_torch.py
```
This example uses AGP Pruner. Initializing a pruner requires a user-provided configuration, which can be supplied in two ways:
@@ -14,7 +14,7 @@ This example uses AGP Pruner. Initiating a pruner needs a user provided configur
In our example, we simply configure model compression in our code like this:
```python
-configure_list = [{
+config_list = [{
    'initial_sparsity': 0,
    'final_sparsity': 0.8,
    'start_epoch': 0,
@@ -22,7 +22,7 @@ configure_list = [{
    'frequency': 1,
    'op_types': ['default']
}]
-pruner = AGPPruner(configure_list)
+pruner = AGPPruner(config_list)
```
When ```pruner(model)``` is called, your model is injected with masks as embedded operations. For example, if a layer takes a weight as input, we insert an operation between the weight and the layer; this operation takes the weight as input and outputs a new weight with the mask applied. Thus, the masks are applied whenever the computation goes through these operations. You can fine-tune your model **without** any modifications.
...
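For readers following along, a minimal end-to-end sketch of the AGP workflow this README describes is shown below. It is only a sketch: the import path and the `AGPPruner` constructor signature vary across NNI releases, and `MyModel` and `train_one_epoch` are hypothetical placeholders.

```python
import torch
from nni.compression.torch import AGPPruner  # import path differs between NNI versions

# Gradually raise sparsity from 0% to 80% over ten epochs.
config_list = [{
    'initial_sparsity': 0,
    'final_sparsity': 0.8,
    'start_epoch': 0,
    'end_epoch': 10,
    'frequency': 1,          # update the masks once per epoch
    'op_types': ['default']
}]

model = MyModel()  # hypothetical model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
pruner = AGPPruner(model, config_list, optimizer)
model = pruner.compress()  # inject the mask operations described above

for epoch in range(10):
    pruner.update_epoch(epoch)          # advance the AGP sparsity schedule
    train_one_epoch(model, optimizer)   # hypothetical training loop
```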
@@ -14,7 +14,7 @@ python main_torch_pruner.py
In this example, model compression is configured directly in the code:
```python
-configure_list = [{
+config_list = [{
    'initial_sparsity': 0,
    'final_sparsity': 0.8,
    'start_epoch': 0,
@@ -22,7 +22,7 @@ configure_list = [{
    'frequency': 1,
    'op_types': ['default']
}]
-pruner = AGPPruner(configure_list)
+pruner = AGPPruner(config_list)
```
When `pruner(model)` is called, mask operations are embedded into the model. For example, if a layer takes a weight as input, an operation can be inserted between the weight and the layer; it takes the weight as input and outputs the weight with the mask applied. Thus, whenever the computation goes through this operation, the mask is applied. You can also fine-tune the model **without** any modifications.
...
@@ -186,7 +186,7 @@ def get_trained_model_optimizer(args, device, train_loader, val_loader, criterio
if args.save_model:
    torch.save(state_dict, os.path.join(args.experiment_data_dir, 'model_trained.pth'))
-    print('Model trained saved to %s', args.experiment_data_dir)
+    print('Model trained saved to %s' % args.experiment_data_dir)
return model, optimizer
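The `print` fixes in this file address a common slip: unlike the `logging` functions, `print` does not interpolate a `%s` placeholder from a second argument. A quick illustration (the strings are just for demonstration):

```python
import logging

path = './experiment_data'

# Wrong: print() joins its positional arguments with a space,
# so the placeholder is left verbatim in the output.
print('Model trained saved to %s', path)   # Model trained saved to %s ./experiment_data

# Right: apply %-formatting to the string before printing.
print('Model trained saved to %s' % path)  # Model trained saved to ./experiment_data

# logging, by contrast, performs the interpolation itself.
logging.warning('Model trained saved to %s', path)
```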
@@ -229,7 +229,7 @@ def main(args):
# used to save the performance of the original & pruned & finetuned models
result = {'flops': {}, 'params': {}, 'performance':{}}
-flops, params = count_flops_params(model, get_input_size(args.dataset))
+flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
result['flops']['original'] = flops
result['params']['original'] = params
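The extra `_` at these call sites reflects `count_flops_params` in NNI v2.0 returning a third value (per-module results) alongside the totals. A hedged sketch of the updated usage follows; the torchvision model, input shape, and import path are assumptions for illustration:

```python
import torchvision.models as models
from nni.compression.pytorch.utils.counter import count_flops_params  # path may vary by NNI version

model = models.resnet18()
# v2.0 returns (total_flops, total_params, per_module_results);
# earlier versions returned only the first two values.
flops, params, _ = count_flops_params(model, (1, 3, 224, 224))
print('FLOPs: %.2fM, params: %.2fM' % (flops / 1e6, params / 1e6))
```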
@@ -238,7 +238,7 @@ def main(args):
result['performance']['original'] = evaluation_result
# module types to prune, only "Conv2d" supported for channel pruning
-if args.base_algo in ['l1', 'l2']:
+if args.base_algo in ['l1', 'l2', 'fpgm']:
    op_types = ['Conv2d']
elif args.base_algo == 'level':
    op_types = ['default']
@@ -261,7 +261,7 @@ def main(args):
elif args.pruner == 'ADMMPruner':
    # users are free to change the config here
    if args.model == 'LeNet':
-        if args.base_algo in ['l1', 'l2']:
+        if args.base_algo in ['l1', 'l2', 'fpgm']:
            config_list = [{
                'sparsity': 0.8,
                'op_types': ['Conv2d'],
@@ -312,7 +312,7 @@ def main(args):
if args.save_model:
    pruner.export_model(
        os.path.join(args.experiment_data_dir, 'model_masked.pth'), os.path.join(args.experiment_data_dir, 'mask.pth'))
-    print('Masked model saved to %s', args.experiment_data_dir)
+    print('Masked model saved to %s' % args.experiment_data_dir)
# model speed up
if args.speed_up:
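For context, `export_model` in this hunk writes two files: the pruned weights (with masks applied) and the mask tensors that the speed-up step below consumes. A small sketch, assuming `args`, `pruner`, and `model` as defined earlier in this script:

```python
import os
import torch

# Assumed to exist from the surrounding script: args, pruner, model.
model_path = os.path.join(args.experiment_data_dir, 'model_masked.pth')
mask_path = os.path.join(args.experiment_data_dir, 'mask.pth')
pruner.export_model(model_path, mask_path)

# The weights checkpoint reloads like any state_dict; the mask file
# is later fed to ModelSpeedup to physically shrink the layers.
model.load_state_dict(torch.load(model_path))
```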
@@ -336,8 +336,8 @@ def main(args):
result['performance']['speedup'] = evaluation_result
torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
-print('Speed up model saved to %s', args.experiment_data_dir)
-flops, params = count_flops_params(model, get_input_size(args.dataset))
+print('Speed up model saved to %s' % args.experiment_data_dir)
+flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
result['flops']['speedup'] = flops
result['params']['speedup'] = params
@@ -367,7 +367,7 @@ def main(args):
torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
print('Evaluation result (fine tuned): %s' % best_acc)
-print('Fined tuned model saved to %s', args.experiment_data_dir)
+print('Fine-tuned model saved to %s' % args.experiment_data_dir)
result['performance']['finetuned'] = best_acc
with open(os.path.join(args.experiment_data_dir, 'result.json'), 'w+') as f:
@@ -414,7 +414,7 @@ if __name__ == '__main__':
parser.add_argument('--pruner', type=str, default='SimulatedAnnealingPruner',
                    help='pruner to use')
parser.add_argument('--base-algo', type=str, default='l1',
-                    help='base pruning algorithm. level, l1 or l2')
+                    help='base pruning algorithm. level, l1, l2, or fpgm')
parser.add_argument('--sparsity', type=float, default=0.1,
                    help='target overall sparsity')
# param for SimulatedAnnealingPruner
...
@@ -212,7 +212,7 @@ def main(args):
train_loader, test_loader = get_data_loaders(dataset_name, args.batch_size)
dummy_input, _ = next(iter(train_loader))
dummy_input = dummy_input.to(device)
-model = create_model(model_name).cuda()
+model = create_model(model_name).to(device)
if args.resume_from is not None and os.path.exists(args.resume_from):
    print('loading checkpoint {} ...'.format(args.resume_from))
    model.load_state_dict(torch.load(args.resume_from))
...
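The `.cuda()` to `.to(device)` change makes this example runnable on CPU-only machines. It adopts the standard device-agnostic PyTorch pattern; the model and tensors below are illustrative:

```python
import torch
import torch.nn as nn

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Linear(10, 2).to(device)  # illustrative model
x = torch.randn(4, 10).to(device)    # inputs must live on the same device
y = model(x)
```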
@@ -16,25 +16,21 @@ compare_results = True
config = {
    'apoz': {
        'model_name': 'vgg16',
-        'device': 'cuda',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './checkpoints/mask_vgg16_cifar10_apoz.pth'
    },
    'l1filter': {
        'model_name': 'vgg16',
-        'device': 'cuda',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './checkpoints/mask_vgg16_cifar10_l1filter.pth'
    },
    'fpgm': {
        'model_name': 'naive',
-        'device': 'cpu',
        'input_shape': [64, 1, 28, 28],
        'masks_file': './checkpoints/mask_naive_mnist_fpgm.pth'
    },
    'slim': {
        'model_name': 'vgg19',
-        'device': 'cuda',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './checkpoints/mask_vgg19_cifar10_slim.pth'  # 'mask_vgg19_cifar10.pth'
    }
@@ -42,7 +38,10 @@ config = {
def model_inference(config):
    masks_file = config['masks_file']
-    device = torch.device(config['device'])
+    device = torch.device(
+        'cuda') if torch.cuda.is_available() else torch.device('cpu')
+    # device = torch.device(config['device'])
    if config['model_name'] == 'vgg16':
        model = VGG(depth=16)
    elif config['model_name'] == 'vgg19':
@@ -57,14 +56,13 @@ def model_inference(config):
use_mask_out = use_speedup_out = None
# must run use_mask before use_speedup because use_speedup modifies the model
if use_mask:
-    apply_compression_results(model, masks_file, 'cpu' if config['device'] == 'cpu' else None)
+    apply_compression_results(model, masks_file, device)
    start = time.time()
    for _ in range(32):
        use_mask_out = model(dummy_input)
    print('elapsed time when use mask: ', time.time() - start)
if use_speedup:
-    m_speedup = ModelSpeedup(model, dummy_input, masks_file,
-                             'cpu' if config['device'] == 'cpu' else None)
+    m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
    m_speedup.speedup_model()
    start = time.time()
    for _ in range(32):
...
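Putting the new signatures together, the test now auto-detects a `device` and passes it to both `apply_compression_results` and `ModelSpeedup` instead of the old `'cpu'`-or-`None` switch. Below is a condensed sketch of the mask-versus-speed-up timing comparison the test performs; `build_model`, the mask file path, and the import path are placeholders or assumptions:

```python
import time
import torch
from nni.compression.pytorch import apply_compression_results, ModelSpeedup  # path may differ by NNI version

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = build_model().to(device)                           # placeholder model factory
dummy_input = torch.randn(64, 3, 32, 32).to(device)
masks_file = './checkpoints/mask_vgg16_cifar10_apoz.pth'   # placeholder mask file

# 1) Masked inference: masks run as extra ops, layer shapes unchanged.
apply_compression_results(model, masks_file, device)
start = time.time()
for _ in range(32):
    model(dummy_input)
print('elapsed time when use mask:', time.time() - start)

# 2) Speed-up: layers are physically shrunk according to the masks.
ModelSpeedup(model, dummy_input, masks_file, device).speedup_model()
start = time.time()
for _ in range(32):
    model(dummy_input)
print('elapsed time after speedup:', time.time() - start)
```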