Commit e773dfcc authored by qianyj's avatar qianyj
Browse files

create branch for v2.9

parents
import sys
from tqdm import tqdm
import torch
from torchvision import datasets, transforms
import nni
from nni.compression.pytorch.pruning import AutoCompressPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
normalize,
])),
batch_size=128, shuffle=False)
criterion = torch.nn.CrossEntropyLoss()
epoch = 0
def trainer(model, optimizer, criterion):
    """Run one training epoch over the global CIFAR10 train_loader.

    Increments the module-level ``epoch`` counter after the pass so the
    progress-bar label reflects the cumulative epoch count across calls.
    """
    global epoch
    model.train()
    progress = tqdm(iterable=train_loader, desc='Total Epoch {}'.format(epoch))
    for inputs, labels in progress:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
    epoch += 1
def finetuner(model):
    """One fine-tuning epoch with a fresh SGD optimizer.

    Passed to the pruner, which calls it between pruning iterations.
    """
    sgd = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    loss_fn = torch.nn.CrossEntropyLoss()
    trainer(model, sgd, loss_fn)
def evaluator(model):
    """Evaluate `model` on the global CIFAR10 test set; return top-1 accuracy (%)."""
    model.eval()
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in tqdm(iterable=test_loader, desc='Test'):
            inputs, labels = inputs.to(device), labels.to(device)
            preds = model(inputs).argmax(dim=1, keepdim=True)
            n_correct += preds.eq(labels.view_as(preds)).sum().item()
    acc = 100 * n_correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc
if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Pre-train the model for 10 epochs before pruning.
    for _ in range(10):
        trainer(model, optimizer, criterion)

    # Prune all Conv2d layers to 80% total sparsity.
    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.8}]
    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # make sure you have used nni.trace to wrap the optimizer class before initialize
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    admm_params = {
        'trainer': trainer,
        'traced_optimizer': traced_optimizer,
        'criterion': criterion,
        'iterations': 10,
        'training_epochs': 1
    }
    sa_params = {'evaluator': evaluator}

    # 10 = total AutoCompress iterations; keep intermediate models for inspection.
    pruner = AutoCompressPruner(model, config_list, 10, admm_params, sa_params,
                                keep_intermediate_result=True, finetuner=finetuner)
    pruner.compress()
    _, model, masks, _, _ = pruner.get_best_result()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for supported fpgm pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
import sys
import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import FPGMPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
normalize,
])),
batch_size=128, shuffle=False)
def trainer(model, optimizer, criterion):
    """One training epoch with periodic loss logging.

    Increments the module-level ``g_epoch`` counter after the pass.
    """
    global g_epoch
    model.train()
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        # Log every 100 batches (skip batch 0).
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(inputs), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1
def evaluator(model):
    """Evaluate on the global test_loader; return top-1 accuracy in percent."""
    model.eval()
    n_correct = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            preds = model(inputs).argmax(dim=1, keepdim=True)
            n_correct += preds.eq(labels.view_as(preds)).sum().item()
    acc = 100 * n_correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc
def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    """Build an SGD optimizer and a MultiStepLR scheduler for `model`.

    The scheduler decays the learning rate by 10x at 50% and 75% of
    `total_epoch`.
    """
    sgd = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    milestones = [int(total_epoch * 0.5), int(total_epoch * 0.75)]
    return sgd, MultiStepLR(sgd, milestones=milestones, gamma=0.1)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None
    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            # BUG FIX: state_dict() returns live references to the model's
            # tensors, so without cloning the "best" snapshot silently tracks
            # every later weight update and load_state_dict restores the FINAL
            # weights, not the best ones.
            best_state_dict = {k: v.detach().clone() for k, v in model.state_dict().items()}
    print("Best accuracy: {}".format(pre_best_acc))
    if best_state_dict is not None:
        model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.5,
        'op_types': ['Conv2d']
    }]
    pruner = FPGMPruner(model, config_list)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # Optimizer used in the pruner might be patched, so recommend to new an optimizer for fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)
    best_acc = 0.0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops/1e6:.2f} M, #Params: {pre_params/1e6:.2f}M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}%')
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for supported iterative pruning algorithms.
In this example, we show the end-to-end iterative pruning process: pre-training -> pruning -> fine-tuning.
'''
import sys
import argparse
from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from nni.compression.pytorch.pruning import (
LinearPruner,
AGPPruner,
LotteryTicketPruner
)
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
normalize,
])),
batch_size=128, shuffle=False)
criterion = torch.nn.CrossEntropyLoss()
def trainer(model, optimizer, criterion, epoch):
    """Run one training epoch over the global train_loader (epoch is label-only)."""
    model.train()
    progress = tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch))
    for inputs, labels in progress:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
def finetuner(model):
    """One fine-tuning pass over the train set with a fresh SGD optimizer.

    Used by the iterative pruners between pruning iterations.
    """
    model.train()
    sgd = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    loss_fn = torch.nn.CrossEntropyLoss()
    for inputs, labels in tqdm(iterable=train_loader, desc='Epoch PFs'):
        inputs, labels = inputs.to(device), labels.to(device)
        sgd.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        sgd.step()
def evaluator(model):
    """Evaluate `model` on the global test set; return top-1 accuracy in percent."""
    model.eval()
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in tqdm(iterable=test_loader, desc='Test'):
            inputs, labels = inputs.to(device), labels.to(device)
            preds = model(inputs).argmax(dim=1, keepdim=True)
            n_correct += preds.eq(labels.view_as(preds)).sum().item()
    acc = 100 * n_correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc
if __name__ == '__main__':
    def _str2bool(value):
        # BUG FIX: argparse's `type=bool` treats ANY non-empty string (even
        # 'False') as True, so `--speedup False` silently enabled speedup.
        # Parse the common boolean spellings explicitly instead.
        return str(value).lower() in ('true', '1', 'yes', 'y')

    parser = argparse.ArgumentParser(description='PyTorch Iterative Example for model compression')
    parser.add_argument('--pruner', type=str, default='linear',
                        choices=['linear', 'agp', 'lottery'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=10,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--total-iteration', type=int, default=10,
                        help='number of iteration to iteratively prune the model')
    parser.add_argument('--pruning-algo', type=str, default='l1',
                        choices=['level', 'l1', 'l2', 'fpgm', 'slim', 'apoz',
                                 'mean_activation', 'taylorfo', 'admm'],
                        help='algorithm to evaluate weights to prune')
    parser.add_argument('--speedup', type=_str2bool, default=False,
                        help='Whether to speedup the pruned model')
    parser.add_argument('--reset-weight', type=_str2bool, default=True,
                        help='Whether to reset weight during each iteration')
    args = parser.parse_args()

    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # pre-train the model
    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion, i)
    evaluator(model)

    config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # if you just want to keep the final result as the best result, you can pass evaluator as None.
    # or the result with the highest score (given by evaluator) will be the best result.
    kw_args = {'pruning_algorithm': args.pruning_algo,
               'total_iteration': args.total_iteration,
               'evaluator': None,
               'finetuner': finetuner}
    if args.speedup:
        kw_args['speedup'] = args.speedup
        kw_args['dummy_input'] = torch.rand(10, 3, 32, 32).to(device)

    if args.pruner == 'linear':
        iterative_pruner = LinearPruner
    elif args.pruner == 'agp':
        iterative_pruner = AGPPruner
    else:
        # 'lottery' -- the argparse choices guarantee one of the three values.
        kw_args['reset_weight'] = args.reset_weight
        iterative_pruner = LotteryTicketPruner
    pruner = iterative_pruner(model, config_list, **kw_args)
    pruner.compress()
    _, model, masks, _, _ = pruner.get_best_result()
    evaluator(model)
cifar-10-python.tar.gz
cifar-10-batches-py/
\ No newline at end of file
# AMCPruner Example
This example shows how to use AMCPruner to prune a model.
## Step 1: train a model for pruning
Run following command to train a mobilenetv2 model:
```bash
python3 amc_train.py --model_type mobilenetv2 --n_epoch 50
```
Once finished, saved checkpoint file can be found at:
```
logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
## Pruning with AMCPruner
Run following command to prune the trained model:
```bash
python3 amc_search.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
Once finished, pruned model and mask can be found at:
```
logs/mobilenetv2_cifar10_r0.5_search-run2
```
## Finetune pruned model
Run `amc_train.py` again with `--ckpt` and `--mask` to speedup and finetune the pruned model:
```bash
python3 amc_train.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_r0.5_search-run2/best_model.pth --mask logs/mobilenetv2_cifar10_r0.5_search-run2/best_mask.pth --n_epoch 100
```
# AMCPruner 示例
此示例将说明如何使用 AMCPruner。
## 步骤一:训练模型
运行以下命令来训练 mobilenetv2 模型:
```bash
python3 amc_train.py --model_type mobilenetv2 --n_epoch 50
```
训练完成之后,检查点文件被保存在这里:
```
logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
## 使用 AMCPruner 剪枝
运行以下命令对模型进行剪枝:
```bash
python3 amc_search.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
完成之后,剪枝后的模型和掩码文件被保存在:
```
logs/mobilenetv2_cifar10_r0.5_search-run2
```
## 微调剪枝后的模型
加上 `--ckpt` 和 `--mask` 参数,再次运行 `amc_train.py` 命令来加速和微调剪枝后的模型。
```bash
python3 amc_train.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_r0.5_search-run2/best_model.pth --mask logs/mobilenetv2_cifar10_r0.5_search-run2/best_mask.pth --n_epoch 100
```
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import argparse
import time
import torch
import torch.nn as nn
from torchvision.models import resnet
from nni.algorithms.compression.pytorch.pruning import AMCPruner
from data import get_split_dataset
from utils import AverageMeter, accuracy
sys.path.append('../../models')
def parse_args():
    """Command-line options for the AMC pruning search."""
    arg_parser = argparse.ArgumentParser(description='AMC search script')
    arg_parser.add_argument('--model_type', default='mobilenet', type=str,
                            choices=['mobilenet', 'mobilenetv2', 'resnet18', 'resnet34', 'resnet50'],
                            help='model to prune')
    arg_parser.add_argument('--dataset', default='cifar10', type=str,
                            choices=['cifar10', 'imagenet'], help='dataset to use (cifar/imagenet)')
    arg_parser.add_argument('--batch_size', default=50, type=int, help='number of data batch size')
    arg_parser.add_argument('--data_root', default='./data', type=str, help='dataset path')
    arg_parser.add_argument('--flops_ratio', default=0.5, type=float,
                            help='target flops ratio to preserve of the model')
    arg_parser.add_argument('--lbound', default=0.2, type=float, help='minimum sparsity')
    arg_parser.add_argument('--rbound', default=1., type=float, help='maximum sparsity')
    arg_parser.add_argument('--ckpt_path', default=None, type=str, help='manual path of checkpoint')
    arg_parser.add_argument('--train_episode', default=800, type=int, help='number of training episode')
    arg_parser.add_argument('--n_gpu', default=1, type=int, help='number of gpu to use')
    arg_parser.add_argument('--n_worker', default=16, type=int, help='number of data loader worker')
    arg_parser.add_argument('--suffix', default=None, type=str, help='suffix of auto-generated log directory')
    return arg_parser.parse_args()
def get_model_and_checkpoint(model, dataset, checkpoint_path, n_gpu=1):
    """Instantiate the requested architecture, optionally load a checkpoint,
    and move the network onto GPU(s) when available.

    `model` is an architecture name; `dataset` decides the classifier width.
    Raises ValueError for unknown datasets, NotImplementedError for unknown models.
    """
    if dataset == 'imagenet':
        n_class = 1000
    elif dataset == 'cifar10':
        n_class = 10
    else:
        raise ValueError('unsupported dataset')

    if model == 'mobilenet':
        from mobilenet import MobileNet
        net = MobileNet(n_class=n_class)
    elif model == 'mobilenetv2':
        from mobilenet_v2 import MobileNetV2
        net = MobileNetV2(n_class=n_class)
    elif model.startswith('resnet'):
        net = resnet.__dict__[model](pretrained=True)
        # swap the ImageNet head for one matching the target class count
        net.fc = nn.Linear(net.fc.in_features, n_class)
    else:
        raise NotImplementedError

    if checkpoint_path:
        print('loading {}...'.format(checkpoint_path))
        sd = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        if 'state_dict' in sd:  # a checkpoint but not a state_dict
            sd = sd['state_dict']
        # strip DataParallel's 'module.' prefix if present
        sd = {k.replace('module.', ''): v for k, v in sd.items()}
        net.load_state_dict(sd)

    if torch.cuda.is_available() and n_gpu > 0:
        net = net.cuda()
        if n_gpu > 1:
            net = torch.nn.DataParallel(net, range(n_gpu))
    return net
def init_data(args):
    """Split the training set into train + val loaders.

    Val size: 5k for CIFAR, 3k for ImageNet. shuffle=False keeps the split
    deterministic so every search episode sees the same samples.
    """
    val_size = 5000 if 'cifar' in args.dataset else 3000
    train_loader, val_loader, _ = get_split_dataset(
        args.dataset, args.batch_size, args.n_worker, val_size,
        data_root=args.data_root, shuffle=False)  # same sampling
    return train_loader, val_loader
def validate(val_loader, model, verbose=False):
    """Run one evaluation pass over `val_loader`; return the average top-5 accuracy.

    NOTE(review): relies on the module-global `device` assigned in the
    __main__ block -- confirm before importing this function elsewhere.
    """
    batch_time, losses = AverageMeter(), AverageMeter()
    top1, top5 = AverageMeter(), AverageMeter()
    criterion = nn.CrossEntropyLoss().cuda()
    model.eval()  # switch to evaluate mode
    end = time.time()
    t1 = time.time()
    with torch.no_grad():
        for i, (images, labels) in enumerate(val_loader):
            labels = labels.to(device)
            input_var = torch.autograd.Variable(images).to(device)
            target_var = torch.autograd.Variable(labels).to(device)
            # compute output and loss
            output = model(input_var)
            loss = criterion(output, target_var)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
    t2 = time.time()
    if verbose:
        print('* Test loss: %.3f top1: %.3f top5: %.3f time: %.3f' %
              (losses.avg, top1.avg, top5.avg, t2 - t1))
    return top5.avg
if __name__ == "__main__":
    args = parse_args()
    # `device` is read by validate() above.
    device = torch.device('cuda') if torch.cuda.is_available() and args.n_gpu > 0 else torch.device('cpu')
    model = get_model_and_checkpoint(args.model_type, args.dataset,
                                     checkpoint_path=args.ckpt_path, n_gpu=args.n_gpu)
    _, val_loader = init_data(args)
    # No per-layer sparsity here: AMC's RL agent decides the ratios.
    config_list = [{'op_types': ['Conv2d', 'Linear']}]
    pruner = AMCPruner(
        model, config_list, validate, val_loader, model_type=args.model_type, dataset=args.dataset,
        train_episode=args.train_episode, flops_ratio=args.flops_ratio, lbound=args.lbound,
        rbound=args.rbound, suffix=args.suffix)
    pruner.compress()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import os
import time
import argparse
import shutil
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tensorboardX import SummaryWriter
from torchvision.models import resnet
from nni.algorithms.compression.pytorch.pruning.amc.lib.net_measure import measure_model
from nni.algorithms.compression.pytorch.pruning.amc.lib.utils import get_output_folder
from nni.compression.pytorch import ModelSpeedup
from data import get_dataset
from utils import AverageMeter, accuracy, progress_bar
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[3] / 'models'))
from mobilenet import MobileNet
from mobilenet_v2 import MobileNetV2
def parse_args():
    """Command-line options for AMC training / fine-tuning."""
    arg_parser = argparse.ArgumentParser(description='AMC train / fine-tune script')
    arg_parser.add_argument('--model_type', default='mobilenet', type=str,
                            choices=['mobilenet', 'mobilenetv2', 'resnet18', 'resnet34', 'resnet50'],
                            help='name of the model to train')
    arg_parser.add_argument('--dataset', default='cifar10', type=str, help='name of the dataset to train')
    arg_parser.add_argument('--lr', default=0.05, type=float, help='learning rate')
    arg_parser.add_argument('--n_gpu', default=4, type=int, help='number of GPUs to use')
    arg_parser.add_argument('--batch_size', default=256, type=int, help='batch size')
    arg_parser.add_argument('--n_worker', default=32, type=int, help='number of data loader worker')
    arg_parser.add_argument('--lr_type', default='cos', type=str, help='lr scheduler (exp/cos/step3/fixed)')
    arg_parser.add_argument('--n_epoch', default=150, type=int, help='number of epochs to train')
    arg_parser.add_argument('--wd', default=4e-5, type=float, help='weight decay')
    arg_parser.add_argument('--seed', default=None, type=int, help='random seed to set')
    arg_parser.add_argument('--data_root', default='./data', type=str, help='dataset path')
    # resume
    arg_parser.add_argument('--ckpt_path', default=None, type=str, help='checkpoint path to fine tune')
    arg_parser.add_argument('--mask_path', default=None, type=str, help='mask path for speedup')
    # run eval
    arg_parser.add_argument('--eval', action='store_true', help='Simply run eval')
    arg_parser.add_argument('--calc_flops', action='store_true', help='Calculate flops')
    return arg_parser.parse_args()
def get_model(args):
    """Build the requested network, optionally restore a checkpoint and apply a
    pruning mask (via ModelSpeedup), then move the net to args.device.

    NOTE(review): the mask is only applied when a checkpoint is also given --
    presumably intentional, since the mask pairs with the searched weights.
    """
    print('=> Building model..')
    if args.dataset == 'imagenet':
        n_class = 1000
    elif args.dataset == 'cifar10':
        n_class = 10
    else:
        raise NotImplementedError

    if args.model_type == 'mobilenet':
        net = MobileNet(n_class=n_class)
    elif args.model_type == 'mobilenetv2':
        net = MobileNetV2(n_class=n_class)
    elif args.model_type.startswith('resnet'):
        net = resnet.__dict__[args.model_type](pretrained=True)
        net.fc = nn.Linear(net.fc.in_features, n_class)
    else:
        raise NotImplementedError

    if args.ckpt_path is not None:
        # the checkpoint can be state_dict exported by amc_search.py or saved by amc_train.py
        print('=> Loading checkpoint {} ..'.format(args.ckpt_path))
        net.load_state_dict(torch.load(args.ckpt_path, torch.device('cpu')))
        if args.mask_path is not None:
            # replay the mask to get a really-compressed network before fine-tuning
            SZ = 224 if args.dataset == 'imagenet' else 32
            dummy = torch.randn(2, 3, SZ, SZ)
            ModelSpeedup(net, dummy, args.mask_path, torch.device('cpu')).speedup_model()

    net.to(args.device)
    if torch.cuda.is_available() and args.n_gpu > 1:
        net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
    return net
def train(epoch, train_loader, device):
    """One training epoch over `train_loader`; logs epoch averages to TensorBoard.

    NOTE(review): uses the module globals `net`, `optimizer`, `criterion` and
    `writer` assigned in the __main__ block.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    batch_time, losses = AverageMeter(), AverageMeter()
    top1, top5 = AverageMeter(), AverageMeter()
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # timing
        batch_time.update(time.time() - end)
        end = time.time()
        progress_bar(batch_idx, len(train_loader), 'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
                     .format(losses.avg, top1.avg, top5.avg))
    writer.add_scalar('loss/train', losses.avg, epoch)
    writer.add_scalar('acc/train_top1', top1.avg, epoch)
    writer.add_scalar('acc/train_top5', top5.avg, epoch)
def test(epoch, test_loader, device, save=True):
    """Evaluate on `test_loader`; when `save` is True, log to TensorBoard and
    checkpoint the model, flagging it best when top-1 beats the global best_acc.

    NOTE(review): uses the module globals `net`, `criterion`, `writer`, `args`,
    `optimizer` and `log_dir` assigned in the __main__ block.
    """
    global best_acc
    net.eval()
    batch_time, losses = AverageMeter(), AverageMeter()
    top1, top5 = AverageMeter(), AverageMeter()
    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()
            progress_bar(batch_idx, len(test_loader), 'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
                         .format(losses.avg, top1.avg, top5.avg))
    if save:
        writer.add_scalar('loss/test', losses.avg, epoch)
        writer.add_scalar('acc/test_top1', top1.avg, epoch)
        writer.add_scalar('acc/test_top5', top5.avg, epoch)
        is_best = top1.avg > best_acc
        if is_best:
            best_acc = top1.avg
        print('Current best acc: {}'.format(best_acc))
        save_checkpoint({
            'epoch': epoch,
            'model': args.model_type,
            'dataset': args.dataset,
            'state_dict': net.module.state_dict() if isinstance(net, nn.DataParallel) else net.state_dict(),
            'acc': top1.avg,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint_dir=log_dir)
def adjust_learning_rate(optimizer, epoch):
    """Set every param group's LR for this epoch according to args.lr_type; return it.

    NOTE(review): the --lr_type help text advertises 'step3', but only
    cos/exp/fixed are handled here -- 'step3' falls through to
    NotImplementedError. Confirm whether it should be supported.
    """
    if args.lr_type == 'cos':  # cos without warm-up
        lr = 0.5 * args.lr * (1 + math.cos(math.pi * epoch / args.n_epoch))
    elif args.lr_type == 'exp':
        step, decay = 1, 0.96
        lr = args.lr * (decay ** (epoch // step))
    elif args.lr_type == 'fixed':
        lr = args.lr
    else:
        raise NotImplementedError
    print('=> lr: {}'.format(lr))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
def save_checkpoint(state, is_best, checkpoint_dir='.'):
    """Write `state` to ckpt.pth in `checkpoint_dir`; duplicate as ckpt.best.pth when `is_best`."""
    target = os.path.join(checkpoint_dir, 'ckpt.pth')
    print('=> Saving checkpoint to {}'.format(target))
    torch.save(state, target)
    if is_best:
        shutil.copyfile(target, target.replace('.pth', '.best.pth'))
if __name__ == '__main__':
    args = parse_args()
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    args.device = torch.device('cuda') if torch.cuda.is_available() and args.n_gpu > 0 else torch.device('cpu')

    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    print('=> Preparing data..')
    train_loader, val_loader, n_class = get_dataset(args.dataset, args.batch_size, args.n_worker,
                                                    data_root=args.data_root)
    net = get_model(args)  # for measure

    if args.calc_flops:
        # only report FLOPs/params, then quit
        IMAGE_SIZE = 224 if args.dataset == 'imagenet' else 32
        n_flops, n_params = measure_model(net, IMAGE_SIZE, IMAGE_SIZE, args.device)
        print('=> Model Parameter: {:.3f} M, FLOPs: {:.3f}M'.format(n_params / 1e6, n_flops / 1e6))
        exit(0)

    criterion = nn.CrossEntropyLoss()
    print('Using SGD...')
    print('weight decay = {}'.format(args.wd))
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.wd)

    if args.eval:  # just run eval
        print('=> Start evaluation...')
        test(0, val_loader, args.device, save=False)
    else:  # train
        print('=> Start training...')
        print('Training {} on {}...'.format(args.model_type, args.dataset))
        train_type = 'train' if args.ckpt_path is None else 'finetune'
        log_dir = get_output_folder('./logs', '{}_{}_{}'.format(args.model_type, args.dataset, train_type))
        print('=> Saving logs to {}'.format(log_dir))
        writer = SummaryWriter(logdir=log_dir)  # tf writer
        for epoch in range(start_epoch, start_epoch + args.n_epoch):
            lr = adjust_learning_rate(optimizer, epoch)
            train(epoch, train_loader, args.device)
            test(epoch, val_loader, args.device)
        writer.close()
        print('=> Best top-1 acc: {}%'.format(best_acc))
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import os
def get_dataset(dset_name, batch_size, n_worker, data_root='../../data'):
    """Build (train_loader, val_loader, n_class) for 'cifar10' or 'imagenet'."""
    print('=> Preparing data..')
    if dset_name == 'cifar10':
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                                   num_workers=n_worker, pin_memory=True, sampler=None)
        testset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_test)
        val_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,
                                                 num_workers=n_worker, pin_memory=True)
        n_class = 10
    elif dset_name == 'imagenet':
        # get dir
        traindir = os.path.join(data_root, 'train')
        valdir = os.path.join(data_root, 'val')
        # preprocessing
        input_size = 224
        imagenet_tran_train = [
            transforms.RandomResizedCrop(input_size, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        imagenet_tran_test = [
            transforms.Resize(int(input_size / 0.875)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        train_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(traindir, transforms.Compose(imagenet_tran_train)),
            batch_size=batch_size, shuffle=True,
            num_workers=n_worker, pin_memory=True, sampler=None)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, transforms.Compose(imagenet_tran_test)),
            batch_size=batch_size, shuffle=False,
            num_workers=n_worker, pin_memory=True)
        n_class = 1000
    else:
        raise NotImplementedError
    return train_loader, val_loader, n_class
def get_split_dataset(dset_name, batch_size, n_worker, val_size, data_root='../data', shuffle=True):
    '''
    Split the train set into train / val loaders for RL search.

    Returns (train_loader, val_loader, n_class). When shuffle is False a
    sequential sampler is used so every run sees the identical split order.
    '''
    if shuffle:
        index_sampler = SubsetRandomSampler
    else:  # every time we use the same order for the split subset
        class SubsetSequentialSampler(SubsetRandomSampler):
            def __iter__(self):
                return (self.indices[i] for i in torch.arange(len(self.indices)).int())
        index_sampler = SubsetSequentialSampler

    print('=> Preparing data: {}...'.format(dset_name))
    if dset_name == 'cifar10':
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        # BUG FIX: trainset previously loaded CIFAR100 while valset and
        # n_class (= 10) assumed CIFAR10; both splits must come from the
        # same CIFAR10 train set.
        trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
        valset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_test)
        n_train = len(trainset)
        indices = list(range(n_train))
        # now shuffle the indices
        #np.random.shuffle(indices)
        assert val_size < n_train
        train_idx, val_idx = indices[val_size:], indices[:val_size]
        train_sampler = index_sampler(train_idx)
        val_sampler = index_sampler(val_idx)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, sampler=train_sampler,
                                                   num_workers=n_worker, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, sampler=val_sampler,
                                                 num_workers=n_worker, pin_memory=True)
        n_class = 10
    elif dset_name == 'imagenet':
        train_dir = os.path.join(data_root, 'train')
        val_dir = os.path.join(data_root, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        input_size = 224
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        test_transform = transforms.Compose([
            transforms.Resize(int(input_size/0.875)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ])
        # NOTE: both splits come from the TRAIN directory (val_dir is unused
        # here) -- the held-out val split is carved from the train set.
        trainset = datasets.ImageFolder(train_dir, train_transform)
        valset = datasets.ImageFolder(train_dir, test_transform)
        n_train = len(trainset)
        indices = list(range(n_train))
        np.random.shuffle(indices)
        assert val_size < n_train
        train_idx, val_idx = indices[val_size:], indices[:val_size]
        train_sampler = index_sampler(train_idx)
        val_sampler = index_sampler(val_idx)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler,
                                                   num_workers=n_worker, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, sampler=val_sampler,
                                                 num_workers=n_worker, pin_memory=True)
        n_class = 1000
    else:
        raise NotImplementedError
    return train_loader, val_loader, n_class
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import os
import time
class AverageMeter(object):
    """Tracks the latest value, running sum, count and mean of a scalar metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def _refresh_avg(self):
        # Recompute the mean, guarding against division by zero.
        if self.count > 0:
            self.avg = self.sum / self.count

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times (contributes ``val * n`` to the sum)."""
        self.val = val
        self.sum += val * n
        self.count += n
        self._refresh_avg()

    def accumulate(self, val, n=1):
        """Add a pre-summed quantity ``val`` covering ``n`` observations."""
        self.sum += val
        self.count += n
        self._refresh_avg()
def accuracy(output, target, topk=(1, 5)):
    """Computes the precision@k for the specified values of k.

    For each requested k that exceeds the number of classes, a ``[0.0]``
    placeholder is appended after the computed scores (matching the order
    convention of the original metric helpers).
    """
    batch_size = target.size(0)
    n_classes = output.size(1)
    # Split requested ks into those we can compute and oversized ones.
    valid_ks = [k for k in topk if k <= n_classes]
    padding = [[0.0] for k in topk if k > n_classes]
    maxk = max(valid_ks)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    scores = []
    for k in valid_ks:
        n_correct = correct[:k].contiguous().view(-1).float().sum(0)
        scores.append(n_correct.mul_(100.0 / batch_size))
    return scores + padding
# Custom progress bar
# `stty size` fails when stdout is not attached to a terminal (pipes, CI,
# notebooks): os.popen then returns an empty string and the unpack raises
# ValueError at import time. Fall back to a sane default width instead.
try:
    _, term_width = os.popen('stty size', 'r').read().split()
    term_width = int(term_width)
except (ValueError, OSError):
    term_width = 80
TOTAL_BAR_LENGTH = 40.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    """Render one frame of a console progress bar for step ``current`` of ``total``.

    Draws ``[====>....]`` sized by the module-level ``TOTAL_BAR_LENGTH``,
    followed by per-step and total elapsed times and an optional ``msg``,
    then re-centers the cursor to print a ``current/total`` counter.
    Mutates the module globals ``last_time`` and ``begin_time``; also reads
    ``term_width``.
    """
    def format_time(seconds):
        # Human-readable duration using at most two units, e.g. '1D2h', '3s45ms'.
        days = int(seconds / 3600 / 24)
        seconds = seconds - days * 3600 * 24
        hours = int(seconds / 3600)
        seconds = seconds - hours * 3600
        minutes = int(seconds / 60)
        seconds = seconds - minutes * 60
        secondsf = int(seconds)
        seconds = seconds - secondsf
        millis = int(seconds * 1000)
        f = ''
        i = 1  # counts units emitted so far; capped at two
        if days > 0:
            f += str(days) + 'D'
            i += 1
        if hours > 0 and i <= 2:
            f += str(hours) + 'h'
            i += 1
        if minutes > 0 and i <= 2:
            f += str(minutes) + 'm'
            i += 1
        if secondsf > 0 and i <= 2:
            f += str(secondsf) + 's'
            i += 1
        if millis > 0 and i <= 2:
            f += str(millis) + 'ms'
            i += 1
        if f == '':
            f = '0ms'
        return f

    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.
    # Split the bar into a filled part, a '>' head, and the remainder.
    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
    sys.stdout.write(' [')
    for i in range(cur_len):
        sys.stdout.write('=')
    sys.stdout.write('>')
    for i in range(rest_len):
        sys.stdout.write('.')
    sys.stdout.write(']')
    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time
    L = []
    L.append(' Step: %s' % format_time(step_time))
    L.append(' | Tot: %s' % format_time(tot_time))
    if msg:
        L.append(' | ' + msg)
    msg = ''.join(L)
    sys.stdout.write(msg)
    # Pad to the terminal width so leftovers from a longer previous frame
    # are overwritten.
    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
        sys.stdout.write(' ')
    # Go back to the center of the bar.
    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
        sys.stdout.write('\b')
    sys.stdout.write(' %d/%d ' % (current+1, total))
    # Carriage-return to redraw in place until the final step, then newline.
    if current < total-1:
        sys.stdout.write('\r')
    else:
        sys.stdout.write('\n')
    sys.stdout.flush()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
Example for supported automatic pruning algorithms.
In this example, we present the usage of automatic pruners (NetAdapt, AutoCompressPruner). L1, L2, FPGM pruners are also executed for comparison purposes.
'''
import argparse
import os
import sys
import json
import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from torchvision import datasets, transforms
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner, L2FilterPruner, FPGMPruner
from nni.algorithms.compression.pytorch.pruning import SimulatedAnnealingPruner, ADMMPruner, NetAdaptPruner, AutoCompressPruner
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from mnist.lenet import LeNet
from cifar10.vgg import VGG
from cifar10.resnet import ResNet18, ResNet50
def get_data(dataset, data_dir, batch_size, test_batch_size):
    '''
    Build train/validation DataLoaders and the matching loss criterion.

    Args:
        dataset: 'mnist' or 'cifar10'
        data_dir: root directory where the dataset is stored/downloaded
        batch_size: training batch size
        test_batch_size: validation batch size

    Returns:
        (train_loader, val_loader, criterion)

    Raises:
        ValueError: if ``dataset`` is not supported.
    '''
    # Worker/pinning options only help when CUDA is available.
    kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {
    }
    if dataset == 'mnist':
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=test_batch_size, shuffle=True, **kwargs)
        criterion = torch.nn.NLLLoss()
    elif dataset == 'cifar10':
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(data_dir, train=True, transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ToTensor(),
                normalize,
            ]), download=True),
            batch_size=batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(data_dir, train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=batch_size, shuffle=False, **kwargs)
        criterion = torch.nn.CrossEntropyLoss()
    else:
        # Previously fell through and raised UnboundLocalError on return;
        # fail fast with a clear message instead.
        raise ValueError('dataset not supported: %s' % dataset)
    return train_loader, val_loader, criterion
def train(args, model, device, train_loader, criterion, optimizer, epoch):
    """Run a single training epoch over ``train_loader``.

    Logs progress every ``args.log_interval`` batches.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if step % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(inputs), n_samples,
                100. * step / n_batches, loss.item()))
def test(model, device, criterion, val_loader):
    """Evaluate ``model`` on ``val_loader``; returns accuracy as a fraction in [0, 1]."""
    model.eval()
    loss_total = 0
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            # sum up batch loss
            loss_total += criterion(logits, labels).item()
            # get the index of the max log-probability
            guess = logits.argmax(dim=1, keepdim=True)
            n_correct += guess.eq(labels.view_as(guess)).sum().item()
    n_total = len(val_loader.dataset)
    test_loss = loss_total / n_total
    accuracy = n_correct / n_total
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, n_correct, n_total, 100. * accuracy))
    return accuracy
def get_trained_model_optimizer(args, device, train_loader, val_loader, criterion):
    """Create the requested model and its optimizer.

    If ``args.load_pretrained_model`` is set, weights are loaded from
    ``args.pretrained_model_dir`` and a small-LR optimizer is returned;
    otherwise the model is pre-trained for ``args.pretrain_epochs`` epochs
    and the best-accuracy weights are kept (and optionally saved).

    Returns:
        (model, optimizer)

    Raises:
        ValueError: if ``args.model`` is not a supported name.
    """
    if args.model == 'LeNet':
        model = LeNet().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            # Small learning rate: the loaded weights are already trained.
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1e-4)
        else:
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    elif args.model == 'resnet18':
        model = ResNet18().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    elif args.model == 'resnet50':
        model = ResNet50().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    else:
        raise ValueError("model not recognized")
    if not args.load_pretrained_model:
        best_acc = 0
        best_epoch = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(model, device, criterion, val_loader)
            if acc > best_acc:
                best_acc = acc
                best_epoch = epoch
                # NOTE(review): this snapshot references the live tensors (not
                # a deep copy), and stays unassigned if accuracy never exceeds
                # 0 — confirm both edge cases are acceptable.
                state_dict = model.state_dict()
        model.load_state_dict(state_dict)
        print('Best acc:', best_acc)
        print('Best epoch:', best_epoch)
        if args.save_model:
            torch.save(state_dict, os.path.join(args.experiment_data_dir, 'model_trained.pth'))
            print('Model trained saved to %s' % args.experiment_data_dir)
    return model, optimizer
def get_dummy_input(args, device):
    """Create a random input batch matching the dataset's sample shape.

    Returns a tensor of shape ``[args.test_batch_size, C, H, W]`` on ``device``.

    Raises:
        ValueError: if ``args.dataset`` is not supported (previously this fell
        through and raised UnboundLocalError on return).
    """
    if args.dataset == 'mnist':
        dummy_input = torch.randn([args.test_batch_size, 1, 28, 28]).to(device)
    elif args.dataset in ['cifar10', 'imagenet']:
        # NOTE(review): imagenet also gets a 32x32 dummy here, which disagrees
        # with get_input_size()'s 256x256 — kept as-is to preserve behavior;
        # confirm whether imagenet speedup is actually exercised.
        dummy_input = torch.randn([args.test_batch_size, 3, 32, 32]).to(device)
    else:
        raise ValueError('dataset not supported: %s' % args.dataset)
    return dummy_input
def get_input_size(dataset):
    """Return the (N, C, H, W) input size used for FLOPs counting.

    Raises:
        ValueError: if ``dataset`` is not supported (previously this fell
        through and raised UnboundLocalError on return).
    """
    if dataset == 'mnist':
        input_size = (1, 1, 28, 28)
    elif dataset == 'cifar10':
        input_size = (1, 3, 32, 32)
    elif dataset == 'imagenet':
        input_size = (1, 3, 256, 256)
    else:
        raise ValueError('dataset not supported: %s' % dataset)
    return input_size
def main(args):
    """End-to-end pruning pipeline: pretrain/load a model, prune it with the
    selected pruner, optionally speed it up, optionally fine-tune, and dump
    flops/params/accuracy results to ``result.json``.
    """
    # prepare dataset
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
    model, optimizer = get_trained_model_optimizer(args, device, train_loader, val_loader, criterion)

    def short_term_fine_tuner(model, epochs=1):
        # Short fine-tune callback used by NetAdaptPruner between iterations.
        for epoch in range(epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)

    def trainer(model, optimizer, criterion, epoch):
        # Training callback handed to ADMM/AutoCompress pruners.
        return train(args, model, device, train_loader, criterion, optimizer, epoch=epoch)

    def evaluator(model):
        # Returns validation accuracy as a fraction in [0, 1].
        return test(model, device, criterion, val_loader)

    # used to save the performance of the original & pruned & finetuned models
    result = {'flops': {}, 'params': {}, 'performance':{}}
    flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
    result['flops']['original'] = flops
    result['params']['original'] = params
    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['performance']['original'] = evaluation_result

    # module types to prune, only "Conv2d" supported for channel pruning
    if args.base_algo in ['l1', 'l2', 'fpgm']:
        op_types = ['Conv2d']
    elif args.base_algo == 'level':
        op_types = ['default']
    config_list = [{
        'sparsity': args.sparsity,
        'op_types': op_types
    }]
    dummy_input = get_dummy_input(args, device)

    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'L2FilterPruner':
        pruner = L2FilterPruner(model, config_list)
    elif args.pruner == 'FPGMPruner':
        pruner = FPGMPruner(model, config_list)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model, config_list, short_term_fine_tuner=short_term_fine_tuner, evaluator=evaluator,
                                base_algo=args.base_algo, experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'ADMMPruner':
        # users are free to change the config here
        if args.model == 'LeNet':
            if args.base_algo in ['l1', 'l2', 'fpgm']:
                config_list = [{
                    'sparsity': 0.8,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv2']
                }]
            elif args.base_algo == 'level':
                config_list = [{
                    'sparsity': 0.8,
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_names': ['conv2']
                }, {
                    'sparsity': 0.991,
                    'op_names': ['fc1']
                }, {
                    'sparsity': 0.93,
                    'op_names': ['fc2']
                }]
        else:
            raise ValueError('Example only implemented for LeNet.')
        pruner = ADMMPruner(model, config_list, trainer=trainer, num_iterations=2, epochs_per_iteration=2)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model, config_list, evaluator=evaluator, base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate, experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'AutoCompressPruner':
        pruner = AutoCompressPruner(
            model, config_list, trainer=trainer, evaluator=evaluator, dummy_input=dummy_input,
            num_iterations=3, optimize_mode='maximize', base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate, admm_num_iterations=30, admm_epochs_per_iteration=5,
            experiment_data_dir=args.experiment_data_dir)
    else:
        raise ValueError(
            "Pruner not supported.")

    # Pruner.compress() returns the masked model
    # but for AutoCompressPruner, Pruner.compress() returns directly the pruned model
    model = pruner.compress()
    evaluation_result = evaluator(model)
    print('Evaluation result (masked model): %s' % evaluation_result)
    result['performance']['pruned'] = evaluation_result

    if args.save_model:
        pruner.export_model(
            os.path.join(args.experiment_data_dir, 'model_masked.pth'), os.path.join(args.experiment_data_dir, 'mask.pth'))
        print('Masked model saved to %s' % args.experiment_data_dir)

    # model speedup
    if args.speedup:
        # AutoCompressPruner already returns a really-pruned model; the other
        # pruners return a masked model that must be rebuilt and sped up.
        if args.pruner != 'AutoCompressPruner':
            if args.model == 'LeNet':
                model = LeNet().to(device)
            elif args.model == 'vgg16':
                model = VGG(depth=16).to(device)
            elif args.model == 'resnet18':
                model = ResNet18().to(device)
            elif args.model == 'resnet50':
                model = ResNet50().to(device)
            model.load_state_dict(torch.load(os.path.join(args.experiment_data_dir, 'model_masked.pth')))
            masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')
            m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
            m_speedup.speedup_model()
            evaluation_result = evaluator(model)
            print('Evaluation result (speedup model): %s' % evaluation_result)
            result['performance']['speedup'] = evaluation_result
            torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_speedup.pth'))
            print('Speedup model saved to %s' % args.experiment_data_dir)
        flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
        result['flops']['speedup'] = flops
        result['params']['speedup'] = params

    if args.fine_tune:
        # Each dataset/model pair gets its own fine-tuning optimizer/schedule.
        if args.dataset == 'mnist':
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        elif args.dataset == 'cifar10' and args.model == 'vgg16':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet18':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet50':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        best_acc = 0
        for epoch in range(args.fine_tune_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = evaluator(model)
            if acc > best_acc:
                best_acc = acc
                # Checkpoint only when validation accuracy improves.
                torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
        print('Evaluation result (fine tuned): %s' % best_acc)
        print('Fined tuned model saved to %s' % args.experiment_data_dir)
        result['performance']['finetuned'] = best_acc

    with open(os.path.join(args.experiment_data_dir, 'result.json'), 'w+') as f:
        json.dump(result, f)
if __name__ == '__main__':
    def str2bool(s):
        """Parse a CLI flag that accepts common boolean spellings."""
        if isinstance(s, bool):
            return s
        if s.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if s.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(description='PyTorch Example for SimulatedAnnealingPruner')
    # dataset and model
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='dataset to use, mnist, cifar10 or imagenet')
    parser.add_argument('--data-dir', type=str, default='./data/',
                        help='dataset directory')
    parser.add_argument('--model', type=str, default='vgg16',
                        help='model to use, LeNet, vgg16, resnet18 or resnet50')
    parser.add_argument('--load-pretrained-model', type=str2bool, default=False,
                        help='whether to load pretrained model')
    parser.add_argument('--pretrained-model-dir', type=str, default='./',
                        help='path to pretrained model')
    parser.add_argument('--pretrain-epochs', type=int, default=100,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=64,
                        help='input batch size for testing (default: 64)')
    parser.add_argument('--fine-tune', type=str2bool, default=True,
                        help='whether to fine-tune the pruned model')
    parser.add_argument('--fine-tune-epochs', type=int, default=5,
                        help='epochs to fine tune')
    parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
                        help='For saving experiment data')
    # pruner
    parser.add_argument('--pruner', type=str, default='SimulatedAnnealingPruner',
                        help='pruner to use')
    parser.add_argument('--base-algo', type=str, default='l1',
                        help='base pruning algorithm. level, l1, l2, or fpgm')
    parser.add_argument('--sparsity', type=float, default=0.1,
                        help='target overall target sparsity')
    # param for SimulatedAnnealingPruner
    parser.add_argument('--cool-down-rate', type=float, default=0.9,
                        help='cool down rate')
    # param for NetAdaptPruner
    parser.add_argument('--sparsity-per-iteration', type=float, default=0.05,
                        help='sparsity_per_iteration of NetAdaptPruner')
    # speedup
    parser.add_argument('--speedup', type=str2bool, default=False,
                        help='Whether to speedup the pruned model')
    # others
    parser.add_argument('--log-interval', type=int, default=200,
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', type=str2bool, default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()

    # Make sure the output directory exists before main() writes into it.
    if not os.path.exists(args.experiment_data_dir):
        os.makedirs(args.experiment_data_dir)

    main(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for supported basic pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
You can also try auto_pruners_torch.py to see the usage of some automatic pruning algorithms.
'''
import logging
import argparse
import os
import sys
import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from torchvision import datasets, transforms
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from mnist.lenet import LeNet
from cifar10.vgg import VGG
from cifar10.resnet import ResNet18
from nni.compression.pytorch.utils import count_flops_params
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import (
LevelPruner,
SlimPruner,
FPGMPruner,
TaylorFOWeightFilterPruner,
L1FilterPruner,
L2FilterPruner,
AGPPruner,
ActivationMeanRankFilterPruner,
ActivationAPoZRankFilterPruner
)
# Module-level logger for this example script.
_logger = logging.getLogger('mnist_example')
_logger.setLevel(logging.INFO)

# Maps the --pruner CLI choice to the corresponding NNI pruner class.
str2pruner = {
    'level': LevelPruner,
    'l1filter': L1FilterPruner,
    'l2filter': L2FilterPruner,
    'slim': SlimPruner,
    'agp': AGPPruner,
    'fpgm': FPGMPruner,
    'mean_activation': ActivationMeanRankFilterPruner,
    'apoz': ActivationAPoZRankFilterPruner,
    'taylorfo': TaylorFOWeightFilterPruner
}
def get_dummy_input(args, device):
    """Create a random input batch matching the dataset's sample shape.

    Returns a tensor of shape ``[args.test_batch_size, C, H, W]`` on ``device``.

    Raises:
        ValueError: if ``args.dataset`` is not supported (previously this fell
        through and raised UnboundLocalError on return).
    """
    if args.dataset == 'mnist':
        dummy_input = torch.randn([args.test_batch_size, 1, 28, 28]).to(device)
    elif args.dataset in ['cifar10', 'imagenet']:
        # NOTE(review): imagenet also gets a 32x32 dummy here — confirm this
        # is intended; kept as-is to preserve behavior.
        dummy_input = torch.randn([args.test_batch_size, 3, 32, 32]).to(device)
    else:
        raise ValueError('dataset not supported: %s' % args.dataset)
    return dummy_input
def get_data(dataset, data_dir, batch_size, test_batch_size):
    """Build train/test DataLoaders and the matching loss criterion.

    Args:
        dataset: 'mnist' or 'cifar10'
        data_dir: root directory where the dataset is stored/downloaded
        batch_size: training batch size
        test_batch_size: test batch size

    Returns:
        (train_loader, test_loader, criterion)

    Raises:
        ValueError: if ``dataset`` is not supported.
    """
    # Worker/pinning options only help when CUDA is available.
    kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {
    }
    if dataset == 'mnist':
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=batch_size, shuffle=True, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=test_batch_size, shuffle=True, **kwargs)
        criterion = torch.nn.NLLLoss()
    elif dataset == 'cifar10':
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(data_dir, train=True, transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ToTensor(),
                normalize,
            ]), download=True),
            batch_size=batch_size, shuffle=True, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(data_dir, train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=batch_size, shuffle=False, **kwargs)
        criterion = torch.nn.CrossEntropyLoss()
    else:
        # Previously fell through and raised UnboundLocalError on return;
        # fail fast with a clear message instead.
        raise ValueError('dataset not supported: %s' % dataset)
    return train_loader, test_loader, criterion
def get_model_optimizer_scheduler(args, device, train_loader, test_loader, criterion):
    """Build the requested model; pre-train it unless a pretrained checkpoint
    path is given; then create a fresh optimizer/scheduler for pruning.

    Returns:
        (model, optimizer, scheduler) — the returned optimizer/scheduler are
        newly created for the subsequent pruning stage, not the pre-training ones.

    Raises:
        ValueError: if ``args.model`` is not a supported name.
    """
    if args.model == 'lenet':
        model = LeNet().to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    elif args.model == 'vgg19':
        model = VGG(depth=19).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    elif args.model == 'resnet18':
        model = ResNet18().to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    else:
        raise ValueError("model not recognized")
    if args.pretrained_model_dir is None:
        print('start pre-training...')
        best_acc = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(args, model, device, criterion, test_loader)
            if acc > best_acc:
                best_acc = acc
                # NOTE(review): snapshot references the live state dict, not a
                # deep copy; later epochs mutate it in place — confirm whether
                # a copy.deepcopy was intended.
                state_dict = model.state_dict()
        model.load_state_dict(state_dict)
        acc = best_acc
        torch.save(state_dict, os.path.join(args.experiment_data_dir, f'pretrain_{args.dataset}_{args.model}.pth'))
        print('Model trained saved to %s' % args.experiment_data_dir)
    else:
        model.load_state_dict(torch.load(args.pretrained_model_dir))
        best_acc = test(args, model, device, criterion, test_loader)
    # setup new optimizer for pruning
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    print('Pretrained model acc:', best_acc)
    return model, optimizer, scheduler
def train(args, model, device, train_loader, criterion, optimizer, epoch):
    """Train ``model`` for a single epoch over ``train_loader``.

    Logs every ``args.log_interval`` batches; when ``args.dry_run`` is set,
    stops right after the first logged batch (quick smoke test).
    """
    model.train()
    total = len(train_loader.dataset)
    batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if step % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(inputs), total,
                100. * step / batches, loss.item()))
            if args.dry_run:
                break
def test(args, model, device, criterion, test_loader):
    """Evaluate ``model`` on ``test_loader``; returns accuracy as a percentage."""
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()
            guess = logits.argmax(dim=1, keepdim=True)
            hits += guess.eq(labels.view_as(guess)).sum().item()
    n_total = len(test_loader.dataset)
    test_loss = loss_sum / n_total
    acc = 100 * hits / n_total
    print('Test Loss: {} Accuracy: {}%\n'.format(
        test_loss, acc))
    return acc
def main(args):
    """Pipeline: load data/model -> prune -> export masks -> (optional)
    speedup -> fine-tune -> report best accuracy (optionally to NNI)."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs(args.experiment_data_dir, exist_ok=True)

    # prepare model and data
    train_loader, test_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
    model, optimizer, _ = get_model_optimizer_scheduler(args, device, train_loader, test_loader, criterion)

    dummy_input = get_dummy_input(args, device)
    flops, params, _ = count_flops_params(model, dummy_input)
    print(f"FLOPs: {flops}, params: {params}")

    print(f'start {args.pruner} pruning...')

    def trainer(model, optimizer, criterion, epoch):
        # Training callback used by training-aware pruners.
        return train(args, model, device, train_loader, criterion, optimizer, epoch=epoch)

    pruner_cls = str2pruner[args.pruner]

    kw_args = {}
    config_list = [{
        'sparsity': args.sparsity,
        'op_types': ['Conv2d']
    }]

    if args.pruner == 'level':
        config_list = [{
            'sparsity': args.sparsity,
            'op_types': ['default']
        }]
    else:
        if args.global_sort:
            print('Enable the global_sort mode')
            # only taylor pruner supports global sort mode currently
            kw_args['global_sort'] = True
        if args.dependency_aware:
            dummy_input = get_dummy_input(args, device)
            print('Enable the dependency_aware mode')
            # note that, not all pruners support the dependency_aware mode
            kw_args['dependency_aware'] = True
            kw_args['dummy_input'] = dummy_input
        if args.pruner not in ('l1filter', 'l2filter', 'fpgm'):
            # set only work for training aware pruners
            kw_args['trainer'] = trainer
            kw_args['optimizer'] = optimizer
            kw_args['criterion'] = criterion
        if args.pruner in ('mean_activation', 'apoz', 'taylorfo'):
            kw_args['sparsifying_training_batches'] = 1
        if args.pruner == 'slim':
            kw_args['sparsifying_training_epochs'] = 1
        if args.pruner == 'agp':
            kw_args['pruning_algorithm'] = 'l1'
            kw_args['num_iterations'] = 2
            kw_args['epochs_per_iteration'] = 1
        # Reproduced result in paper 'PRUNING FILTERS FOR EFFICIENT CONVNETS',
        # Conv_1, Conv_8, Conv_9, Conv_10, Conv_11, Conv_12 are pruned with 50% sparsity, as 'VGG-16-pruned-A'
        # If you want to skip some layer, you can use 'exclude' like follow.
        if args.pruner == 'slim':
            config_list = [{
                'sparsity': args.sparsity,
                'op_types': ['BatchNorm2d'],
            }]
        elif args.model == 'resnet18':
            config_list = [{
                'sparsity': args.sparsity,
                'op_types': ['Conv2d']
            }, {
                'exclude': True,
                'op_names': ['layer1.0.conv1', 'layer1.0.conv2']
            }]
        else:
            config_list = [{
                'sparsity': args.sparsity,
                'op_types': ['Conv2d'],
                'op_names': ['feature.0', 'feature.24', 'feature.27', 'feature.30', 'feature.34', 'feature.37']
            }]

    pruner = pruner_cls(model, config_list, **kw_args)

    # Pruner.compress() returns the masked model
    model = pruner.compress()
    pruner.get_pruned_weights()

    # export the pruned model masks for model speedup
    model_path = os.path.join(args.experiment_data_dir, 'pruned_{}_{}_{}.pth'.format(
        args.model, args.dataset, args.pruner))
    mask_path = os.path.join(args.experiment_data_dir, 'mask_{}_{}_{}.pth'.format(
        args.model, args.dataset, args.pruner))
    pruner.export_model(model_path=model_path, mask_path=mask_path)

    if args.test_only:
        test(args, model, device, criterion, test_loader)

    if args.speedup:
        # Unwrap all modules to normal state
        pruner._unwrap_model()
        m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
        m_speedup.speedup_model()

    print('start finetuning...')

    # Optimizer used in the pruner might be patched, so recommend to new an optimizer for fine-tuning stage.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)

    best_top1 = 0
    save_path = os.path.join(args.experiment_data_dir, f'finetuned.pth')
    for epoch in range(args.fine_tune_epochs):
        print('# Epoch {} #'.format(epoch))
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        top1 = test(args, model, device, criterion, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Checkpoint only when test accuracy improves.
            torch.save(model.state_dict(), save_path)

    flops, params, results = count_flops_params(model, dummy_input)
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_top1: .2f}')

    if args.nni:
        # Report the metric back to the NNI tuner when running as a trial.
        nni.report_final_result(best_top1)
if __name__ == '__main__':
    # Fixed user-facing help-text typos: 'comporession' -> 'compression',
    # 'mulitple' -> 'multiple', 'NNi' -> 'NNI'.
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')

    # dataset and model
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='dataset to use, mnist, cifar10 or imagenet')
    parser.add_argument('--data-dir', type=str, default='./data/',
                        help='dataset directory')
    parser.add_argument('--model', type=str, default='vgg16',
                        choices=['lenet', 'vgg16', 'vgg19', 'resnet18'],
                        help='model to use')
    parser.add_argument('--pretrained-model-dir', type=str, default=None,
                        help='path to pretrained model')
    parser.add_argument('--pretrain-epochs', type=int, default=160,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='input batch size for training')
    parser.add_argument('--test-batch-size', type=int, default=200,
                        help='input batch size for testing')
    parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
                        help='For saving output checkpoints')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--multi-gpu', action='store_true', default=False,
                        help='run on multiple gpus')
    parser.add_argument('--test-only', action='store_true', default=False,
                        help='run test only')

    # pruner
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='target overall target sparsity')
    parser.add_argument('--dependency-aware', action='store_true', default=False,
                        help='toggle dependency-aware mode')
    parser.add_argument('--global-sort', action='store_true', default=False,
                        help='toggle global sort mode')
    parser.add_argument('--pruner', type=str, default='l1filter',
                        choices=['level', 'l1filter', 'l2filter', 'slim', 'agp',
                                 'fpgm', 'mean_activation', 'apoz', 'taylorfo'],
                        help='pruner to use')

    # speedup
    parser.add_argument('--speedup', action='store_true', default=False,
                        help='Whether to speedup the pruned model')

    # fine-tuning
    parser.add_argument('--fine-tune-epochs', type=int, default=160,
                        help='epochs to fine tune')

    parser.add_argument('--nni', action='store_true', default=False,
                        help="whether to tune the pruners using NNI tuners")

    args = parser.parse_args()

    if args.nni:
        # Running under an NNI experiment: override CLI values with the
        # tuner-generated parameters for this trial.
        params = nni.get_next_parameter()
        print(params)
        args.sparsity = params['sparsity']
        args.pruner = params['pruner']
        args.model = params['model']

    main(args)
\ No newline at end of file
import argparse
import json
import matplotlib.pyplot as plt
def plot_performance_comparison(args):
    """Plot accuracy vs. parameter count and accuracy vs. FLOPs for every
    pruner recorded in ``cifar10/comparison_result_<model>.json``, together
    with the unpruned model and the figures reported in the AutoCompress
    paper, then save the plot under ``img/``.
    """
    # Reference data: metrics of the original model and the performance
    # declared in the AutoCompress paper (params/flops given as compression
    # ratios, i.e. "times smaller").
    references = {
        'original':{
            'cifar10':{
                'vgg16':{
                    'performance': 0.9298,
                    'params':14987722.0,
                    'flops':314018314.0
                },
                'resnet18':{
                    'performance': 0.9433,
                    'params':11173962.0,
                    'flops':556651530.0
                },
                'resnet50':{
                    'performance': 0.9488,
                    'params':23520842.0,
                    'flops':1304694794.0
                }
            }
        },
        'AutoCompressPruner':{
            'cifar10':{
                'vgg16':{
                    'performance': 0.9321,
                    'params':52.2, # times
                    'flops':8.8
                },
                'resnet18':{
                    'performance': 0.9381,
                    'params':54.2, # times
                    'flops':12.2
                }
            }
        }
    }
    markers = ['v', '^', '<', '1', '2', '3', '4', '8', '*', '+', 'o']
    with open('cifar10/comparison_result_{}.json'.format(args.model), 'r') as jsonfile:
        result = json.load(jsonfile)
    pruners = result.keys()
    # Regroup the per-pruner records into one list per metric.
    performances = {p: [rec['performance'] for rec in result[p]] for p in pruners}
    flops = {p: [rec['flops'] for rec in result[p]] for p in pruners}
    params = {p: [rec['params'] for rec in result[p]] for p in pruners}
    sparsities = {p: [rec['sparsity'] for rec in result[p]] for p in pruners}
    fig, axs = plt.subplots(2, 1, figsize=(8, 10))
    fig.suptitle('Channel Pruning Comparison on {}/CIFAR10'.format(args.model))
    fig.subplots_adjust(hspace=0.5)
    # One scatter series per pruner on each axis (axs[0]: params, axs[1]: flops).
    for idx, pruner in enumerate(pruners):
        axs[0].scatter(params[pruner], performances[pruner], marker=markers[idx], label=pruner)
        axs[1].scatter(flops[pruner], performances[pruner], marker=markers[idx], label=pruner)
    original = references['original']['cifar10'][args.model]
    paper = references['AutoCompressPruner']['cifar10']
    # The paper only reports vgg16 and resnet18 results.
    paper_available = args.model in ['vgg16', 'resnet18']
    # reference points on the parameter axis
    axs[0].plot(original['params'], original['performance'], 'rx', label='original model')
    if paper_available:
        axs[0].plot(original['params'] / paper[args.model]['params'],
                    paper[args.model]['performance'],
                    'bx', label='AutoCompress Paper')
    axs[0].set_title("Performance v.s. Number of Parameters")
    axs[0].set_xlabel("Number of Parameters")
    axs[0].set_ylabel('Accuracy')
    axs[0].legend()
    # reference points on the FLOPs axis
    axs[1].plot(original['flops'], original['performance'], 'rx', label='original model')
    if paper_available:
        axs[1].plot(original['flops'] / paper[args.model]['flops'],
                    paper[args.model]['performance'],
                    'bx', label='AutoCompress Paper')
    axs[1].set_title("Performance v.s. FLOPs")
    axs[1].set_xlabel("FLOPs")
    axs[1].set_ylabel('Accuracy')
    axs[1].legend()
    plt.savefig('img/performance_comparison_{}.png'.format(args.model))
    plt.close()
if __name__ == '__main__':
    # Fix: the description previously read 'PyTorch MNIST Example' — a
    # copy-paste leftover; this script plots CIFAR10 pruning comparisons.
    parser = argparse.ArgumentParser(
        description='Plot CIFAR10 channel pruning comparison results')
    parser.add_argument('--model', type=str, default='vgg16',
                        help='vgg16, resnet18 or resnet50')
    args = parser.parse_args()
    plot_performance_comparison(args)
{
"L1FilterPruner": [
{
"sparsity": 0.1,
"params": 9642085.0,
"flops": 496882684.0,
"performance": 0.9436
},
{
"sparsity": 0.2,
"params": 8149126.0,
"flops": 436381222.0,
"performance": 0.9472
},
{
"sparsity": 0.3,
"params": 6705269.0,
"flops": 371666312.0,
"performance": 0.9391
},
{
"sparsity": 0.4,
"params": 5335138.0,
"flops": 307050934.0,
"performance": 0.9433
},
{
"sparsity": 0.5,
"params": 3998122.0,
"flops": 237900244.0,
"performance": 0.9379
},
{
"sparsity": 0.6,
"params": 2767325.0,
"flops": 175308326.0,
"performance": 0.9326
},
{
"sparsity": 0.7,
"params": 1617817.0,
"flops": 108532198.0,
"performance": 0.928
},
{
"sparsity": 0.8,
"params": 801338.0,
"flops": 53808728.0,
"performance": 0.9145
},
{
"sparsity": 0.9,
"params": 229372.0,
"flops": 15304972.0,
"performance": 0.8858
},
{
"sparsity": 0.95,
"params": 61337.0,
"flops": 4305146.0,
"performance": 0.8441
},
{
"sparsity": 0.975,
"params": 17763.0,
"flops": 1561644.0,
"performance": 0.7294
}
],
"L2FilterPruner": [
{
"sparsity": 0.1,
"params": 9680242.0,
"flops": 497492746.0,
"performance": 0.9423
},
{
"sparsity": 0.2,
"params": 8137784.0,
"flops": 436199900.0,
"performance": 0.9471
},
{
"sparsity": 0.3,
"params": 6702679.0,
"flops": 369733768.0,
"performance": 0.9415
},
{
"sparsity": 0.4,
"params": 5330426.0,
"flops": 305512736.0,
"performance": 0.9411
},
{
"sparsity": 0.5,
"params": 3961076.0,
"flops": 236467814.0,
"performance": 0.9349
},
{
"sparsity": 0.6,
"params": 2776512.0,
"flops": 175872204.0,
"performance": 0.9393
},
{
"sparsity": 0.7,
"params": 1622571.0,
"flops": 107994906.0,
"performance": 0.9295
},
{
"sparsity": 0.8,
"params": 797075.0,
"flops": 53534414.0,
"performance": 0.9187
},
{
"sparsity": 0.9,
"params": 232153.0,
"flops": 15385078.0,
"performance": 0.8838
},
{
"sparsity": 0.95,
"params": 58180.0,
"flops": 4510072.0,
"performance": 0.8396
},
{
"sparsity": 0.975,
"params": 16836.0,
"flops": 1429752.0,
"performance": 0.7482
}
],
"FPGMPruner": [
{
"sparsity": 0.1,
"params": 9705680.0,
"flops": 497899454.0,
"performance": 0.9443
},
{
"sparsity": 0.2,
"params": 8160468.0,
"flops": 436562544.0,
"performance": 0.946
},
{
"sparsity": 0.3,
"params": 6710052.0,
"flops": 367960482.0,
"performance": 0.9452
},
{
"sparsity": 0.4,
"params": 5334205.0,
"flops": 306166432.0,
"performance": 0.9412
},
{
"sparsity": 0.5,
"params": 4007259.0,
"flops": 237702210.0,
"performance": 0.9385
},
{
"sparsity": 0.6,
"params": 2782236.0,
"flops": 175813620.0,
"performance": 0.9304
},
{
"sparsity": 0.7,
"params": 1634603.0,
"flops": 108904676.0,
"performance": 0.9249
},
{
"sparsity": 0.8,
"params": 799610.0,
"flops": 53645918.0,
"performance": 0.9203
},
{
"sparsity": 0.9,
"params": 233644.0,
"flops": 15408784.0,
"performance": 0.8856
},
{
"sparsity": 0.95,
"params": 56518.0,
"flops": 4266910.0,
"performance": 0.83
},
{
"sparsity": 0.975,
"params": 17610.0,
"flops": 1441836.0,
"performance": 0.7356
}
],
"NetAdaptPruner": [
{
"sparsity": 0.1,
"params": 11173962.0,
"flops": 556651530.0,
"performance": 0.9474
},
{
"sparsity": 0.2,
"params": 10454958.0,
"flops": 545147466.0,
"performance": 0.9482
},
{
"sparsity": 0.3,
"params": 9299986.0,
"flops": 526681564.0,
"performance": 0.9469
},
{
"sparsity": 0.4,
"params": 8137618.0,
"flops": 508087276.0,
"performance": 0.9451
},
{
"sparsity": 0.5,
"params": 6267654.0,
"flops": 478185102.0,
"performance": 0.947
},
{
"sparsity": 0.6,
"params": 5277444.0,
"flops": 462341742.0,
"performance": 0.9469
},
{
"sparsity": 0.7,
"params": 4854190.0,
"flops": 455580628.0,
"performance": 0.9466
},
{
"sparsity": 0.8,
"params": 3531098.0,
"flops": 434411156.0,
"performance": 0.9472
}
],
"SimulatedAnnealingPruner": [
{
"sparsity": 0.1,
"params": 10307424.0,
"flops": 537697098.0,
"performance": 0.942
},
{
"sparsity": 0.2,
"params": 9264598.0,
"flops": 513101368.0,
"performance": 0.9456
},
{
"sparsity": 0.3,
"params": 7999316.0,
"flops": 489260738.0,
"performance": 0.946
},
{
"sparsity": 0.4,
"params": 6996176.0,
"flops": 450768626.0,
"performance": 0.9413
},
{
"sparsity": 0.5,
"params": 5412616.0,
"flops": 408698434.0,
"performance": 0.9477
},
{
"sparsity": 0.6,
"params": 5106924.0,
"flops": 391735326.0,
"performance": 0.9483
},
{
"sparsity": 0.7,
"params": 3032105.0,
"flops": 269777978.0,
"performance": 0.9414
},
{
"sparsity": 0.8,
"params": 2423230.0,
"flops": 294783862.0,
"performance": 0.9384
},
{
"sparsity": 0.9,
"params": 1151046.0,
"flops": 209639226.0,
"performance": 0.939
},
{
"sparsity": 0.95,
"params": 394406.0,
"flops": 108776618.0,
"performance": 0.923
},
{
"sparsity": 0.975,
"params": 250649.0,
"flops": 84645050.0,
"performance": 0.917
}
],
"AutoCompressPruner": [
{
"sparsity": 0.1,
"params": 10238286.0,
"flops": 536590794.0,
"performance": 0.9406
},
{
"sparsity": 0.2,
"params": 9272049.0,
"flops": 512333916.0,
"performance": 0.9392
},
{
"sparsity": 0.3,
"params": 8099915.0,
"flops": 485418056.0,
"performance": 0.9398
},
{
"sparsity": 0.4,
"params": 6864547.0,
"flops": 449359492.0,
"performance": 0.9406
},
{
"sparsity": 0.5,
"params": 6106994.0,
"flops": 430766432.0,
"performance": 0.9397
},
{
"sparsity": 0.6,
"params": 5338096.0,
"flops": 415085278.0,
"performance": 0.9384
},
{
"sparsity": 0.7,
"params": 3701330.0,
"flops": 351057878.0,
"performance": 0.938
},
{
"sparsity": 0.8,
"params": 2229760.0,
"flops": 269058346.0,
"performance": 0.9388
},
{
"sparsity": 0.9,
"params": 1108564.0,
"flops": 189355930.0,
"performance": 0.9348
},
{
"sparsity": 0.95,
"params": 616893.0,
"flops": 159314256.0,
"performance": 0.93
},
{
"sparsity": 0.975,
"params": 297368.0,
"flops": 113398292.0,
"performance": 0.9072
}
]
}
\ No newline at end of file
{
"L1FilterPruner": [
{
"sparsity": 0.1,
"params": 20378141.0,
"flops": 1134740738.0,
"performance": 0.9456
},
{
"sparsity": 0.2,
"params": 17286560.0,
"flops": 966734852.0,
"performance": 0.9433
},
{
"sparsity": 0.3,
"params": 14403947.0,
"flops": 807114812.0,
"performance": 0.9396
},
{
"sparsity": 0.4,
"params": 11558288.0,
"flops": 656314106.0,
"performance": 0.9402
},
{
"sparsity": 0.5,
"params": 8826728.0,
"flops": 507965924.0,
"performance": 0.9394
},
{
"sparsity": 0.6,
"params": 6319902.0,
"flops": 374211960.0,
"performance": 0.9372
},
{
"sparsity": 0.7,
"params": 4063713.0,
"flops": 246788556.0,
"performance": 0.9304
},
{
"sparsity": 0.8,
"params": 2120717.0,
"flops": 133614422.0,
"performance": 0.9269
},
{
"sparsity": 0.9,
"params": 652524.0,
"flops": 41973714.0,
"performance": 0.9081
},
{
"sparsity": 0.95,
"params": 195468.0,
"flops": 13732020.0,
"performance": 0.8723
},
{
"sparsity": 0.975,
"params": 58054.0,
"flops": 4268104.0,
"performance": 0.7941
}
],
"L2FilterPruner": [
{
"sparsity": 0.1,
"params": 20378141.0,
"flops": 1134740738.0,
"performance": 0.9442
},
{
"sparsity": 0.2,
"params": 17275244.0,
"flops": 966400928.0,
"performance": 0.9463
},
{
"sparsity": 0.3,
"params": 14415409.0,
"flops": 807710914.0,
"performance": 0.9367
},
{
"sparsity": 0.4,
"params": 11564310.0,
"flops": 656653008.0,
"performance": 0.9391
},
{
"sparsity": 0.5,
"params": 8843266.0,
"flops": 508086256.0,
"performance": 0.9381
},
{
"sparsity": 0.6,
"params": 6316815.0,
"flops": 373882614.0,
"performance": 0.9368
},
{
"sparsity": 0.7,
"params": 4054272.0,
"flops": 246477678.0,
"performance": 0.935
},
{
"sparsity": 0.8,
"params": 2129321.0,
"flops": 134527520.0,
"performance": 0.9275
},
{
"sparsity": 0.9,
"params": 667500.0,
"flops": 42927060.0,
"performance": 0.9129
},
{
"sparsity": 0.95,
"params": 192464.0,
"flops": 13669430.0,
"performance": 0.8757
},
{
"sparsity": 0.975,
"params": 58250.0,
"flops": 4365620.0,
"performance": 0.7978
}
],
"FPGMPruner": [
{
"sparsity": 0.1,
"params": 20401570.0,
"flops": 1135114552.0,
"performance": 0.9438
},
{
"sparsity": 0.2,
"params": 17321414.0,
"flops": 967137398.0,
"performance": 0.9427
},
{
"sparsity": 0.3,
"params": 14418221.0,
"flops": 807755756.0,
"performance": 0.9422
},
{
"sparsity": 0.4,
"params": 11565000.0,
"flops": 655412124.0,
"performance": 0.9403
},
{
"sparsity": 0.5,
"params": 8829840.0,
"flops": 506715294.0,
"performance": 0.9355
},
{
"sparsity": 0.6,
"params": 6308085.0,
"flops": 374231682.0,
"performance": 0.9359
},
{
"sparsity": 0.7,
"params": 4054237.0,
"flops": 246511714.0,
"performance": 0.9285
},
{
"sparsity": 0.8,
"params": 2134187.0,
"flops": 134456366.0,
"performance": 0.9275
},
{
"sparsity": 0.9,
"params": 665931.0,
"flops": 42859752.0,
"performance": 0.9083
},
{
"sparsity": 0.95,
"params": 191590.0,
"flops": 13641052.0,
"performance": 0.8762
},
{
"sparsity": 0.975,
"params": 57767.0,
"flops": 4350074.0,
"performance": 0.789
}
],
"NetAdaptPruner": [
{
"sparsity": 0.1,
"params": 22348970.0,
"flops": 1275701258.0,
"performance": 0.9404
},
{
"sparsity": 0.2,
"params": 21177162.0,
"flops": 1256952330.0,
"performance": 0.9445
},
{
"sparsity": 0.3,
"params": 18407434.0,
"flops": 1212636682.0,
"performance": 0.9433
},
{
"sparsity": 0.4,
"params": 16061284.0,
"flops": 1175098282.0,
"performance": 0.9401
}
],
"SimulatedAnnealingPruner": [
{
"sparsity": 0.1,
"params": 20551755.0,
"flops": 1230145122.0,
"performance": 0.9438
},
{
"sparsity": 0.2,
"params": 17766048.0,
"flops": 1159924128.0,
"performance": 0.9432
},
{
"sparsity": 0.3,
"params": 15105146.0,
"flops": 1094478662.0,
"performance": 0.943
},
{
"sparsity": 0.4,
"params": 12378092.0,
"flops": 1008801158.0,
"performance": 0.9398
},
{
"sparsity": 0.5,
"params": 9890487.0,
"flops": 911941770.0,
"performance": 0.9426
},
{
"sparsity": 0.6,
"params": 7638262.0,
"flops": 831218770.0,
"performance": 0.9412
},
{
"sparsity": 0.7,
"params": 5469936.0,
"flops": 691881792.0,
"performance": 0.9405
},
{
"sparsity": 0.8,
"params": 3668951.0,
"flops": 580850666.0,
"performance": 0.941
},
{
"sparsity": 0.9,
"params": 1765284.0,
"flops": 389162310.0,
"performance": 0.9294
}
],
"AutoCompressPruner": [
{
"sparsity": 0.1,
"params": 20660299.0,
"flops": 1228508590.0,
"performance": 0.9337
},
{
"sparsity": 0.2,
"params": 17940465.0,
"flops": 1152868146.0,
"performance": 0.9326
},
{
"sparsity": 0.3,
"params": 15335831.0,
"flops": 1084996094.0,
"performance": 0.9348
},
{
"sparsity": 0.4,
"params": 12821408.0,
"flops": 991305524.0,
"performance": 0.936
},
{
"sparsity": 0.5,
"params": 10695425.0,
"flops": 919638860.0,
"performance": 0.9349
},
{
"sparsity": 0.6,
"params": 8536821.0,
"flops": 802011678.0,
"performance": 0.9339
},
{
"sparsity": 0.7,
"params": 7276898.0,
"flops": 744248114.0,
"performance": 0.9337
},
{
"sparsity": 0.8,
"params": 5557721.0,
"flops": 643881710.0,
"performance": 0.9323
},
{
"sparsity": 0.9,
"params": 3925140.0,
"flops": 512545272.0,
"performance": 0.9304
},
{
"sparsity": 0.95,
"params": 2867004.0,
"flops": 365184762.0,
"performance": 0.9263
},
{
"sparsity": 0.975,
"params": 1773257.0,
"flops": 229320266.0,
"performance": 0.9175
}
]
}
\ No newline at end of file
{
"L1FilterPruner": [
{
"sparsity": 0.1,
"params": 12187336.0,
"flops": 256252606.0,
"performance": 0.9344
},
{
"sparsity": 0.2,
"params": 9660216.0,
"flops": 203049930.0,
"performance": 0.9371
},
{
"sparsity": 0.3,
"params": 7435417.0,
"flops": 155477470.0,
"performance": 0.9341
},
{
"sparsity": 0.4,
"params": 5493954.0,
"flops": 114721578.0,
"performance": 0.9317
},
{
"sparsity": 0.5,
"params": 3820010.0,
"flops": 79155722.0,
"performance": 0.9309
},
{
"sparsity": 0.6,
"params": 2478632.0,
"flops": 51618494.0,
"performance": 0.9229
},
{
"sparsity": 0.7,
"params": 1420600.0,
"flops": 29455306.0,
"performance": 0.9031
},
{
"sparsity": 0.8,
"params": 658553.0,
"flops": 13290974.0,
"performance": 0.8756
},
{
"sparsity": 0.9,
"params": 186178.0,
"flops": 3574570.0,
"performance": 0.8145
},
{
"sparsity": 0.95,
"params": 58680.0,
"flops": 1050570.0,
"performance": 0.6983
},
{
"sparsity": 0.975,
"params": 23408.0,
"flops": 329918.0,
"performance": 0.5573
}
],
"L2FilterPruner": [
{
"sparsity": 0.1,
"params": 12187336.0,
"flops": 256252606.0,
"performance": 0.9357
},
{
"sparsity": 0.2,
"params": 9660216.0,
"flops": 203049930.0,
"performance": 0.9355
},
{
"sparsity": 0.3,
"params": 7435417.0,
"flops": 155477470.0,
"performance": 0.9337
},
{
"sparsity": 0.4,
"params": 5493954.0,
"flops": 114721578.0,
"performance": 0.9308
},
{
"sparsity": 0.5,
"params": 3820010.0,
"flops": 79155722.0,
"performance": 0.9285
},
{
"sparsity": 0.6,
"params": 2478632.0,
"flops": 51618494.0,
"performance": 0.9208
},
{
"sparsity": 0.7,
"params": 1420600.0,
"flops": 29455306.0,
"performance": 0.909
},
{
"sparsity": 0.8,
"params": 658553.0,
"flops": 13290974.0,
"performance": 0.8698
},
{
"sparsity": 0.9,
"params": 186178.0,
"flops": 3574570.0,
"performance": 0.8203
},
{
"sparsity": 0.95,
"params": 58680.0,
"flops": 1050570.0,
"performance": 0.7063
},
{
"sparsity": 0.975,
"params": 23408.0,
"flops": 329918.0,
"performance": 0.5455
}
],
"FPGMPruner": [
{
"sparsity": 0.1,
"params": 12187336.0,
"flops": 256252606.0,
"performance": 0.937
},
{
"sparsity": 0.2,
"params": 9660216.0,
"flops": 203049930.0,
"performance": 0.936
},
{
"sparsity": 0.3,
"params": 7435417.0,
"flops": 155477470.0,
"performance": 0.9359
},
{
"sparsity": 0.4,
"params": 5493954.0,
"flops": 114721578.0,
"performance": 0.9302
},
{
"sparsity": 0.5,
"params": 3820010.0,
"flops": 79155722.0,
"performance": 0.9233
},
{
"sparsity": 0.6,
"params": 2478632.0,
"flops": 51618494.0,
"performance": 0.922
},
{
"sparsity": 0.7,
"params": 1420600.0,
"flops": 29455306.0,
"performance": 0.9022
},
{
"sparsity": 0.8,
"params": 658553.0,
"flops": 13290974.0,
"performance": 0.8794
},
{
"sparsity": 0.9,
"params": 186178.0,
"flops": 3574570.0,
"performance": 0.8276
},
{
"sparsity": 0.95,
"params": 58680.0,
"flops": 1050570.0,
"performance": 0.6967
},
{
"sparsity": 0.975,
"params": 23408.0,
"flops": 329918.0,
"performance": 0.3683
}
],
"NetAdaptPruner": [
{
"sparsity": 0.1,
"params": 13492098.0,
"flops": 308484330.0,
"performance": 0.9376
},
{
"sparsity": 0.2,
"params": 11998408.0,
"flops": 297641410.0,
"performance": 0.9374
},
{
"sparsity": 0.3,
"params": 10504344.0,
"flops": 281928834.0,
"performance": 0.9369
},
{
"sparsity": 0.4,
"params": 8263221.0,
"flops": 272964342.0,
"performance": 0.9382
},
{
"sparsity": 0.5,
"params": 6769885.0,
"flops": 249070966.0,
"performance": 0.9388
},
{
"sparsity": 0.6,
"params": 6022137.0,
"flops": 237106998.0,
"performance": 0.9383
},
{
"sparsity": 0.7,
"params": 4526754.0,
"flops": 222152490.0,
"performance": 0.936
},
{
"sparsity": 0.8,
"params": 3032759.0,
"flops": 162401210.0,
"performance": 0.9362
}
],
"SimulatedAnnealingPruner": [
{
"sparsity": 0.1,
"params": 12691704.0,
"flops": 301467870.0,
"performance": 0.9366
},
{
"sparsity": 0.2,
"params": 10318461.0,
"flops": 275724450.0,
"performance": 0.9362
},
{
"sparsity": 0.3,
"params": 8217127.0,
"flops": 246321046.0,
"performance": 0.9371
},
{
"sparsity": 0.4,
"params": 6458368.0,
"flops": 232948294.0,
"performance": 0.9378
},
{
"sparsity": 0.5,
"params": 4973079.0,
"flops": 217675254.0,
"performance": 0.9362
},
{
"sparsity": 0.6,
"params": 3131526.0,
"flops": 151576878.0,
"performance": 0.9347
},
{
"sparsity": 0.7,
"params": 1891036.0,
"flops": 76575574.0,
"performance": 0.9289
},
{
"sparsity": 0.8,
"params": 1170751.0,
"flops": 107532322.0,
"performance": 0.9325
},
{
"sparsity": 0.9,
"params": 365978.0,
"flops": 46241354.0,
"performance": 0.9167
},
{
"sparsity": 0.95,
"params": 167089.0,
"flops": 38589922.0,
"performance": 0.7746
},
{
"sparsity": 0.975,
"params": 96779.0,
"flops": 26838230.0,
"performance": 0.1
}
],
"AutoCompressPruner": [
{
"sparsity": 0.1,
"params": 12460277.0,
"flops": 290311730.0,
"performance": 0.9352
},
{
"sparsity": 0.2,
"params": 10138147.0,
"flops": 269180938.0,
"performance": 0.9324
},
{
"sparsity": 0.3,
"params": 8033350.0,
"flops": 241789714.0,
"performance": 0.9357
},
{
"sparsity": 0.4,
"params": 6105156.0,
"flops": 213573294.0,
"performance": 0.9367
},
{
"sparsity": 0.5,
"params": 4372604.0,
"flops": 185826362.0,
"performance": 0.9387
},
{
"sparsity": 0.6,
"params": 3029629.0,
"flops": 166285498.0,
"performance": 0.9334
},
{
"sparsity": 0.7,
"params": 1897060.0,
"flops": 134897806.0,
"performance": 0.9359
},
{
"sparsity": 0.8,
"params": 1145509.0,
"flops": 111766450.0,
"performance": 0.9334
},
{
"sparsity": 0.9,
"params": 362546.0,
"flops": 50777246.0,
"performance": 0.9261
},
{
"sparsity": 0.95,
"params": 149735.0,
"flops": 39201770.0,
"performance": 0.8924
},
{
"sparsity": 0.975,
"params": 45378.0,
"flops": 13213974.0,
"performance": 0.8193
}
]
}
\ No newline at end of file
# NNI experiment configuration: grid-search over pruning hyper-parameters,
# launching basic_pruners_torch.py with `--nni` once per combination.
searchSpace:
  # Target overall sparsity tried by each trial.
  sparsity:
    _type: choice
    _value: [0.25, 0.5, 0.75]
  # Pruning algorithm (must be among the script's --pruner choices).
  pruner:
    _type: choice
    _value: ['slim', 'l2filter', 'fpgm', 'apoz']
  # Model architecture to prune.
  model:
    _type: choice
    _value: ['vgg16', 'vgg19']
trainingService:
  platform: local
  trialCodeDirectory: .
  trialCommand: python3 basic_pruners_torch.py --nni
trialConcurrency: 1
trialGpuNumber: 0
tuner:
  # GridSearch enumerates every combination in the search space.
  name: GridSearch
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment