Commit 1011377c authored by qianyj (parent abc22158)

the source code of NNI for DCU
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
NNI example for combined pruning and quantization to compress a model.
In this example, we show the compression process: first prune the model, then quantize the pruned model.
"""
import argparse
import os
import time
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
from models.mnist.naive import NaiveModel
from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
def get_model_time_cost(model, dummy_input):
    # Average forward latency over 100 runs; the first 10 runs are discarded
    # as warm-up and torch.cuda.synchronize() brackets each measurement.
    model.eval()
n_times = 100
time_list = []
for _ in range(n_times):
torch.cuda.synchronize()
tic = time.time()
_ = model(dummy_input)
torch.cuda.synchronize()
time_list.append(time.time()-tic)
time_list = time_list[10:]
return sum(time_list) / len(time_list)
def train(args, model, device, train_loader, criterion, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
if args.dry_run:
break
def test(args, model, device, criterion, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = 100 * correct / len(test_loader.dataset)
print('Test Loss: {:.6f} Accuracy: {}%\n'.format(
test_loss, acc))
return acc
def test_trt(engine, test_loader):
    test_loss = 0
    correct = 0
    time_elapsed = 0
    for data, target in test_loader:
        output, infer_time = engine.inference(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        time_elapsed += infer_time
    test_loss /= len(test_loader.dataset)
    print('Loss: {} Accuracy: {}%'.format(
        test_loss, 100 * correct / len(test_loader.dataset)))
    print("Inference elapsed_time (whole dataset): {}s".format(time_elapsed))
def main(args):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs(args.experiment_data_dir, exist_ok=True)
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train=True, download=True, transform=transform),
batch_size=64,)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train=False, transform=transform),
batch_size=1000)
# Step1. Model Pretraining
model = NaiveModel().to(device)
criterion = torch.nn.NLLLoss()
optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
flops, params, _ = count_flops_params(model, (1, 1, 28, 28), verbose=False)
if args.pretrained_model_dir is None:
        args.pretrained_model_dir = os.path.join(args.experiment_data_dir, 'pretrained.pth')
best_acc = 0
for epoch in range(args.pretrain_epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = test(args, model, device, criterion, test_loader)
if acc > best_acc:
best_acc = acc
state_dict = model.state_dict()
model.load_state_dict(state_dict)
torch.save(state_dict, args.pretrained_model_dir)
print(f'Model saved to {args.pretrained_model_dir}')
else:
state_dict = torch.load(args.pretrained_model_dir)
model.load_state_dict(state_dict)
best_acc = test(args, model, device, criterion, test_loader)
dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
time_cost = get_model_time_cost(model, dummy_input)
    # reference values: FLOPs 125.49 M, #Params 0.85 M, accuracy 93.29, time cost 1.1012
print(f'Pretrained model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')
# Step2. Model Pruning
config_list = [{
'sparsity': args.sparsity,
'op_types': ['Conv2d']
}]
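    # By default this prunes 50% of the filters (ranked by L1 norm) in every
    # Conv2d layer; --sparsity controls the ratio.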
kw_args = {}
if args.dependency_aware:
dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
print('Enable the dependency_aware mode')
        # note that not all pruners support the dependency-aware mode
kw_args['dependency_aware'] = True
kw_args['dummy_input'] = dummy_input
pruner = L1FilterPruner(model, config_list, **kw_args)
model = pruner.compress()
pruner.get_pruned_weights()
mask_path = os.path.join(args.experiment_data_dir, 'mask.pth')
model_path = os.path.join(args.experiment_data_dir, 'pruned.pth')
pruner.export_model(model_path=model_path, mask_path=mask_path)
pruner._unwrap_model() # unwrap all modules to normal state
# Step3. Model Speedup
m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
m_speedup.speedup_model()
print('model after speedup', model)
flops, params, _ = count_flops_params(model, dummy_input, verbose=False)
acc = test(args, model, device, criterion, test_loader)
time_cost = get_model_time_cost(model, dummy_input)
print(f'Pruned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {acc: .2f}, Time Cost: {time_cost}')
# Step4. Model Finetuning
optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
best_acc = 0
for epoch in range(args.finetune_epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = test(args, model, device, criterion, test_loader)
if acc > best_acc:
best_acc = acc
state_dict = model.state_dict()
model.load_state_dict(state_dict)
    save_path = os.path.join(args.experiment_data_dir, 'finetuned.pth')
torch.save(state_dict, save_path)
flops, params, _ = count_flops_params(model, dummy_input, verbose=True)
time_cost = get_model_time_cost(model, dummy_input)
# FLOPs 28.48 M, #Params: 0.18M, Accuracy: 89.03, Time Cost: 1.03
print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')
print(f'Model saved to {save_path}')
# Step5. Model Quantization via QAT
config_list = [{
'quant_types': ['weight', 'output'],
'quant_bits': {'weight': 8, 'output': 8},
'op_names': ['conv1']
}, {
'quant_types': ['output'],
'quant_bits': {'output':8},
'op_names': ['relu1']
}, {
'quant_types': ['weight', 'output'],
'quant_bits': {'weight': 8, 'output': 8},
'op_names': ['conv2']
}, {
'quant_types': ['output'],
'quant_bits': {'output': 8},
'op_names': ['relu2']
}]
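    # In each entry, 'weight' quantizes the layer parameters and 'output'
    # quantizes the activations, both to 8 bits here; op_names restricts
    # quantization to conv1/relu1/conv2/relu2.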
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
quantizer = QAT_Quantizer(model, config_list, optimizer)
quantizer.compress()
# Step6. Quantization Aware Training
best_acc = 0
for epoch in range(1):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = test(args, model, device, criterion, test_loader)
if acc > best_acc:
best_acc = acc
state_dict = model.state_dict()
calibration_path = os.path.join(args.experiment_data_dir, 'calibration.pth')
calibration_config = quantizer.export_model(model_path, calibration_path)
print("calibration_config: ", calibration_config)
# Step7. Model Speedup
batch_size = 32
input_shape = (batch_size, 1, 28, 28)
    engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=batch_size)
engine.compress()
test_trt(engine, test_loader)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
# dataset and model
# parser.add_argument('--dataset', type=str, default='mnist',
# help='dataset to use, mnist, cifar10 or imagenet')
# parser.add_argument('--data-dir', type=str, default='./data/',
# help='dataset directory')
parser.add_argument('--pretrained-model-dir', type=str, default=None,
help='path to pretrained model')
parser.add_argument('--pretrain-epochs', type=int, default=10,
help='number of epochs to pretrain the model')
parser.add_argument('--pretrain-lr', type=float, default=1.0,
help='learning rate to pretrain the model')
parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
help='For saving output checkpoints')
parser.add_argument('--log-interval', type=int, default=100, metavar='N',
help='how many batches to wait before logging training status')
parser.add_argument('--dry-run', action='store_true', default=False,
help='quickly check a single pass')
# parser.add_argument('--multi-gpu', action='store_true', default=False,
# help='run on mulitple gpus')
# parser.add_argument('--test-only', action='store_true', default=False,
# help='run test only')
# pruner
# parser.add_argument('--pruner', type=str, default='l1filter',
# choices=['level', 'l1filter', 'l2filter', 'slim', 'agp',
# 'fpgm', 'mean_activation', 'apoz', 'admm'],
# help='pruner to use')
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
help='toggle dependency aware mode')
# finetuning
parser.add_argument('--finetune-epochs', type=int, default=5,
help='epochs to fine tune')
# parser.add_argument('--kd', action='store_true', default=False,
# help='quickly check a single pass')
# parser.add_argument('--kd_T', type=float, default=4,
# help='temperature for KD distillation')
# parser.add_argument('--finetune-lr', type=float, default=0.5,
# help='learning rate to finetune the model')
# speedup
# parser.add_argument('--speed-up', action='store_true', default=False,
# help='whether to speed-up the pruned model')
# parser.add_argument('--nni', action='store_true', default=False,
# help="whether to tune the pruners using NNi tuners")
args = parser.parse_args()
main(args)
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_planes, planes, stride=1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion *
planes, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 64
        # this layer differs from torchvision's resnet18(), since this model is adapted for CIFAR-10
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNet18():
return ResNet(BasicBlock, [2, 2, 2, 2])
def ResNet34():
return ResNet(BasicBlock, [3, 4, 6, 3])
def ResNet50():
return ResNet(Bottleneck, [3, 4, 6, 3])
def ResNet101():
return ResNet(Bottleneck, [3, 4, 23, 3])
def ResNet152():
return ResNet(Bottleneck, [3, 8, 36, 3])
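# Quick sanity check (illustrative): for a CIFAR-10-sized input,
#   net = ResNet18(); out = net(torch.randn(1, 3, 32, 32))  # out.shape == (1, 10)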
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
defaultcfg = {
11: [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512],
19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512],
}
class VGG(nn.Module):
def __init__(self, depth=16):
super(VGG, self).__init__()
cfg = defaultcfg[depth]
self.cfg = cfg
self.feature = self.make_layers(cfg, True)
num_classes = 10
self.classifier = nn.Sequential(
nn.Linear(cfg[-1], 512),
nn.BatchNorm1d(512),
nn.ReLU(inplace=True),
nn.Linear(512, num_classes)
)
self._initialize_weights()
def make_layers(self, cfg, batch_norm=False):
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
return nn.Sequential(*layers)
def forward(self, x):
x = self.feature(x)
x = nn.AvgPool2d(2)(x)
x = x.view(x.size(0), -1)
y = self.classifier(x)
return y
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(0.5)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout2d(0.25)
self.dropout2 = nn.Dropout2d(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
class NaiveModel(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
self.conv2 = torch.nn.Conv2d(20, 50, 5, 1)
self.fc1 = torch.nn.Linear(4 * 4 * 50, 500)
self.fc2 = torch.nn.Linear(500, 10)
self.relu1 = torch.nn.ReLU6()
self.relu2 = torch.nn.ReLU6()
self.relu3 = torch.nn.ReLU6()
self.max_pool1 = torch.nn.MaxPool2d(2, 2)
self.max_pool2 = torch.nn.MaxPool2d(2, 2)
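    # Shape trace for an MNIST input (N, 1, 28, 28): conv1 (5x5) -> 24x24,
    # max_pool1 -> 12x12, conv2 (5x5) -> 8x8, max_pool2 -> 4x4 with 50 channels,
    # hence fc1's input size of 4 * 4 * 50.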
def forward(self, x):
x = self.relu1(self.conv1(x))
x = self.max_pool1(x)
x = self.relu2(self.conv2(x))
x = self.max_pool2(x)
x = x.view(-1, x.size()[1:].numel())
x = self.relu3(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch.nn as nn
import math
def conv_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True)
)
def conv_dw(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
nn.BatchNorm2d(inp),
nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True),
)
class MobileNet(nn.Module):
def __init__(self, n_class, profile='normal'):
super(MobileNet, self).__init__()
# original
if profile == 'normal':
in_planes = 32
cfg = [64, (128, 2), 128, (256, 2), 256, (512, 2), 512, 512, 512, 512, 512, (1024, 2), 1024]
# 0.5 AMC
elif profile == '0.5flops':
in_planes = 24
cfg = [48, (96, 2), 80, (192, 2), 200, (328, 2), 352, 368, 360, 328, 400, (736, 2), 752]
else:
raise NotImplementedError
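        # In cfg, an int means out_channels with stride 1 and a tuple means
        # (out_channels, stride); see _make_layers below.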
self.conv1 = conv_bn(3, in_planes, stride=2)
self.features = self._make_layers(in_planes, cfg, conv_dw)
self.classifier = nn.Sequential(
nn.Linear(cfg[-1], n_class),
)
self._initialize_weights()
def forward(self, x):
x = self.conv1(x)
x = self.features(x)
x = x.mean([2, 3]) # global average pooling
x = self.classifier(x)
return x
def _make_layers(self, in_planes, cfg, layer):
layers = []
for x in cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
layers.append(layer(in_planes, out_planes, stride))
in_planes = out_planes
return nn.Sequential(*layers)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
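# Example (illustrative): MobileNet(n_class=10) builds the CIFAR-10 variant;
# profile='0.5flops' selects the channel configuration found by AMC at ~50% FLOPs.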
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch.nn as nn
import math
def conv_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU6(inplace=True)
)
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = round(inp * expand_ratio)
self.use_res_connect = self.stride == 1 and inp == oup
if expand_ratio == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, n_class=1000, input_size=224, width_mult=1.):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
last_channel = 1280
        inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# building first layer
assert input_size % 32 == 0
input_channel = int(input_channel * width_mult)
self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
self.features = [conv_bn(3, input_channel, 2)]
# building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
if i == 0:
self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
else:
self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
input_channel = output_channel
# building last several layers
self.features.append(conv_1x1_bn(input_channel, self.last_channel))
# make it nn.Sequential
self.features = nn.Sequential(*self.features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(self.last_channel, n_class),
)
self._initialize_weights()
def forward(self, x):
x = self.features(x)
        # equivalent to .mean(3).mean(2), but speedup only supports the mean
        # variant whose output has two dimensions
x = x.mean([2, 3])
x = self.classifier(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
cifar-10-python.tar.gz
cifar-10-batches-py/
# AMCPruner Example
This example shows how to use AMCPruner.
## Step 1: train a model for pruning
Run the following command to train a mobilenetv2 model:
```bash
python3 amc_train.py --model_type mobilenetv2 --n_epoch 50
```
Once finished, the saved checkpoint can be found at:
```
logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
## Step 2: prune with AMCPruner
Run the following command to prune the trained model:
```bash
python3 amc_search.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
Once finished, the pruned model and mask can be found at:
```
logs/mobilenetv2_cifar10_r0.5_search-run2
```
## Step 3: finetune the pruned model
Run `amc_train.py` again with `--ckpt` and `--mask` to speed up and finetune the pruned model:
```bash
python3 amc_train.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_r0.5_search-run2/best_model.pth --mask logs/mobilenetv2_cifar10_r0.5_search-run2/best_mask.pth --n_epoch 100
```
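
To check the parameter count and FLOPs of the pruned model, `amc_train.py` also provides a `--calc_flops` flag (it measures the model, prints the numbers, and exits), for example:
```bash
python3 amc_train.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_r0.5_search-run2/best_model.pth --mask logs/mobilenetv2_cifar10_r0.5_search-run2/best_mask.pth --calc_flops
```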
# AMCPruner Example
This example shows how to use AMCPruner.
## Step 1: train a model
Run the following command to train a mobilenetv2 model:
```bash
python3 amc_train.py --model_type mobilenetv2 --n_epoch 50
```
Once training finishes, the checkpoint is saved at:
```
logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
## Step 2: prune with AMCPruner
Run the following command to prune the trained model:
```bash
python3 amc_search.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_train-run1/ckpt.best.pth
```
Once finished, the pruned model and mask are saved at:
```
logs/mobilenetv2_cifar10_r0.5_search-run2
```
## Step 3: finetune the pruned model
Run `amc_train.py` again with the `--ckpt` and `--mask` arguments to speed up and finetune the pruned model:
```bash
python3 amc_train.py --model_type mobilenetv2 --ckpt logs/mobilenetv2_cifar10_r0.5_search-run2/best_model.pth --mask logs/mobilenetv2_cifar10_r0.5_search-run2/best_mask.pth --n_epoch 100
```
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import argparse
import time
import torch
import torch.nn as nn
from torchvision.models import resnet
from nni.algorithms.compression.pytorch.pruning import AMCPruner
from data import get_split_dataset
from utils import AverageMeter, accuracy
sys.path.append('../../models')
def parse_args():
parser = argparse.ArgumentParser(description='AMC search script')
parser.add_argument('--model_type', default='mobilenet', type=str, choices=['mobilenet', 'mobilenetv2', 'resnet18', 'resnet34', 'resnet50'],
help='model to prune')
parser.add_argument('--dataset', default='cifar10', type=str, choices=['cifar10', 'imagenet'], help='dataset to use (cifar/imagenet)')
    parser.add_argument('--batch_size', default=50, type=int, help='batch size')
parser.add_argument('--data_root', default='./data', type=str, help='dataset path')
    parser.add_argument('--flops_ratio', default=0.5, type=float, help='target ratio of FLOPs to preserve')
parser.add_argument('--lbound', default=0.2, type=float, help='minimum sparsity')
parser.add_argument('--rbound', default=1., type=float, help='maximum sparsity')
parser.add_argument('--ckpt_path', default=None, type=str, help='manual path of checkpoint')
parser.add_argument('--train_episode', default=800, type=int, help='number of training episode')
parser.add_argument('--n_gpu', default=1, type=int, help='number of gpu to use')
    parser.add_argument('--n_worker', default=16, type=int, help='number of data loader workers')
parser.add_argument('--suffix', default=None, type=str, help='suffix of auto-generated log directory')
return parser.parse_args()
def get_model_and_checkpoint(model, dataset, checkpoint_path, n_gpu=1):
if dataset == 'imagenet':
n_class = 1000
elif dataset == 'cifar10':
n_class = 10
else:
raise ValueError('unsupported dataset')
if model == 'mobilenet':
from mobilenet import MobileNet
net = MobileNet(n_class=n_class)
elif model == 'mobilenetv2':
from mobilenet_v2 import MobileNetV2
net = MobileNetV2(n_class=n_class)
elif model.startswith('resnet'):
net = resnet.__dict__[model](pretrained=True)
in_features = net.fc.in_features
net.fc = nn.Linear(in_features, n_class)
else:
raise NotImplementedError
if checkpoint_path:
print('loading {}...'.format(checkpoint_path))
sd = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        if 'state_dict' in sd:  # a full checkpoint rather than a bare state_dict
sd = sd['state_dict']
sd = {k.replace('module.', ''): v for k, v in sd.items()}
net.load_state_dict(sd)
if torch.cuda.is_available() and n_gpu > 0:
net = net.cuda()
if n_gpu > 1:
net = torch.nn.DataParallel(net, range(n_gpu))
return net
def init_data(args):
# split the train set into train + val
# for CIFAR, split 5k for val
# for ImageNet, split 3k for val
val_size = 5000 if 'cifar' in args.dataset else 3000
train_loader, val_loader, _ = get_split_dataset(
args.dataset, args.batch_size,
args.n_worker, val_size,
data_root=args.data_root,
shuffle=False
) # same sampling
return train_loader, val_loader
def validate(val_loader, model, verbose=False):
batch_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
criterion = nn.CrossEntropyLoss().cuda()
# switch to evaluate mode
model.eval()
end = time.time()
t1 = time.time()
with torch.no_grad():
for i, (input, target) in enumerate(val_loader):
target = target.to(device)
input_var = torch.autograd.Variable(input).to(device)
target_var = torch.autograd.Variable(target).to(device)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
losses.update(loss.item(), input.size(0))
top1.update(prec1.item(), input.size(0))
top5.update(prec5.item(), input.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
t2 = time.time()
if verbose:
print('* Test loss: %.3f top1: %.3f top5: %.3f time: %.3f' %
(losses.avg, top1.avg, top5.avg, t2 - t1))
return top5.avg
if __name__ == "__main__":
args = parse_args()
device = torch.device('cuda') if torch.cuda.is_available() and args.n_gpu > 0 else torch.device('cpu')
model = get_model_and_checkpoint(args.model_type, args.dataset, checkpoint_path=args.ckpt_path, n_gpu=args.n_gpu)
_, val_loader = init_data(args)
config_list = [{
'op_types': ['Conv2d', 'Linear']
}]
pruner = AMCPruner(
model, config_list, validate, val_loader, model_type=args.model_type, dataset=args.dataset,
train_episode=args.train_episode, flops_ratio=args.flops_ratio, lbound=args.lbound,
rbound=args.rbound, suffix=args.suffix)
pruner.compress()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import os
import time
import argparse
import shutil
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tensorboardX import SummaryWriter
from torchvision.models import resnet
from nni.algorithms.compression.pytorch.pruning.amc.lib.net_measure import measure_model
from nni.algorithms.compression.pytorch.pruning.amc.lib.utils import get_output_folder
from nni.compression.pytorch import ModelSpeedup
from data import get_dataset
from utils import AverageMeter, accuracy, progress_bar
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from mobilenet import MobileNet
from mobilenet_v2 import MobileNetV2
def parse_args():
parser = argparse.ArgumentParser(description='AMC train / fine-tune script')
parser.add_argument('--model_type', default='mobilenet', type=str,
choices=['mobilenet', 'mobilenetv2', 'resnet18', 'resnet34', 'resnet50'],
help='name of the model to train')
parser.add_argument('--dataset', default='cifar10', type=str, help='name of the dataset to train')
parser.add_argument('--lr', default=0.05, type=float, help='learning rate')
parser.add_argument('--n_gpu', default=4, type=int, help='number of GPUs to use')
parser.add_argument('--batch_size', default=256, type=int, help='batch size')
    parser.add_argument('--n_worker', default=32, type=int, help='number of data loader workers')
    parser.add_argument('--lr_type', default='cos', type=str, help='lr scheduler (exp/cos/fixed)')
parser.add_argument('--n_epoch', default=150, type=int, help='number of epochs to train')
parser.add_argument('--wd', default=4e-5, type=float, help='weight decay')
parser.add_argument('--seed', default=None, type=int, help='random seed to set')
parser.add_argument('--data_root', default='./data', type=str, help='dataset path')
# resume
parser.add_argument('--ckpt_path', default=None, type=str, help='checkpoint path to fine tune')
parser.add_argument('--mask_path', default=None, type=str, help='mask path for speedup')
# run eval
parser.add_argument('--eval', action='store_true', help='Simply run eval')
parser.add_argument('--calc_flops', action='store_true', help='Calculate flops')
return parser.parse_args()
def get_model(args):
print('=> Building model..')
if args.dataset == 'imagenet':
n_class = 1000
elif args.dataset == 'cifar10':
n_class = 10
else:
raise NotImplementedError
if args.model_type == 'mobilenet':
net = MobileNet(n_class=n_class)
elif args.model_type == 'mobilenetv2':
net = MobileNetV2(n_class=n_class)
elif args.model_type.startswith('resnet'):
net = resnet.__dict__[args.model_type](pretrained=True)
in_features = net.fc.in_features
net.fc = nn.Linear(in_features, n_class)
else:
raise NotImplementedError
if args.ckpt_path is not None:
        # the checkpoint can be a state_dict exported by amc_search.py or saved by amc_train.py
print('=> Loading checkpoint {} ..'.format(args.ckpt_path))
net.load_state_dict(torch.load(args.ckpt_path, torch.device('cpu')))
if args.mask_path is not None:
SZ = 224 if args.dataset == 'imagenet' else 32
data = torch.randn(2, 3, SZ, SZ)
ms = ModelSpeedup(net, data, args.mask_path, torch.device('cpu'))
ms.speedup_model()
net.to(args.device)
if torch.cuda.is_available() and args.n_gpu > 1:
net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
return net
def train(epoch, train_loader, device):
print('\nEpoch: %d' % epoch)
net.train()
batch_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
end = time.time()
for batch_idx, (inputs, targets) in enumerate(train_loader):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.item(), inputs.size(0))
top1.update(prec1.item(), inputs.size(0))
top5.update(prec5.item(), inputs.size(0))
# timing
batch_time.update(time.time() - end)
end = time.time()
progress_bar(batch_idx, len(train_loader), 'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
.format(losses.avg, top1.avg, top5.avg))
writer.add_scalar('loss/train', losses.avg, epoch)
writer.add_scalar('acc/train_top1', top1.avg, epoch)
writer.add_scalar('acc/train_top5', top5.avg, epoch)
def test(epoch, test_loader, device, save=True):
global best_acc
net.eval()
batch_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
end = time.time()
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(test_loader):
inputs, targets = inputs.to(device), targets.to(device)
outputs = net(inputs)
loss = criterion(outputs, targets)
# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.item(), inputs.size(0))
top1.update(prec1.item(), inputs.size(0))
top5.update(prec5.item(), inputs.size(0))
# timing
batch_time.update(time.time() - end)
end = time.time()
progress_bar(batch_idx, len(test_loader), 'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'
.format(losses.avg, top1.avg, top5.avg))
if save:
writer.add_scalar('loss/test', losses.avg, epoch)
writer.add_scalar('acc/test_top1', top1.avg, epoch)
writer.add_scalar('acc/test_top5', top5.avg, epoch)
is_best = False
if top1.avg > best_acc:
best_acc = top1.avg
is_best = True
print('Current best acc: {}'.format(best_acc))
save_checkpoint({
'epoch': epoch,
'model': args.model_type,
'dataset': args.dataset,
'state_dict': net.module.state_dict() if isinstance(net, nn.DataParallel) else net.state_dict(),
'acc': top1.avg,
'optimizer': optimizer.state_dict(),
}, is_best, checkpoint_dir=log_dir)
def adjust_learning_rate(optimizer, epoch):
if args.lr_type == 'cos': # cos without warm-up
lr = 0.5 * args.lr * (1 + math.cos(math.pi * epoch / args.n_epoch))
elif args.lr_type == 'exp':
step = 1
decay = 0.96
lr = args.lr * (decay ** (epoch // step))
elif args.lr_type == 'fixed':
lr = args.lr
else:
raise NotImplementedError
print('=> lr: {}'.format(lr))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
def save_checkpoint(state, is_best, checkpoint_dir='.'):
filename = os.path.join(checkpoint_dir, 'ckpt.pth')
print('=> Saving checkpoint to {}'.format(filename))
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, filename.replace('.pth', '.best.pth'))
if __name__ == '__main__':
args = parse_args()
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True
args.device = torch.device('cuda') if torch.cuda.is_available() and args.n_gpu > 0 else torch.device('cpu')
best_acc = 0 # best test accuracy
start_epoch = 0 # start from epoch 0 or last checkpoint epoch
if args.seed is not None:
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
print('=> Preparing data..')
train_loader, val_loader, n_class = get_dataset(args.dataset, args.batch_size, args.n_worker,
data_root=args.data_root)
net = get_model(args) # for measure
if args.calc_flops:
IMAGE_SIZE = 224 if args.dataset == 'imagenet' else 32
n_flops, n_params = measure_model(net, IMAGE_SIZE, IMAGE_SIZE, args.device)
print('=> Model Parameter: {:.3f} M, FLOPs: {:.3f}M'.format(n_params / 1e6, n_flops / 1e6))
exit(0)
criterion = nn.CrossEntropyLoss()
print('Using SGD...')
print('weight decay = {}'.format(args.wd))
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.wd)
if args.eval: # just run eval
print('=> Start evaluation...')
test(0, val_loader, args.device, save=False)
else: # train
print('=> Start training...')
print('Training {} on {}...'.format(args.model_type, args.dataset))
train_type = 'train' if args.ckpt_path is None else 'finetune'
log_dir = get_output_folder('./logs', '{}_{}_{}'.format(args.model_type, args.dataset, train_type))
print('=> Saving logs to {}'.format(log_dir))
# tf writer
writer = SummaryWriter(logdir=log_dir)
for epoch in range(start_epoch, start_epoch + args.n_epoch):
lr = adjust_learning_rate(optimizer, epoch)
train(epoch, train_loader, args.device)
test(epoch, val_loader, args.device)
writer.close()
print('=> Best top-1 acc: {}%'.format(best_acc))
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import os
def get_dataset(dset_name, batch_size, n_worker, data_root='../../data'):
cifar_tran_train = [
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
cifar_tran_test = [
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
print('=> Preparing data..')
if dset_name == 'cifar10':
transform_train = transforms.Compose(cifar_tran_train)
transform_test = transforms.Compose(cifar_tran_test)
trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
num_workers=n_worker, pin_memory=True, sampler=None)
testset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_test)
val_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,
num_workers=n_worker, pin_memory=True)
n_class = 10
elif dset_name == 'imagenet':
# get dir
traindir = os.path.join(data_root, 'train')
valdir = os.path.join(data_root, 'val')
# preprocessing
input_size = 224
imagenet_tran_train = [
transforms.RandomResizedCrop(input_size, scale=(0.2, 1.0)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
imagenet_tran_test = [
transforms.Resize(int(input_size / 0.875)),
transforms.CenterCrop(input_size),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
train_loader = torch.utils.data.DataLoader(
datasets.ImageFolder(traindir, transforms.Compose(imagenet_tran_train)),
batch_size=batch_size, shuffle=True,
num_workers=n_worker, pin_memory=True, sampler=None)
val_loader = torch.utils.data.DataLoader(
datasets.ImageFolder(valdir, transforms.Compose(imagenet_tran_test)),
batch_size=batch_size, shuffle=False,
num_workers=n_worker, pin_memory=True)
n_class = 1000
else:
raise NotImplementedError
return train_loader, val_loader, n_class
def get_split_dataset(dset_name, batch_size, n_worker, val_size, data_root='../data', shuffle=True):
    '''
    Split the train set into train / val subsets for the RL search.
    '''
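    # With shuffle=False a sequential subset sampler is used, so the same
    # validation subset is drawn in the same order on every run, presumably to
    # keep reward evaluations comparable across RL search episodes.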
if shuffle:
index_sampler = SubsetRandomSampler
else: # every time we use the same order for the split subset
class SubsetSequentialSampler(SubsetRandomSampler):
def __iter__(self):
return (self.indices[i] for i in torch.arange(len(self.indices)).int())
index_sampler = SubsetSequentialSampler
print('=> Preparing data: {}...'.format(dset_name))
if dset_name == 'cifar10':
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
        trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)
valset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_test)
n_train = len(trainset)
indices = list(range(n_train))
        # index shuffling is disabled so the train/val split stays deterministic
        # np.random.shuffle(indices)
assert val_size < n_train
train_idx, val_idx = indices[val_size:], indices[:val_size]
train_sampler = index_sampler(train_idx)
val_sampler = index_sampler(val_idx)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, sampler=train_sampler,
num_workers=n_worker, pin_memory=True)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, sampler=val_sampler,
num_workers=n_worker, pin_memory=True)
n_class = 10
elif dset_name == 'imagenet':
train_dir = os.path.join(data_root, 'train')
val_dir = os.path.join(data_root, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
input_size = 224
train_transform = transforms.Compose([
transforms.RandomResizedCrop(input_size),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
])
test_transform = transforms.Compose([
transforms.Resize(int(input_size/0.875)),
transforms.CenterCrop(input_size),
transforms.ToTensor(),
normalize,
])
trainset = datasets.ImageFolder(train_dir, train_transform)
valset = datasets.ImageFolder(train_dir, test_transform)
n_train = len(trainset)
indices = list(range(n_train))
np.random.shuffle(indices)
assert val_size < n_train
train_idx, val_idx = indices[val_size:], indices[:val_size]
train_sampler = index_sampler(train_idx)
val_sampler = index_sampler(val_idx)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler,
num_workers=n_worker, pin_memory=True)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, sampler=val_sampler,
num_workers=n_worker, pin_memory=True)
n_class = 1000
else:
raise NotImplementedError
return train_loader, val_loader, n_class
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import os
import time
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
if self.count > 0:
self.avg = self.sum / self.count
def accumulate(self, val, n=1):
self.sum += val
self.count += n
if self.count > 0:
self.avg = self.sum / self.count
def accuracy(output, target, topk=(1, 5)):
    """Computes the precision@k for the specified values of k"""
    batch_size = target.size(0)
    num = output.size(1)
    # drop any k larger than the number of classes; a 0.0 placeholder is
    # appended for each dropped k so the caller still receives len(topk) values
    target_topk = []
    appendices = []
    for k in topk:
        if k <= num:
            target_topk.append(k)
        else:
            appendices.append([0.0])
    topk = target_topk
maxk = max(topk)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].contiguous().view(-1).float().sum(0)
res.append(correct_k.mul_(100.0 / batch_size))
return res + appendices
# Custom progress bar
_, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width)
TOTAL_BAR_LENGTH = 40.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
def format_time(seconds):
days = int(seconds / 3600 / 24)
seconds = seconds - days * 3600 * 24
hours = int(seconds / 3600)
seconds = seconds - hours * 3600
minutes = int(seconds / 60)
seconds = seconds - minutes * 60
secondsf = int(seconds)
seconds = seconds - secondsf
millis = int(seconds * 1000)
f = ''
i = 1
if days > 0:
f += str(days) + 'D'
i += 1
if hours > 0 and i <= 2:
f += str(hours) + 'h'
i += 1
if minutes > 0 and i <= 2:
f += str(minutes) + 'm'
i += 1
if secondsf > 0 and i <= 2:
f += str(secondsf) + 's'
i += 1
if millis > 0 and i <= 2:
f += str(millis) + 'ms'
i += 1
if f == '':
f = '0ms'
return f
global last_time, begin_time
if current == 0:
begin_time = time.time() # Reset for new bar.
cur_len = int(TOTAL_BAR_LENGTH*current/total)
rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
sys.stdout.write(' [')
for i in range(cur_len):
sys.stdout.write('=')
sys.stdout.write('>')
for i in range(rest_len):
sys.stdout.write('.')
sys.stdout.write(']')
cur_time = time.time()
step_time = cur_time - last_time
last_time = cur_time
tot_time = cur_time - begin_time
L = []
L.append(' Step: %s' % format_time(step_time))
L.append(' | Tot: %s' % format_time(tot_time))
if msg:
L.append(' | ' + msg)
msg = ''.join(L)
sys.stdout.write(msg)
for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
sys.stdout.write(' ')
# Go back to the center of the bar.
for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
sys.stdout.write('\b')
sys.stdout.write(' %d/%d ' % (current+1, total))
if current < total-1:
sys.stdout.write('\r')
else:
sys.stdout.write('\n')
sys.stdout.flush()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
Example for supported automatic pruning algorithms.
In this example, we present the usage of the automatic pruners (NetAdaptPruner, SimulatedAnnealingPruner, ADMMPruner, AutoCompressPruner). L1, L2 and FPGM pruners are also executed for comparison purposes.
'''
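# Overall flow: load data -> train or load a pretrained model -> prune with the
# selected pruner -> optionally speed up the masked model -> optionally fine-tune;
# FLOPs, parameter counts and accuracies are dumped to result.json at the end.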
import argparse
import os
import sys
import json
import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from torchvision import datasets, transforms
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner, L2FilterPruner, FPGMPruner
from nni.algorithms.compression.pytorch.pruning import SimulatedAnnealingPruner, ADMMPruner, NetAdaptPruner, AutoCompressPruner
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from mnist.lenet import LeNet
from cifar10.vgg import VGG
from cifar10.resnet import ResNet18, ResNet50
def get_data(dataset, data_dir, batch_size, test_batch_size):
'''
get data
'''
    kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}
if dataset == 'mnist':
train_loader = torch.utils.data.DataLoader(
datasets.MNIST(data_dir, train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True, **kwargs)
val_loader = torch.utils.data.DataLoader(
datasets.MNIST(data_dir, train=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=test_batch_size, shuffle=True, **kwargs)
criterion = torch.nn.NLLLoss()
elif dataset == 'cifar10':
normalize = transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10(data_dir, train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=batch_size, shuffle=True, **kwargs)
val_loader = torch.utils.data.DataLoader(
datasets.CIFAR10(data_dir, train=False, transform=transforms.Compose([
transforms.ToTensor(),
normalize,
])),
batch_size=batch_size, shuffle=False, **kwargs)
criterion = torch.nn.CrossEntropyLoss()
return train_loader, val_loader, criterion
def train(args, model, device, train_loader, criterion, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, criterion, val_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in val_loader:
data, target = data.to(device), target.to(device)
output = model(data)
# sum up batch loss
test_loss += criterion(output, target).item()
# get the index of the max log-probability
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(val_loader.dataset)
accuracy = correct / len(val_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
test_loss, correct, len(val_loader.dataset), 100. * accuracy))
return accuracy
def get_trained_model_optimizer(args, device, train_loader, val_loader, criterion):
if args.model == 'LeNet':
model = LeNet().to(device)
if args.load_pretrained_model:
model.load_state_dict(torch.load(args.pretrained_model_dir))
optimizer = torch.optim.Adadelta(model.parameters(), lr=1e-4)
else:
optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
elif args.model == 'vgg16':
model = VGG(depth=16).to(device)
if args.load_pretrained_model:
model.load_state_dict(torch.load(args.pretrained_model_dir))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
else:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
elif args.model == 'resnet18':
model = ResNet18().to(device)
if args.load_pretrained_model:
model.load_state_dict(torch.load(args.pretrained_model_dir))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
else:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
elif args.model == 'resnet50':
model = ResNet50().to(device)
if args.load_pretrained_model:
model.load_state_dict(torch.load(args.pretrained_model_dir))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
else:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
else:
raise ValueError("model not recognized")
if not args.load_pretrained_model:
best_acc = 0
best_epoch = 0
for epoch in range(args.pretrain_epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = test(model, device, criterion, val_loader)
if acc > best_acc:
best_acc = acc
best_epoch = epoch
state_dict = model.state_dict()
model.load_state_dict(state_dict)
print('Best acc:', best_acc)
print('Best epoch:', best_epoch)
if args.save_model:
torch.save(state_dict, os.path.join(args.experiment_data_dir, 'model_trained.pth'))
print('Model trained saved to %s' % args.experiment_data_dir)
return model, optimizer
def get_dummy_input(args, device):
    if args.dataset == 'mnist':
        dummy_input = torch.randn([args.test_batch_size, 1, 28, 28]).to(device)
    elif args.dataset == 'cifar10':
        dummy_input = torch.randn([args.test_batch_size, 3, 32, 32]).to(device)
    elif args.dataset == 'imagenet':
        dummy_input = torch.randn([args.test_batch_size, 3, 256, 256]).to(device)
    return dummy_input
def get_input_size(dataset):
if dataset == 'mnist':
input_size = (1, 1, 28, 28)
elif dataset == 'cifar10':
input_size = (1, 3, 32, 32)
elif dataset == 'imagenet':
input_size = (1, 3, 256, 256)
return input_size
def main(args):
# prepare dataset
torch.manual_seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_loader, val_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
model, optimizer = get_trained_model_optimizer(args, device, train_loader, val_loader, criterion)
def short_term_fine_tuner(model, epochs=1):
for epoch in range(epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
def trainer(model, optimizer, criterion, epoch):
return train(args, model, device, train_loader, criterion, optimizer, epoch=epoch)
def evaluator(model):
return test(model, device, criterion, val_loader)
# used to save the performance of the original & pruned & finetuned models
result = {'flops': {}, 'params': {}, 'performance':{}}
flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
result['flops']['original'] = flops
result['params']['original'] = params
evaluation_result = evaluator(model)
print('Evaluation result (original model): %s' % evaluation_result)
result['performance']['original'] = evaluation_result
# module types to prune, only "Conv2d" supported for channel pruning
if args.base_algo in ['l1', 'l2', 'fpgm']:
op_types = ['Conv2d']
elif args.base_algo == 'level':
op_types = ['default']
config_list = [{
'sparsity': args.sparsity,
'op_types': op_types
}]
dummy_input = get_dummy_input(args, device)
if args.pruner == 'L1FilterPruner':
pruner = L1FilterPruner(model, config_list)
elif args.pruner == 'L2FilterPruner':
pruner = L2FilterPruner(model, config_list)
elif args.pruner == 'FPGMPruner':
pruner = FPGMPruner(model, config_list)
elif args.pruner == 'NetAdaptPruner':
pruner = NetAdaptPruner(model, config_list, short_term_fine_tuner=short_term_fine_tuner, evaluator=evaluator,
base_algo=args.base_algo, experiment_data_dir=args.experiment_data_dir)
elif args.pruner == 'ADMMPruner':
# users are free to change the config here
if args.model == 'LeNet':
if args.base_algo in ['l1', 'l2', 'fpgm']:
config_list = [{
'sparsity': 0.8,
'op_types': ['Conv2d'],
'op_names': ['conv1']
}, {
'sparsity': 0.92,
'op_types': ['Conv2d'],
'op_names': ['conv2']
}]
elif args.base_algo == 'level':
config_list = [{
'sparsity': 0.8,
'op_names': ['conv1']
}, {
'sparsity': 0.92,
'op_names': ['conv2']
}, {
'sparsity': 0.991,
'op_names': ['fc1']
}, {
'sparsity': 0.93,
'op_names': ['fc2']
}]
else:
raise ValueError('Example only implemented for LeNet.')
pruner = ADMMPruner(model, config_list, trainer=trainer, num_iterations=2, epochs_per_iteration=2)
elif args.pruner == 'SimulatedAnnealingPruner':
pruner = SimulatedAnnealingPruner(
model, config_list, evaluator=evaluator, base_algo=args.base_algo,
cool_down_rate=args.cool_down_rate, experiment_data_dir=args.experiment_data_dir)
elif args.pruner == 'AutoCompressPruner':
pruner = AutoCompressPruner(
model, config_list, trainer=trainer, evaluator=evaluator, dummy_input=dummy_input,
num_iterations=3, optimize_mode='maximize', base_algo=args.base_algo,
cool_down_rate=args.cool_down_rate, admm_num_iterations=30, admm_epochs_per_iteration=5,
experiment_data_dir=args.experiment_data_dir)
else:
raise ValueError(
"Pruner not supported.")
# Pruner.compress() returns the masked model
# but for AutoCompressPruner, Pruner.compress() returns directly the pruned model
model = pruner.compress()
evaluation_result = evaluator(model)
print('Evaluation result (masked model): %s' % evaluation_result)
result['performance']['pruned'] = evaluation_result
if args.save_model:
pruner.export_model(
os.path.join(args.experiment_data_dir, 'model_masked.pth'), os.path.join(args.experiment_data_dir, 'mask.pth'))
print('Masked model saved to %s' % args.experiment_data_dir)
# model speed up
if args.speed_up:
if args.pruner != 'AutoCompressPruner':
if args.model == 'LeNet':
model = LeNet().to(device)
elif args.model == 'vgg16':
model = VGG(depth=16).to(device)
elif args.model == 'resnet18':
model = ResNet18().to(device)
elif args.model == 'resnet50':
model = ResNet50().to(device)
model.load_state_dict(torch.load(os.path.join(args.experiment_data_dir, 'model_masked.pth')))
masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')
m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
m_speedup.speedup_model()
evaluation_result = evaluator(model)
print('Evaluation result (speed up model): %s' % evaluation_result)
result['performance']['speedup'] = evaluation_result
torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
print('Speed up model saved to %s' % args.experiment_data_dir)
flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
result['flops']['speedup'] = flops
result['params']['speedup'] = params
if args.fine_tune:
if args.dataset == 'mnist':
optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
elif args.dataset == 'cifar10' and args.model == 'vgg16':
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
elif args.dataset == 'cifar10' and args.model == 'resnet18':
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
elif args.dataset == 'cifar10' and args.model == 'resnet50':
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
best_acc = 0
for epoch in range(args.fine_tune_epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = evaluator(model)
if acc > best_acc:
best_acc = acc
torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
print('Evaluation result (fine tuned): %s' % best_acc)
print('Fine-tuned model saved to %s' % args.experiment_data_dir)
result['performance']['finetuned'] = best_acc
with open(os.path.join(args.experiment_data_dir, 'result.json'), 'w+') as f:
json.dump(result, f)
if __name__ == '__main__':
def str2bool(s):
if isinstance(s, bool):
return s
if s.lower() in ('yes', 'true', 't', 'y', '1'):
return True
if s.lower() in ('no', 'false', 'f', 'n', '0'):
return False
raise argparse.ArgumentTypeError('Boolean value expected.')
parser = argparse.ArgumentParser(description='PyTorch example for automatic pruners (SimulatedAnnealing, ADMM, NetAdapt, AutoCompress)')
# dataset and model
parser.add_argument('--dataset', type=str, default='cifar10',
help='dataset to use, mnist, cifar10 or imagenet')
parser.add_argument('--data-dir', type=str, default='./data/',
help='dataset directory')
parser.add_argument('--model', type=str, default='vgg16',
help='model to use, LeNet, vgg16, resnet18 or resnet50')
parser.add_argument('--load-pretrained-model', type=str2bool, default=False,
help='whether to load pretrained model')
parser.add_argument('--pretrained-model-dir', type=str, default='./',
help='path to pretrained model')
parser.add_argument('--pretrain-epochs', type=int, default=100,
help='number of epochs to pretrain the model')
parser.add_argument('--batch-size', type=int, default=64,
help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=64,
help='input batch size for testing (default: 64)')
parser.add_argument('--fine-tune', type=str2bool, default=True,
help='whether to fine-tune the pruned model')
parser.add_argument('--fine-tune-epochs', type=int, default=5,
help='epochs to fine tune')
parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
help='For saving experiment data')
# pruner
parser.add_argument('--pruner', type=str, default='SimulatedAnnealingPruner',
help='pruner to use')
parser.add_argument('--base-algo', type=str, default='l1',
help='base pruning algorithm. level, l1, l2, or fpgm')
parser.add_argument('--sparsity', type=float, default=0.1,
help='target overall sparsity')
# param for SimulatedAnnealingPruner
parser.add_argument('--cool-down-rate', type=float, default=0.9,
help='cool down rate')
# param for NetAdaptPruner
parser.add_argument('--sparsity-per-iteration', type=float, default=0.05,
help='sparsity_per_iteration of NetAdaptPruner')
# speed-up
parser.add_argument('--speed-up', type=str2bool, default=False,
help='Whether to speed-up the pruned model')
# others
parser.add_argument('--log-interval', type=int, default=200,
help='how many batches to wait before logging training status')
parser.add_argument('--save-model', type=str2bool, default=True,
help='whether to save the masked model and the mask file')
args = parser.parse_args()
if not os.path.exists(args.experiment_data_dir):
os.makedirs(args.experiment_data_dir)
main(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for supported basic pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
You can also try auto_pruners_torch.py to see the usage of some automatic pruning algorithms.
'''
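# Illustrative invocations (a sketch; the file name basic_pruners_torch.py is an
# assumption inferred from the auto_pruners_torch.py reference above):
#   python basic_pruners_torch.py --dataset mnist --model lenet --pruner l1filter --sparsity 0.5 --speed-up
#   python basic_pruners_torch.py --dataset cifar10 --model vgg16 --pruner slim --sparsity 0.7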
import logging
import argparse
import os
import sys
import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from torchvision import datasets, transforms
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from mnist.lenet import LeNet
from cifar10.vgg import VGG
from cifar10.resnet import ResNet18
from nni.compression.pytorch.utils.counter import count_flops_params
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import (
LevelPruner,
SlimPruner,
FPGMPruner,
TaylorFOWeightFilterPruner,
L1FilterPruner,
L2FilterPruner,
AGPPruner,
ActivationMeanRankFilterPruner,
ActivationAPoZRankFilterPruner
)
_logger = logging.getLogger('mnist_example')
_logger.setLevel(logging.INFO)
str2pruner = {
'level': LevelPruner,
'l1filter': L1FilterPruner,
'l2filter': L2FilterPruner,
'slim': SlimPruner,
'agp': AGPPruner,
'fpgm': FPGMPruner,
'mean_activation': ActivationMeanRankFilterPruner,
'apoz': ActivationAPoZRankFilterPruner,
'taylorfo': TaylorFOWeightFilterPruner
}
def get_dummy_input(args, device):
if args.dataset == 'mnist':
dummy_input = torch.randn([args.test_batch_size, 1, 28, 28]).to(device)
elif args.dataset in ['cifar10', 'imagenet']:
dummy_input = torch.randn([args.test_batch_size, 3, 32, 32]).to(device)
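# note: 32x32 is only an approximation for imagenet (real ImageNet inputs are
# typically 224x224); the loaders below only cover mnist and cifar10, so this branch
# is effectively exercised for cifar10 alone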
return dummy_input
def get_data(dataset, data_dir, batch_size, test_batch_size):
kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}
if dataset == 'mnist':
train_loader = torch.utils.data.DataLoader(
datasets.MNIST(data_dir, train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST(data_dir, train=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=test_batch_size, shuffle=True, **kwargs)
criterion = torch.nn.NLLLoss()
elif dataset == 'cifar10':
normalize = transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10(data_dir, train=True, transform=transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomCrop(32, 4),
transforms.ToTensor(),
normalize,
]), download=True),
batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10(data_dir, train=False, transform=transforms.Compose([
transforms.ToTensor(),
normalize,
])),
batch_size=batch_size, shuffle=False, **kwargs)
criterion = torch.nn.CrossEntropyLoss()
return train_loader, test_loader, criterion
def get_model_optimizer_scheduler(args, device, train_loader, test_loader, criterion):
if args.model == 'lenet':
model = LeNet().to(device)
if args.pretrained_model_dir is None:
optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
elif args.model == 'vgg16':
model = VGG(depth=16).to(device)
if args.pretrained_model_dir is None:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
elif args.model == 'vgg19':
model = VGG(depth=19).to(device)
if args.pretrained_model_dir is None:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
elif args.model == 'resnet18':
model = ResNet18().to(device)
if args.pretrained_model_dir is None:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(
optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
else:
raise ValueError("model not recognized")
if args.pretrained_model_dir is None:
print('start pre-training...')
best_acc = 0
for epoch in range(args.pretrain_epochs):
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
acc = test(args, model, device, criterion, test_loader)
if acc > best_acc:
best_acc = acc
state_dict = model.state_dict()
model.load_state_dict(state_dict)
acc = best_acc
torch.save(state_dict, os.path.join(args.experiment_data_dir, f'pretrain_{args.dataset}_{args.model}.pth'))
print('Trained model saved to %s' % args.experiment_data_dir)
else:
model.load_state_dict(torch.load(args.pretrained_model_dir))
best_acc = test(args, model, device, criterion, test_loader)
# set up a new optimizer for pruning
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
print('Pretrained model acc:', best_acc)
return model, optimizer, scheduler
def train(args, model, device, train_loader, criterion, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
if args.dry_run:
break
def test(args, model, device, criterion, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = 100 * correct / len(test_loader.dataset)
print('Test Loss: {} Accuracy: {}%\n'.format(
test_loss, acc))
return acc
def main(args):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs(args.experiment_data_dir, exist_ok=True)
# prepare model and data
train_loader, test_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
model, optimizer, _ = get_model_optimizer_scheduler(args, device, train_loader, test_loader, criterion)
dummy_input = get_dummy_input(args, device)
flops, params, _ = count_flops_params(model, dummy_input)
print(f"FLOPs: {flops}, params: {params}")
print(f'start {args.pruner} pruning...')
def trainer(model, optimizer, criterion, epoch):
return train(args, model, device, train_loader, criterion, optimizer, epoch=epoch)
pruner_cls = str2pruner[args.pruner]
kw_args = {}
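# each config_list entry applies its 'sparsity' to the operators matched by
# 'op_types' / 'op_names'; this default is overridden per pruner / per model below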
config_list = [{
'sparsity': args.sparsity,
'op_types': ['Conv2d']
}]
if args.pruner == 'level':
config_list = [{
'sparsity': args.sparsity,
'op_types': ['default']
}]
else:
if args.global_sort:
print('Enable the global_sort mode')
# only taylor pruner supports global sort mode currently
kw_args['global_sort'] = True
if args.dependency_aware:
dummy_input = get_dummy_input(args, device)
print('Enable the dependency_aware mode')
# note that not all pruners support the dependency_aware mode
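# (in dependency-aware mode the pruner prunes the same channels across layers that
# share channel dependencies, e.g. the branches feeding a residual add)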
kw_args['dependency_aware'] = True
kw_args['dummy_input'] = dummy_input
if args.pruner not in ('l1filter', 'l2filter', 'fpgm'):
# these settings only apply to training-aware pruners
kw_args['trainer'] = trainer
kw_args['optimizer'] = optimizer
kw_args['criterion'] = criterion
if args.pruner in ('mean_activation', 'apoz', 'taylorfo'):
kw_args['sparsifying_training_batches'] = 1
if args.pruner == 'slim':
kw_args['sparsifying_training_epochs'] = 1
if args.pruner == 'agp':
kw_args['pruning_algorithm'] = 'l1'
kw_args['num_iterations'] = 2
kw_args['epochs_per_iteration'] = 1
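# (AGP ramps the sparsity up gradually from 0 to the target over num_iterations pruning iterations)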
# Reproduce the result in the paper 'PRUNING FILTERS FOR EFFICIENT CONVNETS':
# Conv_1, Conv_8, Conv_9, Conv_10, Conv_11, Conv_12 are pruned with 50% sparsity, as 'VGG-16-pruned-A'.
# If you want to skip some layers, you can use 'exclude' as shown below.
if args.pruner == 'slim':
config_list = [{
'sparsity': args.sparsity,
'op_types': ['BatchNorm2d'],
}]
elif args.model == 'resnet18':
config_list = [{
'sparsity': args.sparsity,
'op_types': ['Conv2d']
}, {
'exclude': True,
'op_names': ['layer1.0.conv1', 'layer1.0.conv2']
}]
else:
config_list = [{
'sparsity': args.sparsity,
'op_types': ['Conv2d'],
'op_names': ['feature.0', 'feature.24', 'feature.27', 'feature.30', 'feature.34', 'feature.37']
}]
pruner = pruner_cls(model, config_list, **kw_args)
# Pruner.compress() returns the masked model
model = pruner.compress()
pruner.get_pruned_weights()
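# get_pruned_weights logs the weight sparsity actually reached in each masked layer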
# export the pruned model masks for model speedup
model_path = os.path.join(args.experiment_data_dir, 'pruned_{}_{}_{}.pth'.format(
args.model, args.dataset, args.pruner))
mask_path = os.path.join(args.experiment_data_dir, 'mask_{}_{}_{}.pth'.format(
args.model, args.dataset, args.pruner))
pruner.export_model(model_path=model_path, mask_path=mask_path)
if args.test_only:
test(args, model, device, criterion, test_loader)
if args.speed_up:
# Unwrap all modules to normal state
pruner._unwrap_model()
m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
m_speedup.speedup_model()
print('start finetuning...')
# The optimizer used by the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
best_top1 = 0
save_path = os.path.join(args.experiment_data_dir, 'finetuned.pth')
for epoch in range(args.fine_tune_epochs):
print('# Epoch {} #'.format(epoch))
train(args, model, device, train_loader, criterion, optimizer, epoch)
scheduler.step()
top1 = test(args, model, device, criterion, test_loader)
if top1 > best_top1:
best_top1 = top1
torch.save(model.state_dict(), save_path)
flops, params, results = count_flops_params(model, dummy_input)
print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_top1: .2f}')
if args.nni:
nni.report_final_result(best_top1)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
# dataset and model
parser.add_argument('--dataset', type=str, default='cifar10',
help='dataset to use, mnist, cifar10 or imagenet')
parser.add_argument('--data-dir', type=str, default='./data/',
help='dataset directory')
parser.add_argument('--model', type=str, default='vgg16',
choices=['lenet', 'vgg16', 'vgg19', 'resnet18'],
help='model to use')
parser.add_argument('--pretrained-model-dir', type=str, default=None,
help='path to pretrained model')
parser.add_argument('--pretrain-epochs', type=int, default=160,
help='number of epochs to pretrain the model')
parser.add_argument('--batch-size', type=int, default=128,
help='input batch size for training')
parser.add_argument('--test-batch-size', type=int, default=200,
help='input batch size for testing')
parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
help='For saving output checkpoints')
parser.add_argument('--log-interval', type=int, default=100, metavar='N',
help='how many batches to wait before logging training status')
parser.add_argument('--dry-run', action='store_true', default=False,
help='quickly check a single pass')
parser.add_argument('--multi-gpu', action='store_true', default=False,
help='run on multiple GPUs')
parser.add_argument('--test-only', action='store_true', default=False,
help='run test only')
# pruner
parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
help='toggle dependency aware mode')
parser.add_argument('--global-sort', action='store_true', default=False,
help='toggle global sort mode')
parser.add_argument('--pruner', type=str, default='l1filter',
choices=['level', 'l1filter', 'l2filter', 'slim', 'agp',
'fpgm', 'mean_activation', 'apoz', 'taylorfo'],
help='pruner to use')
# speed-up
parser.add_argument('--speed-up', action='store_true', default=False,
help='Whether to speed-up the pruned model')
# fine-tuning
parser.add_argument('--fine-tune-epochs', type=int, default=160,
help='epochs to fine tune')
parser.add_argument('--nni', action='store_true', default=False,
help="whether to tune the pruners using NNI tuners")
args = parser.parse_args()
if args.nni:
params = nni.get_next_parameter()
print(params)
args.sparsity = params['sparsity']
args.pruner = params['pruner']
args.model = params['model']
main(args)
import argparse
import json
import matplotlib.pyplot as plt
def plot_performance_comparison(args):
# reference data: performance of the original models and the results reported in the AutoCompress paper
references = {
'original':{
'cifar10':{
'vgg16':{
'performance': 0.9298,
'params':14987722.0,
'flops':314018314.0
},
'resnet18':{
'performance': 0.9433,
'params':11173962.0,
'flops':556651530.0
},
'resnet50':{
'performance': 0.9488,
'params':23520842.0,
'flops':1304694794.0
}
}
},
'AutoCompressPruner':{
'cifar10':{
'vgg16':{
'performance': 0.9321,
'params':52.2, # compression ratio (original params / pruned params)
'flops':8.8 # compression ratio (original flops / pruned flops)
},
'resnet18':{
'performance': 0.9381,
'params':54.2, # compression ratio (original params / pruned params)
'flops':12.2 # compression ratio (original flops / pruned flops)
}
}
}
}
markers = ['v', '^', '<', '1', '2', '3', '4', '8', '*', '+', 'o']
with open('cifar10/comparison_result_{}.json'.format(args.model), 'r') as jsonfile:
result = json.load(jsonfile)
pruners = result.keys()
performances = {}
flops = {}
params = {}
sparsities = {}
for pruner in pruners:
performances[pruner] = [val['performance'] for val in result[pruner]]
flops[pruner] = [val['flops'] for val in result[pruner]]
params[pruner] = [val['params'] for val in result[pruner]]
sparsities[pruner] = [val['sparsity'] for val in result[pruner]]
fig, axs = plt.subplots(2, 1, figsize=(8, 10))
fig.suptitle('Channel Pruning Comparison on {}/CIFAR10'.format(args.model))
fig.subplots_adjust(hspace=0.5)
for idx, pruner in enumerate(pruners):
axs[0].scatter(params[pruner], performances[pruner], marker=markers[idx], label=pruner)
axs[1].scatter(flops[pruner], performances[pruner], marker=markers[idx], label=pruner)
# references
params_original = references['original']['cifar10'][args.model]['params']
performance_original = references['original']['cifar10'][args.model]['performance']
axs[0].plot(params_original, performance_original, 'rx', label='original model')
if args.model in ['vgg16', 'resnet18']:
axs[0].plot(params_original/references['AutoCompressPruner']['cifar10'][args.model]['params'],
references['AutoCompressPruner']['cifar10'][args.model]['performance'],
'bx', label='AutoCompress Paper')
axs[0].set_title("Performance vs. Number of Parameters")
axs[0].set_xlabel("Number of Parameters")
axs[0].set_ylabel('Accuracy')
axs[0].legend()
# references
flops_original = references['original']['cifar10'][args.model]['flops']
performance_original = references['original']['cifar10'][args.model]['performance']
axs[1].plot(flops_original, performance_original, 'rx', label='original model')
if args.model in ['vgg16', 'resnet18']:
axs[1].plot(flops_original/references['AutoCompressPruner']['cifar10'][args.model]['flops'],
references['AutoCompressPruner']['cifar10'][args.model]['performance'],
'bx', label='AutoCompress Paper')
axs[1].set_title("Performance vs. FLOPs")
axs[1].set_xlabel("FLOPs")
axs[1].set_ylabel('Accuracy')
axs[1].legend()
plt.savefig('img/performance_comparison_{}.png'.format(args.model))
plt.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Plot performance comparison of pruning algorithms')
parser.add_argument('--model', type=str, default='vgg16',
help='vgg16, resnet18 or resnet50')
args = parser.parse_args()
plot_performance_comparison(args)
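# Example usage (the script name plot_comparison.py is hypothetical; the script expects
# cifar10/comparison_result_<model>.json to exist and writes img/performance_comparison_<model>.png):
#   python plot_comparison.py --model resnet18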
{
"L1FilterPruner": [
{
"sparsity": 0.1,
"params": 9642085.0,
"flops": 496882684.0,
"performance": 0.9436
},
{
"sparsity": 0.2,
"params": 8149126.0,
"flops": 436381222.0,
"performance": 0.9472
},
{
"sparsity": 0.3,
"params": 6705269.0,
"flops": 371666312.0,
"performance": 0.9391
},
{
"sparsity": 0.4,
"params": 5335138.0,
"flops": 307050934.0,
"performance": 0.9433
},
{
"sparsity": 0.5,
"params": 3998122.0,
"flops": 237900244.0,
"performance": 0.9379
},
{
"sparsity": 0.6,
"params": 2767325.0,
"flops": 175308326.0,
"performance": 0.9326
},
{
"sparsity": 0.7,
"params": 1617817.0,
"flops": 108532198.0,
"performance": 0.928
},
{
"sparsity": 0.8,
"params": 801338.0,
"flops": 53808728.0,
"performance": 0.9145
},
{
"sparsity": 0.9,
"params": 229372.0,
"flops": 15304972.0,
"performance": 0.8858
},
{
"sparsity": 0.95,
"params": 61337.0,
"flops": 4305146.0,
"performance": 0.8441
},
{
"sparsity": 0.975,
"params": 17763.0,
"flops": 1561644.0,
"performance": 0.7294
}
],
"L2FilterPruner": [
{
"sparsity": 0.1,
"params": 9680242.0,
"flops": 497492746.0,
"performance": 0.9423
},
{
"sparsity": 0.2,
"params": 8137784.0,
"flops": 436199900.0,
"performance": 0.9471
},
{
"sparsity": 0.3,
"params": 6702679.0,
"flops": 369733768.0,
"performance": 0.9415
},
{
"sparsity": 0.4,
"params": 5330426.0,
"flops": 305512736.0,
"performance": 0.9411
},
{
"sparsity": 0.5,
"params": 3961076.0,
"flops": 236467814.0,
"performance": 0.9349
},
{
"sparsity": 0.6,
"params": 2776512.0,
"flops": 175872204.0,
"performance": 0.9393
},
{
"sparsity": 0.7,
"params": 1622571.0,
"flops": 107994906.0,
"performance": 0.9295
},
{
"sparsity": 0.8,
"params": 797075.0,
"flops": 53534414.0,
"performance": 0.9187
},
{
"sparsity": 0.9,
"params": 232153.0,
"flops": 15385078.0,
"performance": 0.8838
},
{
"sparsity": 0.95,
"params": 58180.0,
"flops": 4510072.0,
"performance": 0.8396
},
{
"sparsity": 0.975,
"params": 16836.0,
"flops": 1429752.0,
"performance": 0.7482
}
],
"FPGMPruner": [
{
"sparsity": 0.1,
"params": 9705680.0,
"flops": 497899454.0,
"performance": 0.9443
},
{
"sparsity": 0.2,
"params": 8160468.0,
"flops": 436562544.0,
"performance": 0.946
},
{
"sparsity": 0.3,
"params": 6710052.0,
"flops": 367960482.0,
"performance": 0.9452
},
{
"sparsity": 0.4,
"params": 5334205.0,
"flops": 306166432.0,
"performance": 0.9412
},
{
"sparsity": 0.5,
"params": 4007259.0,
"flops": 237702210.0,
"performance": 0.9385
},
{
"sparsity": 0.6,
"params": 2782236.0,
"flops": 175813620.0,
"performance": 0.9304
},
{
"sparsity": 0.7,
"params": 1634603.0,
"flops": 108904676.0,
"performance": 0.9249
},
{
"sparsity": 0.8,
"params": 799610.0,
"flops": 53645918.0,
"performance": 0.9203
},
{
"sparsity": 0.9,
"params": 233644.0,
"flops": 15408784.0,
"performance": 0.8856
},
{
"sparsity": 0.95,
"params": 56518.0,
"flops": 4266910.0,
"performance": 0.83
},
{
"sparsity": 0.975,
"params": 17610.0,
"flops": 1441836.0,
"performance": 0.7356
}
],
"NetAdaptPruner": [
{
"sparsity": 0.1,
"params": 11173962.0,
"flops": 556651530.0,
"performance": 0.9474
},
{
"sparsity": 0.2,
"params": 10454958.0,
"flops": 545147466.0,
"performance": 0.9482
},
{
"sparsity": 0.3,
"params": 9299986.0,
"flops": 526681564.0,
"performance": 0.9469
},
{
"sparsity": 0.4,
"params": 8137618.0,
"flops": 508087276.0,
"performance": 0.9451
},
{
"sparsity": 0.5,
"params": 6267654.0,
"flops": 478185102.0,
"performance": 0.947
},
{
"sparsity": 0.6,
"params": 5277444.0,
"flops": 462341742.0,
"performance": 0.9469
},
{
"sparsity": 0.7,
"params": 4854190.0,
"flops": 455580628.0,
"performance": 0.9466
},
{
"sparsity": 0.8,
"params": 3531098.0,
"flops": 434411156.0,
"performance": 0.9472
}
],
"SimulatedAnnealingPruner": [
{
"sparsity": 0.1,
"params": 10307424.0,
"flops": 537697098.0,
"performance": 0.942
},
{
"sparsity": 0.2,
"params": 9264598.0,
"flops": 513101368.0,
"performance": 0.9456
},
{
"sparsity": 0.3,
"params": 7999316.0,
"flops": 489260738.0,
"performance": 0.946
},
{
"sparsity": 0.4,
"params": 6996176.0,
"flops": 450768626.0,
"performance": 0.9413
},
{
"sparsity": 0.5,
"params": 5412616.0,
"flops": 408698434.0,
"performance": 0.9477
},
{
"sparsity": 0.6,
"params": 5106924.0,
"flops": 391735326.0,
"performance": 0.9483
},
{
"sparsity": 0.7,
"params": 3032105.0,
"flops": 269777978.0,
"performance": 0.9414
},
{
"sparsity": 0.8,
"params": 2423230.0,
"flops": 294783862.0,
"performance": 0.9384
},
{
"sparsity": 0.9,
"params": 1151046.0,
"flops": 209639226.0,
"performance": 0.939
},
{
"sparsity": 0.95,
"params": 394406.0,
"flops": 108776618.0,
"performance": 0.923
},
{
"sparsity": 0.975,
"params": 250649.0,
"flops": 84645050.0,
"performance": 0.917
}
],
"AutoCompressPruner": [
{
"sparsity": 0.1,
"params": 10238286.0,
"flops": 536590794.0,
"performance": 0.9406
},
{
"sparsity": 0.2,
"params": 9272049.0,
"flops": 512333916.0,
"performance": 0.9392
},
{
"sparsity": 0.3,
"params": 8099915.0,
"flops": 485418056.0,
"performance": 0.9398
},
{
"sparsity": 0.4,
"params": 6864547.0,
"flops": 449359492.0,
"performance": 0.9406
},
{
"sparsity": 0.5,
"params": 6106994.0,
"flops": 430766432.0,
"performance": 0.9397
},
{
"sparsity": 0.6,
"params": 5338096.0,
"flops": 415085278.0,
"performance": 0.9384
},
{
"sparsity": 0.7,
"params": 3701330.0,
"flops": 351057878.0,
"performance": 0.938
},
{
"sparsity": 0.8,
"params": 2229760.0,
"flops": 269058346.0,
"performance": 0.9388
},
{
"sparsity": 0.9,
"params": 1108564.0,
"flops": 189355930.0,
"performance": 0.9348
},
{
"sparsity": 0.95,
"params": 616893.0,
"flops": 159314256.0,
"performance": 0.93
},
{
"sparsity": 0.975,
"params": 297368.0,
"flops": 113398292.0,
"performance": 0.9072
}
]
}
{
"L1FilterPruner": [
{
"sparsity": 0.1,
"params": 20378141.0,
"flops": 1134740738.0,
"performance": 0.9456
},
{
"sparsity": 0.2,
"params": 17286560.0,
"flops": 966734852.0,
"performance": 0.9433
},
{
"sparsity": 0.3,
"params": 14403947.0,
"flops": 807114812.0,
"performance": 0.9396
},
{
"sparsity": 0.4,
"params": 11558288.0,
"flops": 656314106.0,
"performance": 0.9402
},
{
"sparsity": 0.5,
"params": 8826728.0,
"flops": 507965924.0,
"performance": 0.9394
},
{
"sparsity": 0.6,
"params": 6319902.0,
"flops": 374211960.0,
"performance": 0.9372
},
{
"sparsity": 0.7,
"params": 4063713.0,
"flops": 246788556.0,
"performance": 0.9304
},
{
"sparsity": 0.8,
"params": 2120717.0,
"flops": 133614422.0,
"performance": 0.9269
},
{
"sparsity": 0.9,
"params": 652524.0,
"flops": 41973714.0,
"performance": 0.9081
},
{
"sparsity": 0.95,
"params": 195468.0,
"flops": 13732020.0,
"performance": 0.8723
},
{
"sparsity": 0.975,
"params": 58054.0,
"flops": 4268104.0,
"performance": 0.7941
}
],
"L2FilterPruner": [
{
"sparsity": 0.1,
"params": 20378141.0,
"flops": 1134740738.0,
"performance": 0.9442
},
{
"sparsity": 0.2,
"params": 17275244.0,
"flops": 966400928.0,
"performance": 0.9463
},
{
"sparsity": 0.3,
"params": 14415409.0,
"flops": 807710914.0,
"performance": 0.9367
},
{
"sparsity": 0.4,
"params": 11564310.0,
"flops": 656653008.0,
"performance": 0.9391
},
{
"sparsity": 0.5,
"params": 8843266.0,
"flops": 508086256.0,
"performance": 0.9381
},
{
"sparsity": 0.6,
"params": 6316815.0,
"flops": 373882614.0,
"performance": 0.9368
},
{
"sparsity": 0.7,
"params": 4054272.0,
"flops": 246477678.0,
"performance": 0.935
},
{
"sparsity": 0.8,
"params": 2129321.0,
"flops": 134527520.0,
"performance": 0.9275
},
{
"sparsity": 0.9,
"params": 667500.0,
"flops": 42927060.0,
"performance": 0.9129
},
{
"sparsity": 0.95,
"params": 192464.0,
"flops": 13669430.0,
"performance": 0.8757
},
{
"sparsity": 0.975,
"params": 58250.0,
"flops": 4365620.0,
"performance": 0.7978
}
],
"FPGMPruner": [
{
"sparsity": 0.1,
"params": 20401570.0,
"flops": 1135114552.0,
"performance": 0.9438
},
{
"sparsity": 0.2,
"params": 17321414.0,
"flops": 967137398.0,
"performance": 0.9427
},
{
"sparsity": 0.3,
"params": 14418221.0,
"flops": 807755756.0,
"performance": 0.9422
},
{
"sparsity": 0.4,
"params": 11565000.0,
"flops": 655412124.0,
"performance": 0.9403
},
{
"sparsity": 0.5,
"params": 8829840.0,
"flops": 506715294.0,
"performance": 0.9355
},
{
"sparsity": 0.6,
"params": 6308085.0,
"flops": 374231682.0,
"performance": 0.9359
},
{
"sparsity": 0.7,
"params": 4054237.0,
"flops": 246511714.0,
"performance": 0.9285
},
{
"sparsity": 0.8,
"params": 2134187.0,
"flops": 134456366.0,
"performance": 0.9275
},
{
"sparsity": 0.9,
"params": 665931.0,
"flops": 42859752.0,
"performance": 0.9083
},
{
"sparsity": 0.95,
"params": 191590.0,
"flops": 13641052.0,
"performance": 0.8762
},
{
"sparsity": 0.975,
"params": 57767.0,
"flops": 4350074.0,
"performance": 0.789
}
],
"NetAdaptPruner": [
{
"sparsity": 0.1,
"params": 22348970.0,
"flops": 1275701258.0,
"performance": 0.9404
},
{
"sparsity": 0.2,
"params": 21177162.0,
"flops": 1256952330.0,
"performance": 0.9445
},
{
"sparsity": 0.3,
"params": 18407434.0,
"flops": 1212636682.0,
"performance": 0.9433
},
{
"sparsity": 0.4,
"params": 16061284.0,
"flops": 1175098282.0,
"performance": 0.9401
}
],
"SimulatedAnnealingPruner": [
{
"sparsity": 0.1,
"params": 20551755.0,
"flops": 1230145122.0,
"performance": 0.9438
},
{
"sparsity": 0.2,
"params": 17766048.0,
"flops": 1159924128.0,
"performance": 0.9432
},
{
"sparsity": 0.3,
"params": 15105146.0,
"flops": 1094478662.0,
"performance": 0.943
},
{
"sparsity": 0.4,
"params": 12378092.0,
"flops": 1008801158.0,
"performance": 0.9398
},
{
"sparsity": 0.5,
"params": 9890487.0,
"flops": 911941770.0,
"performance": 0.9426
},
{
"sparsity": 0.6,
"params": 7638262.0,
"flops": 831218770.0,
"performance": 0.9412
},
{
"sparsity": 0.7,
"params": 5469936.0,
"flops": 691881792.0,
"performance": 0.9405
},
{
"sparsity": 0.8,
"params": 3668951.0,
"flops": 580850666.0,
"performance": 0.941
},
{
"sparsity": 0.9,
"params": 1765284.0,
"flops": 389162310.0,
"performance": 0.9294
}
],
"AutoCompressPruner": [
{
"sparsity": 0.1,
"params": 20660299.0,
"flops": 1228508590.0,
"performance": 0.9337
},
{
"sparsity": 0.2,
"params": 17940465.0,
"flops": 1152868146.0,
"performance": 0.9326
},
{
"sparsity": 0.3,
"params": 15335831.0,
"flops": 1084996094.0,
"performance": 0.9348
},
{
"sparsity": 0.4,
"params": 12821408.0,
"flops": 991305524.0,
"performance": 0.936
},
{
"sparsity": 0.5,
"params": 10695425.0,
"flops": 919638860.0,
"performance": 0.9349
},
{
"sparsity": 0.6,
"params": 8536821.0,
"flops": 802011678.0,
"performance": 0.9339
},
{
"sparsity": 0.7,
"params": 7276898.0,
"flops": 744248114.0,
"performance": 0.9337
},
{
"sparsity": 0.8,
"params": 5557721.0,
"flops": 643881710.0,
"performance": 0.9323
},
{
"sparsity": 0.9,
"params": 3925140.0,
"flops": 512545272.0,
"performance": 0.9304
},
{
"sparsity": 0.95,
"params": 2867004.0,
"flops": 365184762.0,
"performance": 0.9263
},
{
"sparsity": 0.975,
"params": 1773257.0,
"flops": 229320266.0,
"performance": 0.9175
}
]
}