Commit e773dfcc authored by qianyj's avatar qianyj
Browse files

create branch for v2.9

parents
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for fine-tuning the pruned model with KD.
Run basic_pruners_torch.py first to get the masks of the pruned model. Then pass the mask as argument for model speedup. The compressed model is further used for fine-tuning.
'''
import argparse
import os
import sys
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
from nni.compression.pytorch import ModelSpeedup
from torch.optim.lr_scheduler import MultiStepLR
from basic_pruners_torch import get_data
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from mnist.lenet import LeNet
from cifar10.vgg import VGG
class DistillKL(nn.Module):
    """Distilling the Knowledge in a Neural Network (Hinton et al., 2015).

    Computes the temperature-softened KL divergence between student and
    teacher logits, scaled by T^2 and averaged over the batch.
    """
    def __init__(self, T):
        super(DistillKL, self).__init__()
        self.T = T  # softmax temperature

    def forward(self, y_s, y_t):
        """Return the KD loss for student logits y_s against teacher logits y_t."""
        p_s = F.log_softmax(y_s / self.T, dim=1)
        p_t = F.softmax(y_t / self.T, dim=1)
        # fix: size_average=False is deprecated; reduction='sum' is the exact
        # equivalent (sum over all elements), then divide by the batch size.
        loss = F.kl_div(p_s, p_t, reduction='sum') * (self.T ** 2) / y_s.shape[0]
        return loss
def get_dummy_input(args, device):
    """Return a random input batch matching the dataset's image shape.

    Used by ModelSpeedup to trace the network.

    Raises
    ------
    ValueError
        If args.dataset is not recognized (previously this fell through and
        raised a confusing UnboundLocalError on the return statement).
    """
    if args.dataset == 'mnist':
        dummy_input = torch.randn([args.test_batch_size, 1, 28, 28]).to(device)
    elif args.dataset in ['cifar10', 'imagenet']:
        # NOTE(review): imagenet also gets 32x32 here — appears intentional for
        # this example's CIFAR-style VGG models; confirm if real imagenet is used.
        dummy_input = torch.randn([args.test_batch_size, 3, 32, 32]).to(device)
    else:
        raise ValueError('dataset not recognized: {}'.format(args.dataset))
    return dummy_input
def get_model_optimizer_scheduler(args, device, test_loader, criterion):
    """Build the [student, teacher] model pair for KD fine-tuning.

    Loads the pretrained teacher checkpoint, evaluates it, clones it into a
    student, optionally loads a pruned-student checkpoint, applies the pruning
    mask via ModelSpeedup, and returns (ModuleList([student, teacher]),
    optimizer, scheduler).
    """
    if args.model == 'LeNet':
        model = LeNet().to(device)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
    elif args.model == 'vgg19':
        model = VGG(depth=19).to(device)
    else:
        raise ValueError("model not recognized")

    # In this example, we set the architecture of teacher and student to be
    # the same. It is feasible to set a different teacher architecture.
    if args.teacher_model_dir is None:
        raise NotImplementedError('please load pretrained teacher model first')
    else:
        model.load_state_dict(torch.load(args.teacher_model_dir))
        best_acc = test(args, model, device, criterion, test_loader)

    model_t = deepcopy(model)  # teacher: frozen copy of the pretrained model
    model_s = deepcopy(model)  # student: will be sped-up and fine-tuned
    if args.student_model_dir is not None:
        # load the pruned student model checkpoint
        model_s.load_state_dict(torch.load(args.student_model_dir))

    # Replace masked-out channels with a physically smaller student network.
    # NOTE(review): assumes args.mask_path points to a valid mask file — confirm.
    dummy_input = get_dummy_input(args, device)
    m_speedup = ModelSpeedup(model_s, dummy_input, args.mask_path, device)
    m_speedup.speedup_model()

    module_list = nn.ModuleList([])
    module_list.append(model_s)
    module_list.append(model_t)

    # setup optimizer for fine-tuning the student model
    optimizer = torch.optim.SGD(model_s.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    # decay LR at 50% and 75% of the fine-tuning schedule
    scheduler = MultiStepLR(
        optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)

    print('Pretrained teacher model acc:', best_acc)
    return module_list, optimizer, scheduler
def train(args, models, device, train_loader, criterion, optimizer, epoch):
    """Run one fine-tuning epoch of the student, distilling from the teacher.

    `models` is [student, teacher]; only the student's parameters are updated.
    """
    student = models[0].train()
    teacher = models[-1].eval()
    kd_criterion = DistillKL(args.kd_T)
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        student_out = student(data)
        teacher_out = teacher(data)
        # total loss = hard-label classification loss + distillation loss
        loss = criterion(student_out, target) + kd_criterion(student_out, teacher_out)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break
def test(args, model, device, criterion, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = 100 * correct / len(test_loader.dataset)
print('Test Loss: {} Accuracy: {}%\n'.format(
test_loss, acc))
return acc
def main(args):
    """Speed up the pruned student, then fine-tune it with KD from the teacher.

    Saves the best student checkpoint (by top-1 test accuracy) to
    args.experiment_data_dir/model_trained.pth.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs(args.experiment_data_dir, exist_ok=True)

    # prepare data and the [student, teacher] model pair
    train_loader, test_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
    models, optimizer, scheduler = get_model_optimizer_scheduler(args, device, test_loader, criterion)

    if args.test_only:
        # fix: previously fell through and ran the full fine-tuning loop
        # even when only a test pass was requested.
        test(args, models[0], device, criterion, test_loader)
        return

    best_top1 = 0
    print('start fine-tuning...')
    for epoch in range(args.fine_tune_epochs):
        print('# Epoch {} #'.format(epoch))
        train(args, models, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        # evaluate the student only
        top1 = test(args, models[0], device, criterion, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            torch.save(models[0].state_dict(), os.path.join(args.experiment_data_dir, 'model_trained.pth'))
            print('Model trained saved to %s' % args.experiment_data_dir)
# Script entry point: parse CLI options and run KD fine-tuning.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model comporession')
    # dataset and model
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='dataset to use, mnist, cifar10 or imagenet')
    parser.add_argument('--data-dir', type=str, default='./data/',
                        help='dataset directory')
    parser.add_argument('--model', type=str, default='vgg16',
                        choices=['LeNet', 'vgg16', 'vgg19', 'resnet18'],
                        help='model to use')
    # checkpoints / masks produced by basic_pruners_torch.py
    parser.add_argument('--teacher-model-dir', type=str, default=None,
                        help='path to the pretrained teacher model checkpoint')
    parser.add_argument('--mask-path', type=str, default=None,
                        help='path to the pruned student model mask file')
    parser.add_argument('--student-model-dir', type=str, default=None,
                        help='path to the pruned student model checkpoint')
    # training configuration
    parser.add_argument('--batch-size', type=int, default=128,
                        help='input batch size for training')
    parser.add_argument('--test-batch-size', type=int, default=200,
                        help='input batch size for testing')
    parser.add_argument('--fine-tune-epochs', type=int, default=160,
                        help='epochs to fine tune')
    parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
                        help='For saving output checkpoints')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--test-only', action='store_true', default=False,
                        help='run test only')
    # knowledge distillation
    parser.add_argument('--kd_T', type=float, default=4,
                        help='temperature for KD distillation')
    args = parser.parse_args()
    main(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for reproducing the Lottery Ticket Hypothesis.
'''
import argparse
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from nni.algorithms.compression.pytorch.pruning import LotteryTicketPruner
class fc1(nn.Module):
    """Three-layer fully-connected MLP (784 -> 300 -> 100 -> num_classes)."""

    def __init__(self, num_classes=10):
        super(fc1, self).__init__()
        # Keep the layers positional inside nn.Sequential so state_dict keys
        # (classifier.0, classifier.2, classifier.4) stay compatible.
        layers = [
            nn.Linear(28 * 28, 300),
            nn.ReLU(inplace=True),
            nn.Linear(300, 100),
            nn.ReLU(inplace=True),
            nn.Linear(100, num_classes),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten the image batch and run it through the classifier."""
        flat = torch.flatten(x, 1)
        return self.classifier(flat)
def train(model, train_loader, optimizer, criterion):
    """Train for one epoch; return the loss of the final batch as a float."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    last_loss = None
    for batch_idx, (imgs, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        imgs = imgs.to(device)
        targets = targets.to(device)
        last_loss = criterion(model(imgs), targets)
        last_loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('{:2.0f}% Loss {}'.format(
                100 * batch_idx / len(train_loader), last_loss.item()))
    return last_loss.item()
def test(model, test_loader, criterion):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
return accuracy
# Script entry point: run the iterative Lottery Ticket experiment on MNIST.
if __name__ == '__main__':
    """
    THE LOTTERY TICKET HYPOTHESIS: FINDING SPARSE, TRAINABLE NEURAL NETWORKS (https://arxiv.org/pdf/1803.03635.pdf)
    The Lottery Ticket Hypothesis. A randomly-initialized, dense neural network contains a subnetwork that is
    initialized such that—when trained in isolation—it can match the test accuracy of the original network after
    training for at most the same number of iterations.
    Identifying winning tickets. We identify a winning ticket by training a network and pruning its
    smallest-magnitude weights. The remaining, unpruned connections constitute the architecture of the
    winning ticket. Unique to our work, each unpruned connection's value is then reset to its initialization
    from original network before it was trained. This forms our central experiment:
    1. Randomly initialize a neural network f(x; θ0) (where θ0 ∼ Dθ).
    2. Train the network for j iterations, arriving at parameters θj .
    3. Prune p% of the parameters in θj , creating a mask m.
    4. Reset the remaining parameters to their values in θ0, creating the winning ticket f(x; m θ0).
    As described, this pruning approach is one-shot: the network is trained once, p% of weights are
    pruned, and the surviving weights are reset. However, in this paper, we focus on iterative pruning,
    which repeatedly trains, prunes, and resets the network over n rounds; each round prunes p**(1/n) % of
    the weights that survive the previous round. Our results show that iterative pruning finds winning tickets
    that match the accuracy of the original network at smaller sizes than does one-shot pruning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_epochs", type=int, default=10, help="training epochs")
    args = parser.parse_args()

    # MNIST with the standard normalization constants
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    traindataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
    testdataset = datasets.MNIST('./data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(traindataset, batch_size=60, shuffle=True, num_workers=0, drop_last=False)
    test_loader = torch.utils.data.DataLoader(testdataset, batch_size=60, shuffle=False, num_workers=0, drop_last=True)

    model = fc1().to("cuda" if torch.cuda.is_available() else "cpu")
    optimizer = torch.optim.Adam(model.parameters(), lr=1.2e-3)
    criterion = nn.CrossEntropyLoss()

    # Record the randomly initialized model weights (theta_0 in the paper)
    orig_state = copy.deepcopy(model.state_dict())

    # train the model to get unpruned metrics
    for epoch in range(args.train_epochs):
        train(model, train_loader, optimizer, criterion)
    orig_accuracy = test(model, test_loader, criterion)
    print('unpruned model accuracy: {}'.format(orig_accuracy))

    # reset model weights and optimizer for pruning
    model.load_state_dict(orig_state)
    optimizer = torch.optim.Adam(model.parameters(), lr=1.2e-3)

    # Prune the model to find a winning ticket: 5 iterative rounds, 96% final sparsity
    configure_list = [{
        'prune_iterations': 5,
        'sparsity': 0.96,
        'op_types': ['default']
    }]
    pruner = LotteryTicketPruner(model, configure_list, optimizer)
    pruner.compress()

    # iteratively prune, retraining after each pruning round
    best_accuracy = 0.
    best_state_dict = None
    for i in pruner.get_prune_iterations():
        pruner.prune_iteration_start()
        loss = 0
        accuracy = 0
        for epoch in range(args.train_epochs):
            loss = train(model, train_loader, optimizer, criterion)
            accuracy = test(model, test_loader, criterion)
            print('current epoch: {0}, loss: {1}, accuracy: {2}'.format(epoch, loss, accuracy))
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                # state dict of weights and masks
                best_state_dict = copy.deepcopy(model.state_dict())
        print('prune iteration: {0}, loss: {1}, accuracy: {2}'.format(i, loss, accuracy))

    # a winning ticket must beat the unpruned baseline accuracy
    if best_accuracy > orig_accuracy:
        # load weights and masks
        pruner.bound_model.load_state_dict(best_state_dict)
        # reset weights to original untrained model and keep masks unchanged to export winning ticket
        pruner.load_model_state_dict(orig_state)
        pruner.export_model('model_winning_ticket.pth', 'mask_winning_ticket.pth')
        print('winning ticket has been saved: model_winning_ticket.pth, mask_winning_ticket.pth')
    else:
        print('winning ticket is not found in this run, you can run it again.')
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/bin/bash
# download and preprocess the Stanford Dogs dataset
# Each download/extraction step is skipped when its output already exists,
# so the script can be safely re-run after an interrupted download.

mkdir -p data/stanford-dogs

# download raw data (images, annotations, and train-test split)
cd data/stanford-dogs

# image tarball -> ./Images
if [ ! -d './Images' ] ; then
    if [ ! -f 'images.tar' ] ; then
        wget http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
    fi
    tar -xvf images.tar
fi

# bounding-box annotations -> ./Annotation
if [ ! -d './Annotation' ] ; then
    if [ ! -f 'annotation.tar' ] ; then
        wget http://vision.stanford.edu/aditya86/ImageNetDogs/annotation.tar
    fi
    tar -xvf annotation.tar
fi

# official train/test split lists (.mat files); re-extracted on every run
if [ ! -f 'lists.tar' ] ; then
    wget http://vision.stanford.edu/aditya86/ImageNetDogs/lists.tar
fi
tar -xvf lists.tar

cd ../..

# preprocess: train-valid-test splitting and image cropping
python preprocess.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import xml.etree.ElementTree
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from scipy import io
# dataset root produced by the download script
ROOT_DIR = './data/stanford-dogs/'
# number of categories (not referenced in the visible code of this script)
NUM_CATEGORIES = 120
# all images are resized to this (width, height) after bounding-box cropping
OUT_IMAGE_SIZE = (224, 224)
RANDOM_SEED = 42  # for splitting train and validation
TRAIN_RATIO = 0.9  # train / (train + validation)
def get_bounding_box(annotation_file):
    """Parse the annotation file and return the bounding box information.

    Parameters
    ----------
    annotation_file : str
        Path to the annotation XML file (Stanford Dogs format with one
        ``object``/``bndbox`` element).

    Returns
    -------
    dict
        Keys 'X_min', 'X_max', 'Y_min', 'Y_max' mapped to int coordinates.
    """
    xml_root = xml.etree.ElementTree.parse(annotation_file).getroot()
    # idiom fix: find() fetches the first matching child directly instead of
    # building a full findall() list and indexing [0]
    bndbox = xml_root.find('object').find('bndbox')
    return {
        'X_min': int(bndbox.find('xmin').text),
        'X_max': int(bndbox.find('xmax').text),
        'Y_min': int(bndbox.find('ymin').text),
        'Y_max': int(bndbox.find('ymax').text),
    }
def main(root_dir):
    """Crop, resize, normalize and split the Stanford Dogs images.

    Reads the official train/test .mat split files under *root_dir*, holds out
    (1 - TRAIN_RATIO) of the train list for validation, crops each image to its
    annotated bounding box, resizes to OUT_IMAGE_SIZE, normalizes per channel,
    and saves each sample as a .npy dict {'input': array, 'label': int} under
    root_dir/Processed/{train,valid,test}. Also writes category_dict.tsv
    mapping label indices to breed names.
    """
    try:
        os.mkdir(root_dir + 'Processed')
        os.mkdir(root_dir + 'Processed/train')
        os.mkdir(root_dir + 'Processed/valid')
        os.mkdir(root_dir + 'Processed/test')
    except FileExistsError:
        # fix: was a bare `except:` that hid every other failure (bad path,
        # permissions, KeyboardInterrupt) behind this message
        print('Directory already exists. Nothing done.')
        return

    # load train/test splits from the official .mat metadata
    train_metadata = io.loadmat(root_dir + 'train_list.mat')
    train_valid_file_list = [x[0][0] for x in train_metadata['file_list']]
    train_valid_annotation_list = [x[0][0] for x in train_metadata['annotation_list']]
    train_valid_labels = [x[0] - 1 for x in train_metadata['labels']]  # labels are 1-based in the .mat files
    train_valid_lists = list(zip(train_valid_file_list, train_valid_annotation_list, train_valid_labels))
    train_lists, valid_lists = train_test_split(train_valid_lists, train_size=TRAIN_RATIO, random_state=RANDOM_SEED)
    train_file_list, train_annotation_list, train_labels = zip(*train_lists)
    valid_file_list, valid_annotation_list, valid_labels = zip(*valid_lists)

    test_metadata = io.loadmat(root_dir + 'test_list.mat')
    test_file_list = [x[0][0] for x in test_metadata['file_list']]
    test_annotation_list = [x[0][0] for x in test_metadata['annotation_list']]
    test_labels = [x[0] - 1 for x in test_metadata['labels']]

    label2idx = {}
    for split, file_list, annotation_list, labels in zip(['train', 'valid', 'test'],
                                                         [train_file_list, valid_file_list, test_file_list],
                                                         [train_annotation_list, valid_annotation_list, test_annotation_list],
                                                         [train_labels, valid_labels, test_labels]):
        print('Preprocessing {} set: {} cases'.format(split, len(file_list)))
        for cur_file, cur_annotation, cur_label in zip(file_list, annotation_list, labels):
            # breed name is encoded in the directory name, e.g. n02085620-Chihuahua
            label_name = cur_file.split('/')[0].split('-')[-1].lower()
            if label_name not in label2idx:
                label2idx[label_name] = cur_label
            image = Image.open(root_dir + '/Images/' + cur_file)
            # crop to the annotated bounding box, then reshape
            annotation_file = root_dir + '/Annotation/' + cur_annotation
            bounding_box = get_bounding_box(annotation_file)
            image = image.crop([bounding_box['X_min'], bounding_box['Y_min'],
                                bounding_box['X_max'], bounding_box['Y_max']])
            image = image.convert('RGB')
            image = image.resize(OUT_IMAGE_SIZE)
            # normalize each channel separately and save the instance
            X = np.array(image)
            X = (X - np.mean(X, axis=(0, 1))) / np.std(X, axis=(0, 1))
            np.save(root_dir + 'Processed/' + split + '/' + cur_file.split('/')[-1].replace('.jpg', '.npy'),
                    {'input': X, 'label': cur_label})

    # save mapping from label name to index
    # fix: use the root_dir parameter consistently instead of the ROOT_DIR global
    with open(root_dir + '/category_dict.tsv', 'w') as dict_f:
        final_dict_list = sorted(label2idx.items(), key=lambda x: x[-1])
        for label, index in final_dict_list:
            dict_f.write('{}\t{}\n'.format(index, label))
        print(final_dict_list)


if __name__ == '__main__':
    main(ROOT_DIR)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import argparse
from time import gmtime, strftime
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from utils import *
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def run_validation(model, valid_dataloader):
    """Evaluate `model` on `valid_dataloader`.

    Returns
    -------
    (float, float)
        Mean per-batch cross-entropy loss and mean per-batch accuracy.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    batch_losses = []
    batch_accs = []
    with torch.no_grad():
        for inputs, labels in tqdm(valid_dataloader):
            inputs = inputs.float().to(device)
            labels = labels.to(device)
            logits = model(inputs)
            predictions = logits.max(1).indices
            batch_accs.append((predictions == labels).sum().item() / labels.size(0))
            batch_losses.append(criterion(logits, labels).item())
    return np.array(batch_losses).mean(), np.array(batch_accs).mean()
def run_pretrain(args):
    """Pretrain MobileNetV2 (torchhub, imagenet weights) on Stanford Dogs.

    Trains for args.n_epochs epochs, validating after each, and saves the
    checkpoint with the best validation accuracy to
    <experiment_dir>/checkpoint_best.pt. Progress is mirrored to
    <experiment_dir>/pretrain.log.
    """
    print(args)
    torch.set_num_threads(args.n_workers)
    model_type = 'mobilenet_v2_torchhub'
    pretrained = True  # load imagenet weight
    experiment_dir = 'pretrained_{}'.format(model_type) if args.experiment_dir is None else args.experiment_dir
    # fix: os.mkdir raised FileExistsError when rerunning with an existing
    # directory; makedirs(exist_ok=True) also creates missing parents.
    os.makedirs(experiment_dir, exist_ok=True)
    checkpoint = None
    input_size = 224
    n_classes = 120

    model = create_model(model_type=model_type, pretrained=pretrained, n_classes=n_classes,
                         input_size=input_size, checkpoint=checkpoint)
    model = model.to(device)
    print(model)

    train_dataset = TrainDataset('./data/stanford-dogs/Processed/train')
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    valid_dataset = EvalDataset('./data/stanford-dogs/Processed/valid')
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9,
                                weight_decay=args.weight_decay)
    best_valid_acc = 0.0
    # fix: context manager guarantees the log file is closed even if training raises
    with open(experiment_dir + '/pretrain.log', 'w') as log:
        for epoch in range(args.n_epochs):
            print('Start training epoch {}'.format(epoch))
            loss_list = []
            # train
            model.train()
            for i, (inputs, labels) in enumerate(tqdm(train_dataloader)):
                optimizer.zero_grad()
                inputs, labels = inputs.float().to(device), labels.to(device)
                preds = model(inputs)
                loss = criterion(preds, labels)
                loss_list.append(loss.item())
                loss.backward()
                optimizer.step()
            # validation
            valid_loss, valid_acc = run_validation(model, valid_dataloader)
            train_loss = np.array(loss_list).mean()
            print('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}'.format
                  (epoch, train_loss, valid_loss, valid_acc))
            log.write('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}\n'.format
                      (epoch, train_loss, valid_loss, valid_acc))
            # keep only the best checkpoint by validation accuracy
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                torch.save(model.state_dict(), experiment_dir + '/checkpoint_best.pt')
def parse_args():
    """Parse command-line options for the MobileNetV2 pretraining script."""
    arg_parser = argparse.ArgumentParser(description='Example code for pruning MobileNetV2')
    arg_parser.add_argument('--experiment_dir', type=str, default=None,
                            help='directory containing the pretrained model')
    arg_parser.add_argument('--checkpoint_name', type=str, default='checkpoint_best.pt',
                            help='checkpoint of the pretrained model')
    # finetuning parameters
    arg_parser.add_argument('--n_workers', type=int, default=16,
                            help='number of threads')
    arg_parser.add_argument('--n_epochs', type=int, default=180,
                            help='number of epochs to train the model')
    arg_parser.add_argument('--learning_rate', type=float, default=1e-4)
    arg_parser.add_argument('--weight_decay', type=float, default=0.0)
    arg_parser.add_argument('--batch_size', type=int, default=32,
                            help='input batch size for training and inference')
    return arg_parser.parse_args()
# Script entry point: parse CLI arguments and launch pretraining.
if __name__ == '__main__':
    args = parse_args()
    run_pretrain(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import argparse
import copy
from time import gmtime, strftime
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import (
LevelPruner,
SlimPruner,
FPGMPruner,
TaylorFOWeightFilterPruner,
L1FilterPruner,
L2FilterPruner,
AGPPruner,
ActivationMeanRankFilterPruner,
ActivationAPoZRankFilterPruner
)
from utils import *
# run on GPU when available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# fixed experiment configuration for the MobileNetV2 (torchhub) pruning example
model_type = 'mobilenet_v2_torchhub'
input_size = 224
n_classes = 120

# maps the --pruner_name CLI choice to the corresponding NNI pruner class
pruner_type_to_class = {'level': LevelPruner,
                        'l1': L1FilterPruner,
                        'l2': L2FilterPruner,
                        'slim': SlimPruner,
                        'fpgm': FPGMPruner,
                        'taylorfo': TaylorFOWeightFilterPruner,
                        'agp': AGPPruner,
                        'mean_activation': ActivationMeanRankFilterPruner,
                        'apoz': ActivationAPoZRankFilterPruner}
def run_eval(model, dataloader, device):
    """Evaluate `model` on `dataloader` using the given device.

    Returns
    -------
    (float, float)
        Mean per-batch cross-entropy loss and mean per-batch accuracy.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    batch_losses = []
    batch_accs = []
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader):
            inputs = inputs.float().to(device)
            labels = labels.to(device)
            logits = model(inputs)
            top1 = logits.max(1).indices
            batch_accs.append((top1 == labels).sum().item() / labels.size(0))
            batch_losses.append(criterion(logits, labels).item())
    return np.array(batch_losses).mean(), np.array(batch_accs).mean()
def run_finetune(model, train_dataloader, valid_dataloader, device,
                 n_epochs=2, learning_rate=1e-4, weight_decay=0.0, log=None):
    """Finetune `model` with Adam and return the epoch snapshot with the best
    validation accuracy.

    Parameters mirror the CLI options; `log`, when given, is an open writable
    file that receives one line per epoch plus a final summary.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    best_valid_acc = 0.0
    # fix: fall back to the input model — previously this could return None
    # when n_epochs == 0 or validation accuracy never exceeded 0.0
    best_model = model
    for epoch in range(n_epochs):
        print('Start finetuning epoch {}'.format(epoch))
        loss_list = []
        # train
        model.train()
        for inputs, labels in tqdm(train_dataloader):
            optimizer.zero_grad()
            inputs, labels = inputs.float().to(device), labels.to(device)
            preds = model(inputs)
            loss = criterion(preds, labels)
            loss_list.append(loss.item())
            loss.backward()
            optimizer.step()
        # validation
        valid_loss, valid_acc = run_eval(model, valid_dataloader, device)
        train_loss = np.array(loss_list).mean()
        print('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}'.format
              (epoch, train_loss, valid_loss, valid_acc))
        if log is not None:
            # fix: append '\n' (matching the pretrain script) so epoch entries
            # do not run together in the log file
            log.write('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}\n'.format
                      (epoch, train_loss, valid_loss, valid_acc))
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_model = copy.deepcopy(model).to(device)
    print("Best validation accuracy: {}".format(best_valid_acc))
    if log is not None:
        log.write("Best validation accuracy: {}\n".format(best_valid_acc))
    return best_model
def run_finetune_distillation(student_model, teacher_model, train_dataloader, valid_dataloader, device,
                              alpha, temperature,
                              n_epochs=2, learning_rate=1e-4, weight_decay=0.0, log=None):
    """Finetune `student_model` with knowledge distillation from `teacher_model`.

    Loss is alpha-weighted: soft KL term (scaled by temperature^2) plus the
    hard cross-entropy term. Returns the epoch snapshot with the best
    validation accuracy.
    """
    optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    best_valid_acc = 0.0
    # fix: fall back to the input student — previously this could return None
    # when n_epochs == 0 or validation accuracy never exceeded 0.0
    best_model = student_model
    for epoch in range(n_epochs):
        print('Start finetuning with distillation epoch {}'.format(epoch))
        loss_list = []
        # train
        student_model.train()
        for inputs, labels in tqdm(train_dataloader):
            optimizer.zero_grad()
            inputs, labels = inputs.float().to(device), labels.to(device)
            with torch.no_grad():
                teacher_preds = teacher_model(inputs)
            preds = student_model(inputs)
            # NOTE(review): nn.KLDivLoss() default reduction='mean' averages over
            # elements; 'batchmean' matches the KD paper but changes the loss
            # scale, so the original behavior is kept.
            soft_loss = nn.KLDivLoss()(F.log_softmax(preds / temperature, dim=1),
                                       F.softmax(teacher_preds / temperature, dim=1))
            hard_loss = F.cross_entropy(preds, labels)
            loss = soft_loss * (alpha * temperature * temperature) + hard_loss * (1. - alpha)
            loss_list.append(loss.item())
            loss.backward()
            optimizer.step()
        # validation
        valid_loss, valid_acc = run_eval(student_model, valid_dataloader, device)
        train_loss = np.array(loss_list).mean()
        print('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}'.format
              (epoch, train_loss, valid_loss, valid_acc))
        if log is not None:
            # fix: append '\n' so epoch entries do not run together in the log
            log.write('Epoch {}: train loss {:.4f}, valid loss {:.4f}, valid acc {:.4f}\n'.format
                      (epoch, train_loss, valid_loss, valid_acc))
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_model = copy.deepcopy(student_model).to(device)
    print("Best validation accuracy: {}".format(best_valid_acc))
    if log is not None:
        log.write("Best validation accuracy: {}\n".format(best_valid_acc))
    return best_model
def trainer_helper(model, criterion, optimizer, dataloader, device):
    """Run one training pass over `dataloader`; used as the pruner's trainer callback."""
    print("Running trainer in tuner")
    model.train()
    for inputs, labels in tqdm(dataloader):
        optimizer.zero_grad()
        inputs = inputs.float().to(device)
        labels = labels.to(device)
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
def trainer_helper_with_distillation(model, teacher_model, alpha, temperature, optimizer, dataloader, device):
    """One KD training pass over `dataloader`; used as the pruner's trainer callback."""
    print("Running trainer in tuner")
    model.train()
    for inputs, labels in tqdm(dataloader):
        optimizer.zero_grad()
        inputs = inputs.float().to(device)
        labels = labels.to(device)
        with torch.no_grad():
            teacher_logits = teacher_model(inputs)
        student_logits = model(inputs)
        # soft (distillation) term + hard (label) term, alpha-weighted
        distill = nn.KLDivLoss()(F.log_softmax(student_logits / temperature, dim=1),
                                 F.softmax(teacher_logits / temperature, dim=1))
        ce = F.cross_entropy(student_logits, labels)
        loss = distill * (alpha * temperature * temperature) + ce * (1. - alpha)
        loss.backward()
        optimizer.step()
def parse_args():
    """Parse command-line options for the MobileNetV2 pruning script."""
    arg_parser = argparse.ArgumentParser(description='Example code for pruning MobileNetV2')
    arg_parser.add_argument('--experiment_dir', type=str, required=True,
                            help='directory containing the pretrained model')
    arg_parser.add_argument('--checkpoint_name', type=str, default='checkpoint_best.pt',
                            help='checkpoint of the pretrained model')
    # pruner
    arg_parser.add_argument('--pruning_mode', type=str, default='conv1andconv2',
                            choices=['conv0', 'conv1', 'conv2', 'conv1andconv2', 'all'])
    arg_parser.add_argument('--sparsity', type=float, default=0.5,
                            help='target sparsity')
    arg_parser.add_argument('--pruner_name', type=str, default='l1',
                            choices=['l1', 'l2', 'slim', 'agp',
                                     'fpgm', 'mean_activation', 'apoz', 'taylorfo'],
                            help='pruner to use')
    # for agp only
    arg_parser.add_argument('--agp_pruning_alg', default='l1',
                            choices=['l1', 'l2', 'slim', 'fpgm',
                                     'mean_activation', 'apoz', 'taylorfo'],
                            help='pruner to use for agp')
    arg_parser.add_argument('--agp_n_iters', type=int, default=64,
                            help='number of iterations for agp')
    arg_parser.add_argument('--agp_n_epochs_per_iter', type=int, default=1,
                            help='number of epochs per iteration for agp')
    # speedup
    arg_parser.add_argument('--speedup', action='store_true', default=False,
                            help='Whether to speedup the pruned model')
    # finetuning parameters
    arg_parser.add_argument('--n_workers', type=int, default=16,
                            help='number of threads')
    arg_parser.add_argument('--finetune_epochs', type=int, default=180,
                            help='number of epochs to finetune the model')
    arg_parser.add_argument('--learning_rate', type=float, default=1e-4)
    arg_parser.add_argument('--weight_decay', type=float, default=0.0)
    arg_parser.add_argument('--batch_size', type=int, default=32,
                            help='input batch size for training and inference')
    # knowledge distillation
    arg_parser.add_argument('--kd', action='store_true', default=False,
                            help='Whether to use knowledge distillation')
    arg_parser.add_argument('--alpha', type=float, default=0.99,
                            help='Alpha for knowledge distillation loss')
    arg_parser.add_argument('--temp', type=float, default=8,
                            help='Temperature for knowledge distillation loss')
    return arg_parser.parse_args()
def run_pruning(args):
print(args)
torch.set_num_threads(args.n_workers)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log = open(args.experiment_dir + '/pruning_{}_{}_sparsity{}_{}.log'.format(
args.pruner_name, args.pruning_mode, args.sparsity,
strftime("%Y%m%d%H%M", gmtime())), 'w')
train_dataset = TrainDataset('./data/stanford-dogs/Processed/train')
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
train_dataset_for_pruner = EvalDataset('./data/stanford-dogs/Processed/train')
train_dataloader_for_pruner = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False)
valid_dataset = EvalDataset('./data/stanford-dogs/Processed/valid')
valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False)
test_dataset = EvalDataset('./data/stanford-dogs/Processed/test')
test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
model = create_model(model_type=model_type, pretrained=False, n_classes=n_classes,
input_size=input_size, checkpoint=args.experiment_dir + '/' + args.checkpoint_name)
model = model.to(device)
teacher_model = None
if args.kd:
teacher_model = copy.deepcopy(model)
# evaluation before pruning
# count_flops(model, log, device)
initial_loss, initial_acc = run_eval(model, test_dataloader, device)
print('Before Pruning:\nLoss: {}\nAccuracy: {}'.format(initial_loss, initial_acc))
log.write('Before Pruning:\nLoss: {}\nAccuracy: {}\n'.format(initial_loss, initial_acc))
# set up config list and pruner
config_list = []
if 'conv0' in args.pruning_mode or args.pruning_mode == 'all':
if args.pruner_name == 'slim' or (args.pruner_name == 'agp' and args.agp_pruning_alg == 'slim'):
config_list.append({
'op_names': ['features.{}.conv.0.1'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
else:
config_list.append({
'op_names': ['features.{}.conv.0.0'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
if 'conv1' in args.pruning_mode or args.pruning_mode == 'all':
if args.pruner_name == 'slim' or (args.pruner_name == 'agp' and args.agp_pruning_alg == 'slim'):
config_list.append({
'op_names': ['features.{}.conv.1.1'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
else:
config_list.append({
'op_names': ['features.{}.conv.1.0'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
if 'conv2' in args.pruning_mode or args.pruning_mode == 'all':
if args.pruner_name == 'slim' or (args.pruner_name == 'agp' and args.agp_pruning_alg == 'slim'):
config_list.append({
'op_names': ['features.{}.conv.3'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
else:
config_list.append({
'op_names': ['features.{}.conv.2'.format(x) for x in range(2, 18)],
'sparsity': args.sparsity
})
print(config_list)
kwargs = {}
if args.pruner_name in ['slim', 'taylorfo', 'mean_activation', 'apoz', 'agp']:
def trainer(model, optimizer, criterion, epoch):
if not args.kd:
return trainer_helper(model, criterion, optimizer, train_dataloader, device)
else:
return trainer_helper_with_distillation(model, teacher_model, args.alpha, args.temp, optimizer, train_dataloader, device)
kwargs = {
'trainer': trainer,
'optimizer': torch.optim.Adam(model.parameters()),
'criterion': nn.CrossEntropyLoss()
}
if args.pruner_name == 'agp':
kwargs['pruning_algorithm'] = args.agp_pruning_alg
kwargs['num_iterations'] = args.agp_n_iters
kwargs['epochs_per_iteration'] = args.agp_n_epochs_per_iter
if args.pruner_name == 'slim':
kwargs['sparsifying_training_epochs'] = 10
# pruning
pruner = pruner_type_to_class[args.pruner_name](model, config_list, **kwargs)
pruner.compress()
pruner.export_model(args.experiment_dir + '/model_temp.pth', args.experiment_dir + './mask_temp.pth')
# model speedup
pruner._unwrap_model()
if args.speedup:
dummy_input = torch.rand(1,3,224,224).to(device)
ms = ModelSpeedup(model, dummy_input, args.experiment_dir + './mask_temp.pth')
ms.speedup_model()
print(model)
count_flops(model, log)
intermediate_loss, intermediate_acc = run_eval(model, test_dataloader, device)
print('Before Finetuning:\nLoss: {}\nAccuracy: {}'.format(intermediate_loss, intermediate_acc))
log.write('Before Finetuning:\nLoss: {}\nAccuracy: {}\n'.format(intermediate_loss, intermediate_acc))
# finetuning
if args.kd:
model = run_finetune_distillation(model, teacher_model, train_dataloader, valid_dataloader, device,
args.alpha, args.temp, n_epochs=args.finetune_epochs,
learning_rate=args.learning_rate, weight_decay=args.weight_decay)
else:
model = run_finetune(model, train_dataloader, valid_dataloader, device, n_epochs=args.finetune_epochs,
learning_rate=args.learning_rate, weight_decay=args.weight_decay)
# final evaluation
final_loss, final_acc = run_eval(model, test_dataloader, device)
print('After Pruning:\nLoss: {}\nAccuracy: {}'.format(final_loss, final_acc))
log.write('After Pruning:\nLoss: {}\nAccuracy: {}'.format(final_loss, final_acc))
# clean up
filePaths = [args.experiment_dir + '/model_tmp.pth', args.experiment_dir + '/mask_tmp.pth']
for f in filePaths:
if os.path.exists(f):
os.remove(f)
log.close()
# Script entry point: parse the CLI arguments and run the full KD pruning pipeline.
if __name__ == '__main__':
    args = parse_args()
    run_pruning(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from utils import create_model, EvalDataset, count_flops
# Evaluate on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Architecture to evaluate; must match one of the names accepted by utils.create_model.
model_type = 'mobilenet_v2_torchhub' # 'mobilenet_v1' 'mobilenet_v2' 'mobilenet_v2_torchhub'
pretrained = False # load imagenet weight (only for 'mobilenet_v2_torchhub')
checkpoint_dir = './pretrained_{}/'.format(model_type)
# NOTE(review): checkpoint_dir already ends with '/', so this path contains a
# double slash (harmless on POSIX filesystems).
checkpoint = checkpoint_dir + '/checkpoint_best.pt' # model checkpoint produced by pretrain.py
input_size = 224  # square input resolution fed to the network
n_classes = 120   # number of output classes (the Stanford Dogs test split is used below)
batch_size = 32   # evaluation batch size
def run_test():
    """Evaluate the checkpointed model on the Stanford Dogs test split.

    Loads the model named by the module-level ``model_type``/``checkpoint``
    constants, runs it over the test set, and prints the per-sample average
    cross-entropy loss and top-1 accuracy.

    Bug fix: the original averaged *per-batch* loss/accuracy values, which
    over-weights the final (possibly smaller) batch. We now accumulate
    per-sample totals so the reported numbers are exact dataset means.
    """
    model = create_model(model_type=model_type, pretrained=pretrained, n_classes=n_classes,
                         input_size=input_size, checkpoint=checkpoint)
    model = model.to(device)
    print(model)
    # count_flops(model, device=device)

    test_dataset = EvalDataset('./data/stanford-dogs/Processed/test')
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    # 'sum' reduction lets us divide by the true sample count at the end.
    loss_func = nn.CrossEntropyLoss(reduction='sum')
    total_loss = 0.0
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_dataloader):
            inputs, labels = inputs.float().to(device), labels.to(device)
            preds = model(inputs)
            n_correct += (preds.max(1).indices == labels).sum().item()
            total_loss += loss_func(preds, labels).item()
            n_samples += labels.size(0)
    final_loss = total_loss / n_samples
    final_acc = n_correct / n_samples
    print('Test loss: {}\nTest accuracy: {}'.format(final_loss, final_acc))
# Script entry point.
if __name__ == '__main__':
    run_test()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import sys
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
from nni.compression.pytorch.utils import count_flops_params
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[3] / 'models'))
from mobilenet import MobileNet
from mobilenet_v2 import MobileNetV2
def create_model(model_type=None, n_classes=120, input_size=224, checkpoint=None, pretrained=False, width_mult=1.):
    """Build one of the supported MobileNet variants, optionally loading weights.

    Parameters
    ----------
    model_type : str or None
        One of 'mobilenet_v1', 'mobilenet_v2', 'mobilenet_v2_torchhub';
        ``None`` returns ``None`` (useful as a placeholder).
    n_classes : int
        Number of output classes; the torchhub variant's classifier head is
        replaced to match this.
    input_size : int
        Input resolution (only used by 'mobilenet_v2').
    checkpoint : str or None
        Path to a state_dict to load into the freshly built model.
    pretrained : bool
        Load ImageNet weights (only honored by 'mobilenet_v2_torchhub').
    width_mult : float
        Width multiplier (only used by 'mobilenet_v2').

    Returns
    -------
    torch.nn.Module or None

    Raises
    ------
    RuntimeError
        If ``model_type`` is not one of the recognized names.
    """
    if model_type == 'mobilenet_v1':
        model = MobileNet(n_class=n_classes, profile='normal')
    elif model_type == 'mobilenet_v2':
        model = MobileNetV2(n_class=n_classes, input_size=input_size, width_mult=width_mult)
    elif model_type == 'mobilenet_v2_torchhub':
        model = torch.hub.load('pytorch/vision:v0.8.1', 'mobilenet_v2', pretrained=pretrained)
        # Swap the 1000-way ImageNet head for an n_classes-way linear layer.
        feature_size = model.classifier[1].weight.data.size()[1]
        model.classifier[1] = torch.nn.Linear(feature_size, n_classes)
    elif model_type is None:
        model = None
    else:
        raise RuntimeError('Unknown model_type.')

    # Bug fix: a checkpoint combined with model_type=None previously crashed
    # with AttributeError on None.load_state_dict; only load when a model exists.
    if checkpoint is not None and model is not None:
        model.load_state_dict(torch.load(checkpoint))
    return model
def get_dataloader(dataset_type, data_path, batch_size=32, shuffle=True):
    """Return a DataLoader over the train or eval .npy dataset at *data_path*."""
    assert dataset_type in ['train', 'eval']
    dataset_cls = TrainDataset if dataset_type == 'train' else EvalDataset
    return DataLoader(dataset_cls(data_path), batch_size, shuffle=shuffle)
class TrainDataset(Dataset):
    """Training split stored as one .npy dict per sample, with light augmentation.

    Each file holds ``{'input': HxWxC float array, 'label': int}``; one
    augmentation (possibly the identity) is drawn at random per access.
    """

    def __init__(self, npy_dir):
        self.root_dir = npy_dir
        self.case_names = [self.root_dir + '/' + name for name in os.listdir(self.root_dir)]
        # Identity is included so some samples pass through unchanged.
        self.transform = transforms.RandomChoice([
            transforms.Lambda(lambda t: t),
            transforms.RandomRotation(30),
            transforms.ColorJitter(),
            transforms.RandomHorizontalFlip(p=1),
        ])

    def __len__(self):
        return len(self.case_names)

    def __getitem__(self, index):
        record = np.load(self.case_names[index], allow_pickle=True).item()
        sample = record['input'].transpose(2, 0, 1)  # HWC -> CHW
        # Convert to a float Tensor so torchvision.transforms can operate on it.
        sample = torch.from_numpy(sample).type(torch.float)
        return self.transform(sample), record['label']
class EvalDataset(Dataset):
    """Evaluation split stored as one .npy dict per sample; no augmentation.

    Each file holds ``{'input': HxWxC float array, 'label': int}``.
    """

    def __init__(self, npy_dir):
        self.root_dir = npy_dir
        self.case_names = [self.root_dir + '/' + name for name in os.listdir(self.root_dir)]

    def __len__(self):
        return len(self.case_names)

    def __getitem__(self, index):
        record = np.load(self.case_names[index], allow_pickle=True).item()
        tensor = torch.from_numpy(record['input'].transpose(2, 0, 1)).type(torch.float)  # HWC -> CHW
        return tensor, record['label']
def count_flops(model, log=None, device=None):
    """Print (and optionally log) FLOPs and parameter count for *model*.

    A fixed 1x3x256x256 random input is used as the probe; results are also
    returned as ``(flops, params)``.
    """
    probe = torch.rand([1, 3, 256, 256])
    if device is not None:
        probe = probe.to(device)
    flops, params, results = count_flops_params(model, probe)
    print(f"FLOPs: {flops}, params: {params}")
    if log is not None:
        log.write(f"FLOPs: {flops}, params: {params}\n")
    return flops, params
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for quick start of pruning.
In this example, we use level pruner to prune the LeNet on MNIST.
'''
import argparse
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (Conv2D, Dense, Dropout, Flatten, MaxPool2D, BatchNormalization)
from nni.algorithms.compression.tensorflow.pruning import LevelPruner, SlimPruner
class LeNet(Model):
    """LeNet-5 style Keras model with configurable hyper-parameters."""

    def __init__(self, conv_size=3, hidden_size=32, dropout_rate=0.5):
        """
        Initialize hyper-parameters.

        Parameters
        ----------
        conv_size : int
            Kernel size of convolutional layers.
        hidden_size : int
            Dimensionality of last hidden layer.
        dropout_rate : float
            Dropout rate between two fully connected (dense) layers, to prevent co-adaptation.
        """
        super().__init__()
        # NOTE: the bn1/bn2 attribute names are referenced by the slim-pruning
        # loss in main(); do not rename them.
        self.conv1 = Conv2D(filters=32, kernel_size=conv_size, activation='relu')
        self.pool1 = MaxPool2D(pool_size=2)
        self.bn1 = BatchNormalization()
        self.conv2 = Conv2D(filters=64, kernel_size=conv_size, activation='relu')
        self.pool2 = MaxPool2D(pool_size=2)
        self.bn2 = BatchNormalization()
        self.flatten = Flatten()
        self.fc1 = Dense(units=hidden_size, activation='relu')
        self.dropout = Dropout(rate=dropout_rate)
        self.fc2 = Dense(units=10, activation='softmax')

    def call(self, x):
        """Override ``Model.call``: run the layers in sequence."""
        for layer in (self.conv1, self.pool1, self.bn1,
                      self.conv2, self.pool2, self.bn2,
                      self.flatten, self.fc1, self.dropout):
            x = layer(x)
        return self.fc2(x)
def get_dataset(dataset_name='mnist'):
    """Load MNIST and scale pixels to [0, 1], adding a trailing channel axis."""
    assert dataset_name == 'mnist'
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
    train_x = train_x[..., tf.newaxis] / 255.0
    test_x = test_x[..., tf.newaxis] / 255.0
    return (train_x, train_y), (test_x, test_y)
# def create_model(model_name='naive'):
# assert model_name == 'naive'
# return tf.keras.Sequential([
# tf.keras.layers.Conv2D(filters=20, kernel_size=5),
# tf.keras.layers.BatchNormalization(),
# tf.keras.layers.ReLU(),
# tf.keras.layers.MaxPool2D(pool_size=2),
# tf.keras.layers.Conv2D(filters=20, kernel_size=5),
# tf.keras.layers.BatchNormalization(),
# tf.keras.layers.ReLU(),
# tf.keras.layers.MaxPool2D(pool_size=2),
# tf.keras.layers.Flatten(),
# tf.keras.layers.Dense(units=500),
# tf.keras.layers.ReLU(),
# tf.keras.layers.Dense(units=10),
# tf.keras.layers.Softmax()
# ])
def main(args):
    """Pretrain LeNet on MNIST, apply the selected pruner, then fine-tune."""
    train_set, test_set = get_dataset('mnist')
    model = LeNet()

    print('start training')
    sgd = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9, decay=1e-4)
    if args.pruner_name == 'slim':
        # Slim pruning requires an L1 penalty on the BN scaling factors (gammas)
        # so that unimportant channels are driven toward zero during pretraining.
        def slim_loss(y_true, y_pred):
            ce = tf.keras.losses.sparse_categorical_crossentropy(y_true=y_true, y_pred=y_pred)
            gammas = []
            for layer in [model.bn1, model.bn2]:
                gammas.append([w for w in layer.weights if '/gamma:' in w.name][0].read_value())
            l1 = 0.0001 * tf.reduce_sum([tf.reduce_sum(tf.abs(g)) for g in gammas])
            return ce + l1
        loss_fn = slim_loss
    else:
        loss_fn = 'sparse_categorical_crossentropy'
    model.compile(optimizer=sgd, loss=loss_fn, metrics=['accuracy'])
    model.fit(
        train_set[0],
        train_set[1],
        batch_size=args.batch_size,
        epochs=args.pretrain_epochs,
        validation_data=test_set
    )

    print('start pruning')
    finetune_opt = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, decay=1e-4)
    # create_pruner
    if args.pruner_name == 'level':
        pruner = LevelPruner(model, [{
            'sparsity': args.sparsity,
            'op_types': ['default'],
        }])
    elif args.pruner_name == 'slim':
        pruner = SlimPruner(model, [{
            'sparsity': args.sparsity,
            'op_types': ['BatchNormalization'],
        }])
    model = pruner.compress()
    model.compile(
        optimizer=finetune_opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=True  # NOTE: Important, model compression does not work in graph mode!
    )
    # fine-tuning
    model.fit(
        train_set[0],
        train_set[1],
        batch_size=args.batch_size,
        epochs=args.prune_epochs,
        validation_data=test_set
    )
# Script entry point: parse hyper-parameters and run the pretrain/prune pipeline.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Pruning algorithm to demonstrate: magnitude-based 'level' or BN-scale 'slim'.
    parser.add_argument('--pruner_name', type=str, default='level', choices=['level', 'slim'])
    parser.add_argument('--batch-size', type=int, default=256)
    parser.add_argument('--pretrain_epochs', type=int, default=10)
    parser.add_argument('--prune_epochs', type=int, default=10)
    parser.add_argument('--sparsity', type=float, default=0.5)
    args = parser.parse_args()
    main(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for quick start of pruning.
In this example, we use level pruner to prune the LeNet on MNIST.
'''
import logging
import argparse
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from nni.algorithms.compression.pytorch.pruning import LevelPruner
import sys
sys.path.append('../models')
from mnist.lenet import LeNet
# Module-level logger for this example script.
_logger = logging.getLogger('mnist_example')
_logger.setLevel(logging.INFO)
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch, logging progress every args.log_interval batches.

    With ``args.dry_run`` set, the loop stops after the first logged batch.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            seen = batch_idx * len(data)
            total = len(train_loader.dataset)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, total, 100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = 100 * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset), acc))
return acc
def main(args):
    """Pretrain LeNet on MNIST, prune it with LevelPruner, then fine-tune.

    Pretrained weights go to ``pretrain_mnist_lenet.pt``. During fine-tuning the
    pruned model + masks are exported whenever top-1 accuracy improves, so the
    files on disk always hold the *best* pruned model.

    Bug fix: the original tracked ``best_top1`` but unconditionally exported
    the last-epoch model after the loop (contradicting its own "Export the
    best model" comment), and the output filename had a 'pruend' typo.
    """
    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset1 = datasets.MNIST('./data', train=True, download=True, transform=transform)
    dataset2 = datasets.MNIST('./data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = LeNet().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    print('start pre-training')
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()
    torch.save(model.state_dict(), "pretrain_mnist_lenet.pt")

    print('start pruning')
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.01)
    # create pruner
    prune_config = [{
        'sparsity': args.sparsity,
        'op_types': ['default'],
    }]
    pruner = LevelPruner(model, prune_config)
    model = pruner.compress()

    # fine-tuning
    best_top1 = 0
    for epoch in range(1, args.epochs + 1):
        pruner.update_epoch(epoch)
        train(args, model, device, train_loader, optimizer_finetune, epoch)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores state_dict of the
            # pruned model, 'mask_path' stores mask_dict of the pruned model.
            pruner.export_model(model_path='pruned_mnist_lenet.pt', mask_path='mask_mnist_lenet.pt')
if __name__ == '__main__':
    # Training settings.
    # Fixed two user-visible typos: 'comporession' in the parser description
    # and the doubled word in the --sparsity help text.
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example for model compression')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='overall target sparsity')
    args = parser.parse_args()
    main(args)
import os
import sys
import argparse
import time
import torch
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[3] / 'models'))
from cifar10.vgg import VGG
from mnist.lenet import LeNet
from nni.compression.pytorch import apply_compression_results, ModelSpeedup
# Fix the RNG seed so the dummy inputs (and hence the output comparison) are reproducible.
torch.manual_seed(0)
# Toggle which inference passes below are exercised; use_mask must run before
# use_speedup because speedup rewrites the model in place.
use_mask = True
use_speedup = True
compare_results = True
# Per-example configuration: model architecture, dummy-input shape, and the
# mask file produced by the corresponding pruner example.
config = {
    'apoz': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_apoz.pth'
    },
    'l1filter': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_l1filter.pth'
    },
    'fpgm': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_fpgm.pth'
    },
    'slim': {
        'model_name': 'vgg19',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg19_cifar10_slim.pth'
    }
}
def model_inference(config):
    """Benchmark masked inference vs. speeded-up inference for one example.

    Parameters
    ----------
    config : dict
        Must provide 'model_name' ('vgg16' | 'vgg19' | 'lenet'),
        'input_shape' (list passed to torch.randn) and 'masks_file'
        (path to the pruning masks).

    Raises
    ------
    ValueError
        If 'model_name' is not one of the supported architectures.
    RuntimeError
        If the masked and speeded-up outputs disagree.
    """
    masks_file = config['masks_file']
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    # device = torch.device(config['device'])
    if config['model_name'] == 'vgg16':
        model = VGG(depth=16)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'lenet':
        model = LeNet()
    else:
        # Bug fix: an unknown name previously fell through and crashed later
        # with a NameError on 'model'.
        raise ValueError('unsupported model_name: {}'.format(config['model_name']))
    model.to(device)
    model.eval()
    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modify the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    # Bug fix: only compare when both passes actually ran; with either flag off
    # torch.allclose(None, ...) would raise a TypeError.
    if compare_results and use_mask_out is not None and use_speedup_out is not None:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')
# Script entry point: run one named example (validating its mask file first),
# or every configured example when --example_name is 'all'.
if __name__ == '__main__':
    parser = argparse.ArgumentParser("speedup")
    parser.add_argument("--example_name", type=str, default="slim", help="the name of pruning example")
    parser.add_argument("--masks_file", type=str, default=None, help="the path of the masks file")
    args = parser.parse_args()
    if args.example_name != 'all':
        if args.masks_file is not None:
            config[args.example_name]['masks_file'] = args.masks_file
        if not os.path.exists(config[args.example_name]['masks_file']):
            msg = '{} does not exist! You should specify masks_file correctly, ' \
                'or use default one which is generated by model_prune_torch.py'
            raise RuntimeError(msg.format(config[args.example_name]['masks_file']))
        model_inference(config[args.example_name])
    else:
        # 'all': run every configured example with its default mask file.
        model_inference(config['fpgm'])
        model_inference(config['slim'])
        model_inference(config['l1filter'])
        model_inference(config['apoz'])
import torch
from torchvision.models import mobilenet_v2
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
# Load an ImageNet-pretrained MobileNetV2 and prune half the filters of every
# Conv2d layer with the L1-norm criterion.
model = mobilenet_v2(pretrained=True)
dummy_input = torch.rand(8, 3, 416, 416)
cfg_list = [{'op_types':['Conv2d'], 'sparsity':0.5}]
pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
# Export the masked weights and the mask file consumed by ModelSpeedup below.
pruner.export_model('./model', './mask')
# need call _unwrap_model if you want run the speedup on the same model
pruner._unwrap_model()
# Speed up the pruned model (NOTE(review): the original comment said "nanodet",
# which does not match the MobileNetV2 constructed above).
ms = ModelSpeedup(model, dummy_input, './mask')
ms.speedup_model()
# Sanity check: the compacted model still accepts the original input shape.
model(dummy_input)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment