OpenDAS / nni · Commits

Commit 1011377c
authored Mar 31, 2022 by qianyj

the source code of NNI for DCU

parent abc22158
Changes: 788 files
Showing 20 changed files with 2360 additions and 0 deletions (+2360, -0)
examples/model_compress/pruning/mobilenetv2_end2end/utils.py  +94 -0
examples/model_compress/pruning/naive_prune_tf.py  +168 -0
examples/model_compress/pruning/naive_prune_torch.py  +153 -0
examples/model_compress/pruning/speedup/model_speedup.py  +98 -0
examples/model_compress/pruning/speedup/speedup_mobilnetv2.py  +21 -0
examples/model_compress/pruning/speedup/speedup_nanodet.py  +39 -0
examples/model_compress/pruning/speedup/speedup_yolov3.py  +36 -0
examples/model_compress/pruning/transformers/run.sh  +43 -0
examples/model_compress/pruning/transformers/transformer_pruning.py  +387 -0
examples/model_compress/pruning/v2/activation_pruning_torch.py  +142 -0
examples/model_compress/pruning/v2/admm_pruning_torch.py  +138 -0
examples/model_compress/pruning/v2/amc_pruning_torch.py  +98 -0
examples/model_compress/pruning/v2/auto_compress_pruner.py  +94 -0
examples/model_compress/pruning/v2/fpgm_pruning_torch.py  +131 -0
examples/model_compress/pruning/v2/iterative_pruning_torch.py  +138 -0
examples/model_compress/pruning/v2/level_pruning_torch.py  +130 -0
examples/model_compress/pruning/v2/movement_pruning_glue.py  +125 -0
examples/model_compress/pruning/v2/norm_pruning_torch.py  +137 -0
examples/model_compress/pruning/v2/scheduler_torch.py  +100 -0
examples/model_compress/pruning/v2/simple_pruning_torch.py  +88 -0
examples/model_compress/pruning/mobilenetv2_end2end/utils.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import sys
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np

from nni.compression.pytorch.utils.counter import count_flops_params

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from mobilenet import MobileNet
from mobilenet_v2 import MobileNetV2


def create_model(model_type=None, n_classes=120, input_size=224, checkpoint=None, pretrained=False, width_mult=1.):
    if model_type == 'mobilenet_v1':
        model = MobileNet(n_class=n_classes, profile='normal')
    elif model_type == 'mobilenet_v2':
        model = MobileNetV2(n_class=n_classes, input_size=input_size, width_mult=width_mult)
    elif model_type == 'mobilenet_v2_torchhub':
        model = torch.hub.load('pytorch/vision:v0.8.1', 'mobilenet_v2', pretrained=pretrained)
        # model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=pretrained)
        feature_size = model.classifier[1].weight.data.size()[1]
        replace_classifier = torch.nn.Linear(feature_size, n_classes)
        model.classifier[1] = replace_classifier
    elif model_type is None:
        model = None
    else:
        raise RuntimeError('Unknown model_type.')

    if checkpoint is not None:
        model.load_state_dict(torch.load(checkpoint))

    return model


def get_dataloader(dataset_type, data_path, batch_size=32, shuffle=True):
    assert dataset_type in ['train', 'eval']
    if dataset_type == 'train':
        ds = TrainDataset(data_path)
    else:
        ds = EvalDataset(data_path)
    return DataLoader(ds, batch_size, shuffle=shuffle)


class TrainDataset(Dataset):
    def __init__(self, npy_dir):
        self.root_dir = npy_dir
        self.case_names = [self.root_dir + '/' + x for x in os.listdir(self.root_dir)]

        transform_set = [transforms.Lambda(lambda x: x),
                         transforms.RandomRotation(30),
                         transforms.ColorJitter(),
                         transforms.RandomHorizontalFlip(p=1)]
        self.transform = transforms.RandomChoice(transform_set)

    def __len__(self):
        return len(self.case_names)

    def __getitem__(self, index):
        instance = np.load(self.case_names[index], allow_pickle=True).item()
        x = instance['input'].transpose(2, 0, 1)    # (C, H, W)
        x = torch.from_numpy(x).type(torch.float)   # convert to Tensor to use torchvision.transforms
        x = self.transform(x)
        return x, instance['label']


class EvalDataset(Dataset):
    def __init__(self, npy_dir):
        self.root_dir = npy_dir
        self.case_names = [self.root_dir + '/' + x for x in os.listdir(self.root_dir)]

    def __len__(self):
        return len(self.case_names)

    def __getitem__(self, index):
        instance = np.load(self.case_names[index], allow_pickle=True).item()
        x = instance['input'].transpose(2, 0, 1)
        x = torch.from_numpy(x).type(torch.float)
        return x, instance['label']


def count_flops(model, log=None, device=None):
    dummy_input = torch.rand([1, 3, 256, 256])
    if device is not None:
        dummy_input = dummy_input.to(device)
    flops, params, results = count_flops_params(model, dummy_input)
    print(f"FLOPs: {flops}, params: {params}")
    if log is not None:
        log.write(f"FLOPs: {flops}, params: {params}\n")
    return flops, params
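As a rough illustration only (not part of this commit), the sketch below shows how the helpers above might be wired together. It assumes this file is importable as `utils` and that the dataset path points at a directory of .npy samples in the format expected by TrainDataset; both names and paths are hypothetical.

# Hypothetical usage sketch for the helpers above (paths and module name assumed).
from utils import create_model, get_dataloader, count_flops

# Load a torchvision MobileNetV2 backbone and replace its classifier head for 120 classes.
model = create_model(model_type='mobilenet_v2_torchhub', n_classes=120, pretrained=True)
# Build a training DataLoader over a directory of .npy samples (hypothetical path).
train_loader = get_dataloader('train', './data/train_npy', batch_size=32)
# Print and return FLOPs and parameter counts for a 1x3x256x256 dummy input.
flops, params = count_flops(model)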
examples/model_compress/pruning/naive_prune_tf.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for quick start of pruning.
In this example, we use level pruner to prune the LeNet on MNIST.
'''

import argparse

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (Conv2D, Dense, Dropout, Flatten, MaxPool2D, BatchNormalization)

from nni.algorithms.compression.tensorflow.pruning import LevelPruner, SlimPruner


class LeNet(Model):
    """
    LeNet-5 Model with customizable hyper-parameters
    """
    def __init__(self, conv_size=3, hidden_size=32, dropout_rate=0.5):
        """
        Initialize hyper-parameters.

        Parameters
        ----------
        conv_size : int
            Kernel size of convolutional layers.
        hidden_size : int
            Dimensionality of last hidden layer.
        dropout_rate : float
            Dropout rate between two fully connected (dense) layers, to prevent co-adaptation.
        """
        super().__init__()
        self.conv1 = Conv2D(filters=32, kernel_size=conv_size, activation='relu')
        self.pool1 = MaxPool2D(pool_size=2)
        self.bn1 = BatchNormalization()
        self.conv2 = Conv2D(filters=64, kernel_size=conv_size, activation='relu')
        self.pool2 = MaxPool2D(pool_size=2)
        self.bn2 = BatchNormalization()
        self.flatten = Flatten()
        self.fc1 = Dense(units=hidden_size, activation='relu')
        self.dropout = Dropout(rate=dropout_rate)
        self.fc2 = Dense(units=10, activation='softmax')

    def call(self, x):
        """Override ``Model.call`` to build LeNet-5 model."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x)
        return self.fc2(x)


def get_dataset(dataset_name='mnist'):
    assert dataset_name == 'mnist'

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train[..., tf.newaxis] / 255.0
    x_test = x_test[..., tf.newaxis] / 255.0
    return (x_train, y_train), (x_test, y_test)


# def create_model(model_name='naive'):
#     assert model_name == 'naive'
#     return tf.keras.Sequential([
#         tf.keras.layers.Conv2D(filters=20, kernel_size=5),
#         tf.keras.layers.BatchNormalization(),
#         tf.keras.layers.ReLU(),
#         tf.keras.layers.MaxPool2D(pool_size=2),
#         tf.keras.layers.Conv2D(filters=20, kernel_size=5),
#         tf.keras.layers.BatchNormalization(),
#         tf.keras.layers.ReLU(),
#         tf.keras.layers.MaxPool2D(pool_size=2),
#         tf.keras.layers.Flatten(),
#         tf.keras.layers.Dense(units=500),
#         tf.keras.layers.ReLU(),
#         tf.keras.layers.Dense(units=10),
#         tf.keras.layers.Softmax()
#     ])


def main(args):
    train_set, test_set = get_dataset('mnist')
    model = LeNet()

    print('start training')
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9, decay=1e-4)
    if args.pruner_name == 'slim':
        def slim_loss(y_true, y_pred):
            loss_1 = tf.keras.losses.sparse_categorical_crossentropy(y_true=y_true, y_pred=y_pred)
            weight_list = []
            for layer in [model.bn1, model.bn2]:
                weight_list.append([w for w in layer.weights if '/gamma:' in w.name][0].read_value())
            loss_2 = 0.0001 * tf.reduce_sum([tf.reduce_sum(tf.abs(w)) for w in weight_list])
            return loss_1 + loss_2
        model.compile(
            optimizer=optimizer,
            loss=slim_loss,
            metrics=['accuracy']
        )
    else:
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
    model.fit(
        train_set[0],
        train_set[1],
        batch_size=args.batch_size,
        epochs=args.pretrain_epochs,
        validation_data=test_set
    )

    print('start pruning')
    optimizer_finetune = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, decay=1e-4)

    # create_pruner
    if args.pruner_name == 'level':
        prune_config = [{
            'sparsity': args.sparsity,
            'op_types': ['default'],
        }]
        pruner = LevelPruner(model, prune_config)
    elif args.pruner_name == 'slim':
        prune_config = [{
            'sparsity': args.sparsity,
            'op_types': ['BatchNormalization'],
        }]
        pruner = SlimPruner(model, prune_config)

    model = pruner.compress()
    model.compile(
        optimizer=optimizer_finetune,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=True  # NOTE: Important, model compression does not work in graph mode!
    )

    # fine-tuning
    model.fit(
        train_set[0],
        train_set[1],
        batch_size=args.batch_size,
        epochs=args.prune_epochs,
        validation_data=test_set
    )


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pruner_name', type=str, default='level', choices=['level', 'slim'])
    parser.add_argument('--batch-size', type=int, default=256)
    parser.add_argument('--pretrain_epochs', type=int, default=10)
    parser.add_argument('--prune_epochs', type=int, default=10)
    parser.add_argument('--sparsity', type=float, default=0.5)
    args = parser.parse_args()

    main(args)
examples/model_compress/pruning/naive_prune_torch.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for quick start of pruning.
In this example, we use level pruner to prune the LeNet on MNIST.
'''

import logging
import argparse

import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

from nni.algorithms.compression.pytorch.pruning import LevelPruner

import sys
sys.path.append('../models')
from mnist.lenet import LeNet

_logger = logging.getLogger('mnist_example')
_logger.setLevel(logging.INFO)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    acc = 100 * correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), acc))

    return acc


def main(args):
    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset1 = datasets.MNIST('./data', train=True, download=True, transform=transform)
    dataset2 = datasets.MNIST('./data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = LeNet().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    print('start pre-training')
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()
    torch.save(model.state_dict(), "pretrain_mnist_lenet.pt")

    print('start pruning')
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.01)

    # create pruner
    prune_config = [{
        'sparsity': args.sparsity,
        'op_types': ['default'],
    }]
    pruner = LevelPruner(model, prune_config)
    model = pruner.compress()

    # fine-tuning
    best_top1 = 0
    for epoch in range(1, args.epochs + 1):
        pruner.update_epoch(epoch)
        train(args, model, device, train_loader, optimizer_finetune, epoch)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores the state_dict of the pruned model,
            # 'mask_path' stores the mask_dict of the pruned model
            pruner.export_model(model_path='pruned_mnist_lenet.pt', mask_path='mask_mnist_lenet.pt')


if __name__ == '__main__':
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example for model compression')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='target overall sparsity')
    args = parser.parse_args()

    main(args)
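For reference (not part of this commit), the exported files above could be loaded back as sketched below. apply_compression_results, which the speedup example later in this commit also uses, re-applies the exported mask_dict so the zeroed weights stay zero; the file names and the LeNet import mirror the script above.

# Hypothetical sketch: reload the pruned state_dict and re-apply the exported masks.
import sys
import torch
sys.path.append('../models')
from mnist.lenet import LeNet
from nni.compression.pytorch import apply_compression_results

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeNet().to(device)
# 'pruned_mnist_lenet.pt' holds the pruned state_dict, 'mask_mnist_lenet.pt' the mask_dict.
model.load_state_dict(torch.load('pruned_mnist_lenet.pt', map_location=device))
apply_compression_results(model, 'mask_mnist_lenet.pt', device)
model.eval()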
examples/model_compress/pruning/speedup/model_speedup.py (new file, 0 → 100644)

import os
import sys
import argparse
import time
import torch
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG
from mnist.lenet import LeNet
from nni.compression.pytorch import apply_compression_results, ModelSpeedup

torch.manual_seed(0)
use_mask = True
use_speedup = True
compare_results = True

config = {
    'apoz': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_apoz.pth'
    },
    'l1filter': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_l1filter.pth'
    },
    'fpgm': {
        'model_name': 'vgg16',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg16_cifar10_fpgm.pth'
    },
    'slim': {
        'model_name': 'vgg19',
        'input_shape': [64, 3, 32, 32],
        'masks_file': './experiment_data/mask_vgg19_cifar10_slim.pth'
    }
}


def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # device = torch.device(config['device'])
    if config['model_name'] == 'vgg16':
        model = VGG(depth=16)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'lenet':
        model = LeNet()

    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')


if __name__ == '__main__':
    parser = argparse.ArgumentParser("speedup")
    parser.add_argument("--example_name", type=str, default="slim", help="the name of pruning example")
    parser.add_argument("--masks_file", type=str, default=None, help="the path of the masks file")
    args = parser.parse_args()

    if args.example_name != 'all':
        if args.masks_file is not None:
            config[args.example_name]['masks_file'] = args.masks_file
        if not os.path.exists(config[args.example_name]['masks_file']):
            msg = '{} does not exist! You should specify masks_file correctly, ' \
                  'or use the default one which is generated by model_prune_torch.py'
            raise RuntimeError(msg.format(config[args.example_name]['masks_file']))
        model_inference(config[args.example_name])
    else:
        model_inference(config['fpgm'])
        model_inference(config['slim'])
        model_inference(config['l1filter'])
        model_inference(config['apoz'])
examples/model_compress/pruning/speedup/speedup_mobilnetv2.py (new file, 0 → 100644)

import torch
from torchvision.models import mobilenet_v2
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner

model = mobilenet_v2(pretrained=True)
dummy_input = torch.rand(8, 3, 416, 416)

cfg_list = [{'op_types': ['Conv2d'], 'sparsity': 0.5}]

pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
pruner.export_model('./model', './mask')
# need to call _unwrap_model if you want to run the speedup on the same model
pruner._unwrap_model()

# Speed up the MobileNetV2 model
ms = ModelSpeedup(model, dummy_input, './mask')
ms.speedup_model()

model(dummy_input)
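As a rough follow-up sketch (not part of this commit), one way to confirm the effect of the speed-up above is to compare FLOPs and parameter counts with count_flops_params, the same utility the other examples in this commit use. `model` here refers to the sped-up model left in scope by the script above.

# Hypothetical check: FLOPs/params of a fresh dense MobileNetV2 vs. the sped-up model above.
import torch
from torchvision.models import mobilenet_v2
from nni.compression.pytorch.utils.counter import count_flops_params

dummy_input = torch.rand(8, 3, 416, 416)
dense_flops, dense_params, _ = count_flops_params(mobilenet_v2(pretrained=True), dummy_input)
pruned_flops, pruned_params, _ = count_flops_params(model, dummy_input)  # `model` from the script above
print(f'FLOPs: {dense_flops / 1e6:.2f}M -> {pruned_flops / 1e6:.2f}M, '
      f'params: {dense_params / 1e6:.2f}M -> {pruned_params / 1e6:.2f}M')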
examples/model_compress/pruning/speedup/speedup_nanodet.py (new file, 0 → 100644)

import torch
from nanodet.model.arch import build_model
from nanodet.util import cfg, load_config

from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner

"""
NanoDet model can be installed from https://github.com/RangiLyu/nanodet.git
"""

cfg_path = r"nanodet/config/nanodet-RepVGG-A0_416.yml"
load_config(cfg, cfg_path)

model = build_model(cfg.model).cpu()
dummy_input = torch.rand(8, 3, 416, 416)

op_names = []
# these three conv layers are followed by reshape-like functions
# that cannot be replaced, so we skip these three conv layers;
# you can also find such layers with the `not_safe_to_prune` function
excludes = ['head.gfl_cls.0', 'head.gfl_cls.1', 'head.gfl_cls.2']
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        if name not in excludes:
            op_names.append(name)

cfg_list = [{'op_types': ['Conv2d'], 'sparsity': 0.5, 'op_names': op_names}]

pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
pruner.export_model('./model', './mask')
# need to call _unwrap_model if you want to run the speedup on the same model
pruner._unwrap_model()

# Speed up the NanoDet model
ms = ModelSpeedup(model, dummy_input, './mask')
ms.speedup_model()

model(dummy_input)
examples/model_compress/pruning/speedup/speedup_yolov3.py (new file, 0 → 100644)

import torch
from pytorchyolo import models

from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner, LevelPruner
from nni.compression.pytorch.utils import not_safe_to_prune

# The YOLO model can be downloaded at https://github.com/eriklindernoren/PyTorch-YOLOv3.git
prefix = '/home/user/PyTorch-YOLOv3'  # replace this path with yours

# Load the YOLO model
model = models.load_model(
    "%s/config/yolov3.cfg" % prefix,
    "%s/yolov3.weights" % prefix).cpu()
model.eval()
dummy_input = torch.rand(8, 3, 320, 320)
model(dummy_input)

# Generate the config list for the pruner
# Filter out the layers that may not be safe to prune
not_safe = not_safe_to_prune(model, dummy_input)

cfg_list = []
for name, module in model.named_modules():
    if name in not_safe:
        continue
    if isinstance(module, torch.nn.Conv2d):
        cfg_list.append({'op_types': ['Conv2d'], 'sparsity': 0.6, 'op_names': [name]})

# Prune the model
pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
pruner.export_model('./model', './mask')
pruner._unwrap_model()

# Speed up the model
ms = ModelSpeedup(model, dummy_input, './mask')
ms.speedup_model()
model(dummy_input)
examples/model_compress/pruning/transformers/run.sh (new file, 0 → 100755)

#!/bin/bash
# Usage: ./run.sh gpu_id glue_task

export HIP_VISIBLE_DEVICES=$1
TASK_NAME=$2                          # "cola", "sst2", "mrpc", "stsb", "qqp", "mnli", "qnli", "rte", "wnli"
PRETRAINED_MODEL="bert-base-uncased"  # "distilbert-base-uncased", "roberta-base", "bert-base-cased", ...

# parameters for pruning
SPARSITY=0.5
RANKING_CRITERION=l1_weight           # "l1_weight", "l2_weight", "l1_activation", "l2_activation", "taylorfo"
NUM_ITERATIONS=1                      # 1 for one-shot pruning
EPOCHS_PER_ITERATION=1

# other training parameters, no need to change
MAX_LENGTH=128
BATCH_SIZE=32
LR=2e-5
N_EPOCHS=3

time=$(date "+%Y%m%d%H%M%S")
OUTDIR="models_${PRETRAINED_MODEL}_${TASK_NAME}_$time/"

TASK_LIST=("cola" "sst2" "mrpc" "stsb" "qqp" "mnli" "qnli" "rte" "wnli")
if [[ ${TASK_LIST[*]} =~ (^|[[:space:]])$TASK_NAME($|[[:space:]]) ]]; then
    mkdir $OUTDIR
    python transformer_pruning.py \
        --sparsity $SPARSITY \
        --ranking_criterion $RANKING_CRITERION \
        --num_iterations $NUM_ITERATIONS \
        --epochs_per_iteration $EPOCHS_PER_ITERATION \
        --speed_up \
        --model_name $PRETRAINED_MODEL \
        --task_name $TASK_NAME \
        --max_length $MAX_LENGTH \
        --batch_size $BATCH_SIZE \
        --learning_rate $LR \
        --num_train_epochs $N_EPOCHS \
        --output_dir $OUTDIR \
        2>&1 | tee "$OUTDIR/output.log"
else
    echo "Unsupported task $TASK_NAME."
fi
examples/model_compress/pruning/transformers/transformer_pruning.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import argparse
import logging
import os

import torch
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm

from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.pytorch.pruning import TransformerHeadPruner

import datasets
from datasets import load_dataset, load_metric
import transformers
from transformers import (
    AdamW,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    get_scheduler,
)

logger = logging.getLogger("bert_pruning_example")


def parse_args():
    parser = argparse.ArgumentParser(description="Example: prune a Huggingface transformer and finetune on GLUE tasks.")

    parser.add_argument("--model_name", type=str, required=True,
                        help="Pretrained model architecture.")
    parser.add_argument("--task_name", type=str, default=None,
                        help="The name of the GLUE task.",
                        choices=["cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"])
    parser.add_argument("--output_dir", type=str, default=None,
                        help="Where to store the model and mask.")
    parser.add_argument("--sparsity", type=float, required=True,
                        help="Sparsity: proportion of heads to prune (should be between 0 and 1)")
    parser.add_argument("--global_sort", action="store_true", default=False,
                        help="Rank the heads globally and prune the heads with lowest scores. If set to False, the "
                             "heads are only ranked within one layer")
    parser.add_argument("--ranking_criterion", type=str, default="l1_weight",
                        choices=["l1_weight", "l2_weight", "l1_activation", "l2_activation", "taylorfo"],
                        help="Criterion by which the attention heads are ranked.")
    parser.add_argument("--num_iterations", type=int, default=1,
                        help="Number of pruning iterations (1 for one-shot pruning).")
    parser.add_argument("--epochs_per_iteration", type=int, default=1,
                        help="Epochs to finetune before the next pruning iteration "
                             "(only effective if num_iterations > 1).")
    parser.add_argument("--speed_up", action="store_true", default=False,
                        help="Whether to speed-up the pruned model")

    # parameters for model training; no need to change them for running examples
    parser.add_argument("--max_length", type=int, default=128,
                        help=("The maximum total input sequence length after tokenization. Sequences longer than this "
                              "will be truncated, sequences shorter will be padded if `--pad_to_max_length` is passed."))
    parser.add_argument("--batch_size", type=int, default=8,
                        help="Batch size.")
    parser.add_argument("--learning_rate", type=float, default=5e-5,
                        help="Initial learning rate.")
    parser.add_argument("--num_train_epochs", type=int, default=3,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--lr_scheduler_type", default="linear",
                        choices=["linear", "cosine", "cosine_with_restarts", "polynomial", "constant",
                                 "constant_with_warmup"])
    parser.add_argument("--num_warmup_steps", type=int, default=0,
                        help="Number of steps for the warmup in the lr scheduler.")

    args = parser.parse_args()

    if args.output_dir is not None:
        os.makedirs(args.output_dir, exist_ok=True)

    return args


def get_raw_dataset(task_name):
    """
    Get a GLUE dataset using huggingface datasets.
    """
    raw_dataset = load_dataset("glue", task_name)
    is_regression = task_name == "stsb"
    num_labels = 1 if is_regression else len(raw_dataset["train"].features["label"].names)

    return raw_dataset, is_regression, num_labels


def preprocess(args, tokenizer, raw_dataset):
    """
    Tokenization and column renaming.
    """
    assert args.task_name is not None
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }
    sentence1_key, sentence2_key = task_to_keys[args.task_name]

    def tokenize(data):
        texts = (
            (data[sentence1_key],) if sentence2_key is None else (data[sentence1_key], data[sentence2_key])
        )
        result = tokenizer(*texts, padding=False, max_length=args.max_length, truncation=True)

        if "label" in data:
            result["labels"] = data["label"]
        return result

    processed_datasets = raw_dataset.map(tokenize, batched=True,
                                         remove_columns=raw_dataset["train"].column_names)

    return processed_datasets


def get_dataloader_and_optimizer(args, tokenizer, model, train_dataset, eval_dataset):
    data_collator = DataCollatorWithPadding(tokenizer)
    train_dataloader = DataLoader(train_dataset, shuffle=True, collate_fn=data_collator,
                                  batch_size=args.batch_size)
    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=args.batch_size)

    optimizer = AdamW(model.parameters(), lr=args.learning_rate)

    return optimizer, train_dataloader, eval_dataloader, data_collator


def train_model(args, model, is_regression, train_dataloader, eval_dataloader, optimizer, lr_scheduler, metric, device):
    """
    Train the model using train_dataloader and evaluate after every epoch using eval_dataloader.
    This function is called before and after pruning for "pretraining" on the GLUE task and further "finetuning".
    """
    train_steps = args.num_train_epochs * len(train_dataloader)
    progress_bar = tqdm(range(train_steps), position=0, leave=True)

    for epoch in range(args.num_train_epochs):
        model.train()
        for step, batch in enumerate(train_dataloader):
            for field in batch.keys():
                batch[field] = batch[field].to(device)
            outputs = model(**batch)
            outputs.loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)

        model.eval()
        for step, batch in enumerate(eval_dataloader):
            for field in batch.keys():
                batch[field] = batch[field].to(device)
            outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1) if not is_regression \
                else outputs.logits.squeeze()
            metric.add_batch(predictions=predictions, references=batch["labels"])

        eval_metric = metric.compute()
        logger.info(f"epoch {epoch}: {eval_metric}")


def trainer_helper(model, train_dataloader, optimizer, device):
    """
    This function is used to create a "trainer" that is passed to the pruner.
    Finetune the model for 1 epoch. This function is called by the pruner during pruning iterations (or called to
    calculate scores for pruning when the ranking criterion is "taylorfo").
    """
    logger.info("Training for 1 epoch...")
    progress_bar = tqdm(range(len(train_dataloader)), position=0, leave=True)

    train_epoch = 1
    for epoch in range(train_epoch):
        for step, batch in enumerate(train_dataloader):
            for field in batch.keys():
                batch[field] = batch[field].to(device)
            outputs = model(**batch)
            outputs.loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            progress_bar.update(1)


def forward_runner_helper(model, train_dataloader, device):
    """
    This function is used to create a "forward_runner" that is passed to the pruner.
    The function just runs forward on the train set without updating the parameters.
    This allows the pruner to collect data for activation-based pruning methods.
    """
    logger.info("Running forward on the entire train set without updating parameters...")
    progress_bar = tqdm(range(len(train_dataloader)), position=0, leave=True)

    forward_epoch = 1
    for epoch in range(forward_epoch):
        for step, batch in enumerate(train_dataloader):
            for field in batch.keys():
                batch[field] = batch[field].to(device)
            _ = model(**batch)
            # note: no loss.backward or optimizer.step() is performed here
            progress_bar.update(1)


def final_eval_for_mnli(args, model, processed_datasets, metric, data_collator):
    """
    If the task is MNLI, perform a final evaluation on the mismatched validation set
    """
    eval_dataset = processed_datasets["validation_mismatched"]
    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=args.batch_size)

    model.eval()
    for step, batch in enumerate(eval_dataloader):
        outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        metric.add_batch(
            predictions=predictions,
            references=batch["labels"],
        )

    eval_metric = metric.compute()
    logger.info(f"mnli-mm: {eval_metric}")


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args = parse_args()

    #########################################################################
    # Prepare model, tokenizer, dataset, optimizer, and the scheduler
    logger.setLevel(logging.INFO)
    datasets.utils.logging.set_verbosity_warning()
    transformers.utils.logging.set_verbosity_info()

    # Load dataset and tokenizer, and then preprocess the dataset
    raw_dataset, is_regression, num_labels = get_raw_dataset(args.task_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
    processed_datasets = preprocess(args, tokenizer, raw_dataset)
    train_dataset = processed_datasets["train"]
    eval_dataset = processed_datasets["validation_matched" if args.task_name == "mnli" else "validation"]

    # Load pretrained model
    config = AutoConfig.from_pretrained(args.model_name, num_labels=num_labels, finetuning_task=args.task_name)
    model = AutoModelForSequenceClassification.from_pretrained(args.model_name, config=config)
    model.to(device)

    #########################################################################
    # Finetune on the target GLUE task before pruning
    optimizer, train_dataloader, eval_dataloader, data_collator = get_dataloader_and_optimizer(
        args, tokenizer, model, train_dataset, eval_dataset)
    train_steps = args.num_train_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(name=args.lr_scheduler_type, optimizer=optimizer,
                                 num_warmup_steps=args.num_warmup_steps,
                                 num_training_steps=train_steps)
    metric = load_metric("glue", args.task_name)

    logger.info("================= Finetuning before pruning =================")
    train_model(args, model, is_regression, train_dataloader, eval_dataloader, optimizer, lr_scheduler, metric, device)

    if args.output_dir is not None:
        torch.save(model.state_dict(), args.output_dir + "/model_before_pruning.pt")

    if args.task_name == "mnli":
        final_eval_for_mnli(args, model, processed_datasets, metric, data_collator)

    #########################################################################
    # Pruning
    optimizer, train_dataloader, eval_dataloader, data_collator = get_dataloader_and_optimizer(
        args, tokenizer, model, train_dataset, eval_dataset)

    dummy_input = next(iter(train_dataloader))["input_ids"].to(device)
    flops, params, results = count_flops_params(model, dummy_input)
    print(f"Initial model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M")

    # Here criterion is embedded in the model. Upper levels can just pass None to trainer.
    def trainer(model, optimizer, criterion, epoch):
        return trainer_helper(model, train_dataloader, optimizer, device)

    def forward_runner(model):
        return forward_runner_helper(model, train_dataloader, device)

    # example: prune different layers with different sparsity
    attention_name_groups = list(zip(["bert.encoder.layer.{}.attention.self.query".format(i) for i in range(12)],
                                     ["bert.encoder.layer.{}.attention.self.key".format(i) for i in range(12)],
                                     ["bert.encoder.layer.{}.attention.self.value".format(i) for i in range(12)],
                                     ["bert.encoder.layer.{}.attention.output.dense".format(i) for i in range(12)]))

    kwargs = {"ranking_criterion": args.ranking_criterion,
              "global_sort": args.global_sort,
              "num_iterations": args.num_iterations,
              "epochs_per_iteration": args.epochs_per_iteration,
              "attention_name_groups": attention_name_groups,
              "head_hidden_dim": 64,
              "trainer": trainer,
              "optimizer": optimizer,
              "forward_runner": forward_runner}

    config_list = [{
        "sparsity": args.sparsity,
        "op_types": ["Linear"],
        "op_names": [x for layer in attention_name_groups[:6] for x in layer]
    }, {
        "sparsity": args.sparsity / 2,
        "op_types": ["Linear"],
        "op_names": [x for layer in attention_name_groups[6:] for x in layer]
    }]

    pruner = TransformerHeadPruner(model, config_list, **kwargs)
    pruner.compress()

    #########################################################################
    # uncomment the following part to export the pruned model masks
    # model_path = os.path.join(args.output_dir, "pruned_{}_{}.pth".format(args.model_name, args.task_name))
    # mask_path = os.path.join(args.output_dir, "mask_{}_{}.pth".format(args.model_name, args.task_name))
    # pruner.export_model(model_path=model_path, mask_path=mask_path)

    #########################################################################
    # Speedup
    # Currently, speeding up Transformers through NNI ModelSpeedup is not supported because of shape inference issues.
    # However, if you are using the transformers library, you can use the following workaround:
    # The following code gets the head pruning decisions from the pruner and calls the _prune_heads() function
    # implemented in models from the transformers library to speed up the model.
    if args.speed_up:
        speedup_rules = {}
        for group_idx, group in enumerate(pruner.attention_name_groups):
            # get the layer index
            layer_idx = None
            for part in group[0].split("."):
                try:
                    layer_idx = int(part)
                    break
                except:
                    continue
            if layer_idx is not None:
                speedup_rules[layer_idx] = pruner.pruned_heads[group_idx]
        pruner._unwrap_model()
        model.bert._prune_heads(speedup_rules)
        print(model)

    #########################################################################
    # After pruning, finetune again on the target task
    # Get the metric function
    metric = load_metric("glue", args.task_name)

    # re-initialize the optimizer and the scheduler
    optimizer, _, _, data_collator = get_dataloader_and_optimizer(args, tokenizer, model, train_dataset,
                                                                  eval_dataset)
    lr_scheduler = get_scheduler(name=args.lr_scheduler_type, optimizer=optimizer,
                                 num_warmup_steps=args.num_warmup_steps,
                                 num_training_steps=train_steps)

    logger.info("================= Finetuning after Pruning =================")
    train_model(args, model, is_regression, train_dataloader, eval_dataloader, optimizer, lr_scheduler, metric, device)

    if args.output_dir is not None:
        torch.save(model.state_dict(), args.output_dir + "/model_after_pruning.pt")

    if args.task_name == "mnli":
        final_eval_for_mnli(args, model, processed_datasets, metric, data_collator)

    flops, params, results = count_flops_params(model, dummy_input)
    print(f"Final model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M")


if __name__ == "__main__":
    main()
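To isolate the speed-up workaround described in the comments above, here is a minimal sketch (not part of this commit). BERT models from the transformers library expose _prune_heads(), which takes a {layer_index: [head_indices]} dict exactly like the speedup_rules built in main(); the layer/head indices below are arbitrary.

# Minimal sketch of the transformers head-pruning workaround (indices chosen arbitrarily).
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# remove heads 0 and 1 of encoder layer 2, and head 5 of encoder layer 7
model.bert._prune_heads({2: [0, 1], 7: [5]})
# layer 2 now keeps 10 of its original 12 attention heads
print(model.bert.encoder.layer[2].attention.self.num_attention_heads)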
examples/model_compress/pruning/v2/activation_pruning_torch.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for supported ActivationAPoZRank and ActivationMeanRank pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
'''
import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

import nni
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import ActivationAPoZRankPruner, ActivationMeanRankPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pruner', type=str, default='apoz',
                        choices=['apoz', 'mean'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'total_sparsity': 0.5,
        'op_types': ['Conv2d'],
    }]

    # make sure you have used nni.trace to wrap the optimizer class before initializing it
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    if 'apoz' in args.pruner:
        pruner = ActivationAPoZRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    else:
        pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # The optimizer used in the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)

    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc: .2f}%')
examples/model_compress/pruning/v2/admm_pruning_torch.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for supported ADMM pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speed up is required.
'''
import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

import nni
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import ADMMPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.8,
        'op_types': ['Conv2d'],
    }, {
        'sparsity': 0.92,
        'op_types': ['Conv2d'],
    }]

    # make sure you have used nni.trace to wrap the optimizer class before initializing it
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    pruner = ADMMPruner(model, config_list, trainer, traced_optimizer, criterion, iterations=2, training_epochs=2)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()

    # Fine-grained method does not need to speedup
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER PRUNING ' + '=' * 50)
    evaluator(model)

    # The optimizer used in the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)

    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc: .2f}%')
examples/model_compress/pruning/v2/amc_pruning_torch.py
0 → 100644
View file @
1011377c
import
sys
from
tqdm
import
tqdm
import
torch
from
torchvision
import
datasets
,
transforms
from
torch.optim.lr_scheduler
import
MultiStepLR
from nni.algorithms.compression.v2.pytorch.pruning import AMCPruner
from nni.compression.pytorch.utils.counter import count_flops_params
from pathlib import Path

sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()


def trainer(model, optimizer, criterion, epoch):
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def finetuner(model):
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for data, target in tqdm(iterable=train_loader, desc='Epoch PFs'):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    # model = MobileNetV2(n_class=10).to(device)
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=[50, 75], gamma=0.1)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for i in range(100):
        trainer(model, optimizer, criterion, i)
        scheduler.step()  # decay the learning rate at the configured milestones
    pre_best_acc = evaluator(model)

    dummy_input = torch.rand(10, 3, 32, 32).to(device)
    pre_flops, pre_params, _ = count_flops_params(model, dummy_input)

    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.5, 'max_sparsity_per_layer': 0.8}]

    # If you just want to keep the final result as the best result, you can pass evaluator as None.
    # Otherwise, the result with the highest score (given by evaluator) will be the best result.
    ddpg_params = {
        'hidden1': 300, 'hidden2': 300, 'lr_c': 1e-3, 'lr_a': 1e-4, 'warmup': 100,
        'discount': 1., 'bsize': 64, 'rmsize': 100, 'window_length': 1, 'tau': 0.01,
        'init_delta': 0.5, 'delta_decay': 0.99, 'max_episode_length': 1e9, 'epsilon': 50000
    }

    pruner = AMCPruner(400, model, config_list, dummy_input, evaluator, finetuner=finetuner,
                       ddpg_params=ddpg_params, target='flops')
    pruner.compress()

    _, model, masks, best_acc, _ = pruner.get_best_result()
    flops, params, _ = count_flops_params(model, dummy_input)
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc:.2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc:.2f}%')
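The AMC example above lets a DDPG agent search per-layer sparsity and returns the best masked model through pruner.get_best_result(). A natural follow-up, not part of the original script, is to persist that result. Below is a minimal sketch; the helper name and the checkpoint file names are illustrative, and it assumes the model and masks objects produced above.

# Sketch: persist the best result returned by pruner.get_best_result().
# `save_pruned_result` and the file names are illustrative, not part of the NNI API.
import torch

def save_pruned_result(model, masks, prefix='amc_vgg_cifar10'):
    torch.save(model.state_dict(), f'{prefix}_weights.pth')  # (masked) weights
    torch.save(masks, f'{prefix}_masks.pth')                 # per-layer binary masks

# Example call, inside the __main__ block of the script above:
# save_pruned_result(model, masks)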
examples/model_compress/pruning/v2/auto_compress_pruner.py
0 → 100644
View file @
1011377c
import sys

from tqdm import tqdm
import torch
from torchvision import datasets, transforms

import nni
from nni.algorithms.compression.v2.pytorch.pruning import AutoCompressPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()

epoch = 0


def trainer(model, optimizer, criterion):
    global epoch
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Total Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    epoch = epoch + 1


def finetuner(model):
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    trainer(model, optimizer, criterion)


def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for _ in range(10):
        trainer(model, optimizer, criterion)

    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.8}]
    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # make sure you have used nni.trace to wrap the optimizer class before initialize
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    admm_params = {
        'trainer': trainer,
        'traced_optimizer': traced_optimizer,
        'criterion': criterion,
        'iterations': 10,
        'training_epochs': 1
    }
    sa_params = {
        'evaluator': evaluator
    }
    pruner = AutoCompressPruner(model, config_list, 10, admm_params, sa_params,
                                keep_intermediate_result=True, finetuner=finetuner)
    pruner.compress()
    _, model, masks, _, _ = pruner.get_best_result()
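One detail worth calling out in the script above is the nni.trace(torch.optim.SGD)(...) call: the optimizer class is wrapped before it is instantiated, presumably so the pruner can record the constructor arguments and re-create fresh optimizers during its ADMM iterations. The sketch below shows the same pattern with Adam instead of SGD; it assumes a model like the one defined above and is not taken from the original script.

# Sketch of the traced-optimizer pattern, shown with Adam. The key point is that
# nni.trace wraps the optimizer *class*; an already-constructed optimizer instance
# cannot be passed here. Assumes `model` is defined as in the script above.
import nni
import torch

traced_adam = nni.trace(torch.optim.Adam)(model.parameters(), lr=1e-3)
# traced_adam can then be handed to the pruner, e.g. through admm_params['traced_optimizer'].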
examples/model_compress/pruning/v2/fpgm_pruning_torch.py
0 → 100644
View file @
1011377c
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for the supported FPGM pruning algorithm.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a really compressed model, model speedup is required.
'''

import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import FPGMPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.5,
        'op_types': ['Conv2d']
    }]
    pruner = FPGMPruner(model, config_list)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # The optimizer used by the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)
    best_acc = 0.0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc:.2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc:.2f}%')
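Because the FPGM example calls ModelSpeedup after pruning, the convolution layers are actually replaced by narrower ones rather than merely masked. A quick way to see this, not included in the original script, is to print the channel counts after speedup; the sketch below uses only standard PyTorch and assumes the model variable from the script above.

# Sketch: print the channel count of every convolution after speedup to confirm
# the architecture really shrank. Pure PyTorch; assumes `model` from the script above.
import torch

for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        print(f'{name}: in_channels={module.in_channels}, out_channels={module.out_channels}')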
examples/model_compress/pruning/v2/iterative_pruning_torch.py
0 → 100644
View file @
1011377c
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for supported iterative pruning algorithms.
In this example, we show the end-to-end iterative pruning process: pre-training -> pruning -> fine-tuning.
'''

import sys
import argparse

from tqdm import tqdm
import torch
from torchvision import datasets, transforms

from nni.algorithms.compression.v2.pytorch.pruning import (
    LinearPruner,
    AGPPruner,
    LotteryTicketPruner
)

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()


def trainer(model, optimizer, criterion, epoch):
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def finetuner(model):
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for data, target in tqdm(iterable=train_loader, desc='Epoch PFs'):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Iterative Example for model compression')
    parser.add_argument('--pruner', type=str, default='linear',
                        choices=['linear', 'agp', 'lottery'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=10,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--total-iteration', type=int, default=10,
                        help='number of iterations to iteratively prune the model')
    parser.add_argument('--pruning-algo', type=str, default='l1',
                        choices=['level', 'l1', 'l2', 'fpgm', 'slim', 'apoz', 'mean_activation', 'taylorfo', 'admm'],
                        help='algorithm to evaluate weights to prune')
    parser.add_argument('--speed-up', type=bool, default=False,
                        help='whether to speed up the pruned model')
    parser.add_argument('--reset-weight', type=bool, default=True,
                        help='whether to reset weights during each iteration')
    args = parser.parse_args()

    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion, i)
    evaluator(model)

    config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # If you just want to keep the final result as the best result, you can pass evaluator as None.
    # Otherwise, the result with the highest score (given by evaluator) will be the best result.
    kw_args = {'pruning_algorithm': args.pruning_algo,
               'total_iteration': args.total_iteration,
               'evaluator': None,
               'finetuner': finetuner}
    if args.speed_up:
        kw_args['speed_up'] = args.speed_up
        kw_args['dummy_input'] = torch.rand(10, 3, 32, 32).to(device)

    if args.pruner == 'linear':
        iterative_pruner = LinearPruner
    elif args.pruner == 'agp':
        iterative_pruner = AGPPruner
    elif args.pruner == 'lottery':
        kw_args['reset_weight'] = args.reset_weight
        iterative_pruner = LotteryTicketPruner

    pruner = iterative_pruner(model, config_list, **kw_args)
    pruner.compress()
    _, model, masks, _, _ = pruner.get_best_result()
    evaluator(model)
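The --pruner choice above mainly changes how the target sparsity grows across the --total-iteration iterations: a linear ramp for LinearPruner versus the cubic AGP-style schedule introduced by Zhu and Gupta (2017) for AGPPruner. The sketch below only illustrates the shape of those two ramps for a final sparsity of 0.8; it is not NNI's internal scheduling code.

# Illustration only: linear vs. AGP-style cubic sparsity ramps towards 0.8 over 10 iterations.
# This sketches the idea behind '--pruner linear' and '--pruner agp'; it does not
# reproduce NNI's internal schedule implementation.
final_sparsity = 0.8
total_iteration = 10

for t in range(1, total_iteration + 1):
    linear_target = final_sparsity * t / total_iteration
    cubic_target = final_sparsity * (1 - (1 - t / total_iteration) ** 3)
    print(f'iteration {t:2d}: linear {linear_target:.3f}, cubic {cubic_target:.3f}')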
examples/model_compress/pruning/v2/level_pruning_torch.py
0 → 100644
View file @
1011377c
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for the supported level pruning algorithm.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a really compressed model, model speedup is required.
'''

import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import LevelPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))

    # Start to prune
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.5,
        'op_types': ['default']
    }]
    pruner = LevelPruner(model, config_list)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()

    # Fine-grained pruning does not need model speedup
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER PRUNING ' + '=' * 50)
    evaluator(model)

    # The optimizer used by the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)
    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc:.2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc:.2f}%')
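Level pruning is fine-grained, so the script above never calls ModelSpeedup; the compression exists only in the masks. If you want to check the achieved per-layer sparsity, it can be read from the masks dict returned by compress(). The sketch below assumes the masks are keyed by layer name with a 'weight' entry, which is the format the other examples in this commit pass to ModelSpeedup; treat that nesting as an assumption rather than a documented guarantee.

# Sketch: report per-layer sparsity from the masks returned by pruner.compress().
# Assumes masks[layer_name]['weight'] is a 0/1 tensor (an assumption based on how the
# masks are consumed by ModelSpeedup elsewhere in this commit).
for layer_name, layer_masks in masks.items():
    weight_mask = layer_masks['weight']
    sparsity = 1.0 - weight_mask.sum().item() / weight_mask.numel()
    print(f'{layer_name}: {sparsity:.2%} of weights masked out')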
examples/model_compress/pruning/v2/movement_pruning_glue.py
0 → 100644
View file @
1011377c
import functools

from tqdm import tqdm
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from datasets import load_metric, load_dataset
from transformers import (
    BertForSequenceClassification,
    BertTokenizerFast,
    DataCollatorWithPadding,
    set_seed
)

import nni
from nni.algorithms.compression.v2.pytorch.pruning import MovementPruner

task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gradient_accumulation_steps = 16


# a fake criterion because the huggingface output already contains the loss
def criterion(input, target):
    return input.loss


def trainer(model, optimizer, criterion, train_dataloader):
    model.train()
    counter = 0
    optimizer.zero_grad()
    for batch in tqdm(train_dataloader):
        counter += 1
        batch.to(device)
        outputs = model(**batch)
        # the pruner may wrap the criterion, for example, loss = origin_loss + norm(weight), so call criterion to get the loss here
        loss = criterion(outputs, None)
        loss = loss / gradient_accumulation_steps
        loss.backward()
        if counter % gradient_accumulation_steps == 0 or counter == len(train_dataloader):
            # only step and clear the gradients once per accumulation window
            optimizer.step()
            optimizer.zero_grad()
        if counter % 16000 == 0:
            print('Step {}: {}'.format(counter // gradient_accumulation_steps,
                                       evaluator(model, metric, is_regression, validate_dataloader)))


def evaluator(model, metric, is_regression, eval_dataloader):
    model.eval()
    for batch in tqdm(eval_dataloader):
        batch.to(device)
        outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1) if not is_regression else outputs.logits.squeeze()
        metric.add_batch(
            predictions=predictions,
            references=batch["labels"],
        )
    return metric.compute()


if __name__ == '__main__':
    task_name = 'mnli'
    is_regression = False
    num_labels = 1 if is_regression else (3 if task_name == 'mnli' else 2)
    train_batch_size = 8
    eval_batch_size = 8

    set_seed(1024)
    tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
    sentence1_key, sentence2_key = task_to_keys[task_name]

    # used to preprocess the raw data
    def preprocess_function(examples):
        # Tokenize the texts
        args = (
            (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])
        )
        result = tokenizer(*args, padding=False, max_length=128, truncation=True)

        if "label" in examples:
            # In all cases, rename the column to labels because the model will expect that.
            result["labels"] = examples["label"]
        return result

    raw_datasets = load_dataset('glue', task_name, cache_dir='./data')
    processed_datasets = raw_datasets.map(preprocess_function, batched=True,
                                          remove_columns=raw_datasets["train"].column_names)

    train_dataset = processed_datasets['train']
    validate_dataset = processed_datasets['validation_matched' if task_name == "mnli" else 'validation']

    data_collator = DataCollatorWithPadding(tokenizer)
    train_dataloader = DataLoader(train_dataset, shuffle=True, collate_fn=data_collator, batch_size=train_batch_size)
    validate_dataloader = DataLoader(validate_dataset, collate_fn=data_collator, batch_size=eval_batch_size)

    metric = load_metric("glue", task_name)

    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=num_labels).to(device)
    print('Initial: {}'.format(evaluator(model, metric, is_regression, validate_dataloader)))

    config_list = [{'op_types': ['Linear'], 'op_partial_names': ['bert.encoder'], 'sparsity': 0.9}]
    p_trainer = functools.partial(trainer, train_dataloader=train_dataloader)

    # make sure you have used nni.trace to wrap the optimizer class before initializing it
    traced_optimizer = nni.trace(Adam)(model.parameters(), lr=2e-5)
    pruner = MovementPruner(model, config_list, p_trainer, traced_optimizer, criterion, training_epochs=10,
                            warm_up_step=3000, cool_down_beginning_step=27000)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()

    print('Final: {}'.format(evaluator(model, metric, is_regression, validate_dataloader)))

    optimizer = Adam(model.parameters(), lr=2e-5)
    trainer(model, optimizer, criterion, train_dataloader)
    print('After 1 epoch finetuning: {}'.format(evaluator(model, metric, is_regression, validate_dataloader)))
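After movement pruning and the one epoch of fine-tuning, the script above only prints metrics; it does not save anything. A minimal sketch for persisting the result with the standard Hugging Face API is shown below; the output directory name is illustrative, and since the pruner wrappers may still be attached to the model it may be cleaner to call pruner._unwrap_model() first, as the other examples in this commit do.

# Sketch: save the pruned, fine-tuned model and tokenizer with the standard
# Hugging Face API. The output directory name is illustrative.
output_dir = './pruned_bert_' + task_name
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)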
examples/model_compress/pruning/v2/norm_pruning_torch.py
0 → 100644
View file @
1011377c
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for the supported L1-norm and L2-norm pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a really compressed model, model speedup is required.
'''

import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import L1NormPruner, L2NormPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pruner', type=str, default='l1norm',
                        choices=['l1norm', 'l2norm'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.5,
        'op_types': ['Conv2d']
    }]
    if 'l1' in args.pruner:
        pruner = L1NormPruner(model, config_list)
    else:
        pruner = L2NormPruner(model, config_list)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # The optimizer used by the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)
    best_acc = 0.0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc:.2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc:.2f}%')
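The only difference between the two pruners in this example is the ranking criterion: filters are scored by the L1 or L2 norm of their weights and the lowest-scoring ones are masked. The toy snippet below, which is standalone and not NNI code, shows how such a ranking looks for a single Conv2d layer.

# Standalone toy example of the L1 / L2 filter-ranking idea (not NNI internals).
import torch

conv = torch.nn.Conv2d(3, 8, kernel_size=3)
weight = conv.weight.detach()                   # shape: (out_channels, in_channels, k, k)

l1_scores = weight.abs().sum(dim=(1, 2, 3))     # one score per output filter
l2_scores = weight.pow(2).sum(dim=(1, 2, 3)).sqrt()

sparsity = 0.5
n_prune = int(sparsity * weight.size(0))
print('filters masked by L1 norm:', l1_scores.argsort()[:n_prune].tolist())
print('filters masked by L2 norm:', l2_scores.argsort()[:n_prune].tolist())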
examples/model_compress/pruning/v2/scheduler_torch.py
0 → 100644
View file @
1011377c
import sys

from tqdm import tqdm
import torch
from torchvision import datasets, transforms

from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
from nni.algorithms.compression.v2.pytorch.pruning.tools import AGPTaskGenerator
from nni.algorithms.compression.v2.pytorch.pruning.basic_scheduler import PruningScheduler

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()


def trainer(model, optimizer, criterion, epoch):
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def finetuner(model):
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for data, target in tqdm(iterable=train_loader, desc='Epoch PFs'):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for i in range(5):
        trainer(model, optimizer, criterion, i)

    # No need to pass model and config_list to the pruner during initialization when using a scheduler.
    pruner = L1NormPruner(None, None)

    # You can specify log_dir; all intermediate results and the best result will be saved under this folder.
    # If you don't want to keep intermediate results, set `keep_intermediate_result=False`.
    config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
    task_generator = AGPTaskGenerator(10, model, config_list, log_dir='.', keep_intermediate_result=True)

    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # If you just want to keep the final result as the best result, you can pass evaluator as None.
    # Otherwise, the result with the highest score (given by evaluator) will be the best result.
    # scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, evaluator=evaluator)
    scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True,
                                 dummy_input=dummy_input, evaluator=None, reset_weight=False)

    scheduler.compress()
    _, model, masks, _, _ = scheduler.get_best_result()
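Since the pruner, the task generator, and the scheduler are separate objects in this API, the same AGP schedule can drive a different pruning criterion by swapping only the pruner. The sketch below reuses the definitions from the script above (model, config_list, finetuner, dummy_input, evaluator) and pulls in L2NormPruner, which appears in the norm_pruning_torch.py example of this commit; it is a variation on the script, not part of it.

# Sketch: drive the same AGP schedule with an L2-norm criterion instead of L1.
# Reuses model, config_list, finetuner, dummy_input and evaluator from the script above;
# the import paths follow the examples in this commit.
from nni.algorithms.compression.v2.pytorch.pruning import L2NormPruner
from nni.algorithms.compression.v2.pytorch.pruning.tools import AGPTaskGenerator
from nni.algorithms.compression.v2.pytorch.pruning.basic_scheduler import PruningScheduler

pruner = L2NormPruner(None, None)
task_generator = AGPTaskGenerator(10, model, config_list, log_dir='.', keep_intermediate_result=True)
scheduler = PruningScheduler(pruner, task_generator, finetuner=finetuner, speed_up=True,
                             dummy_input=dummy_input, evaluator=evaluator)
scheduler.compress()
_, model, masks, _, _ = scheduler.get_best_result()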
examples/model_compress/pruning/v2/simple_pruning_torch.py
0 → 100644
View file @
1011377c
import sys

from tqdm import tqdm
import torch
from torchvision import datasets, transforms

from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
from nni.compression.pytorch.speedup import ModelSpeedup

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[2] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()


def trainer(model, optimizer, criterion, epoch):
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    print('\nPre-train the model:')
    for i in range(5):
        trainer(model, optimizer, criterion, i)
    evaluator(model)

    config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
    pruner = L1NormPruner(model, config_list)
    _, masks = pruner.compress()

    print('\nThe accuracy with masks:')
    evaluator(model)

    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand(10, 3, 32, 32).to(device), masks_file=masks).speedup_model()

    print('\nThe accuracy after speed up:')
    evaluator(model)

    # A new optimizer is needed because the modules in the model are replaced during speedup.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    print('\nFinetune the model after speed up:')
    for i in range(5):
        trainer(model, optimizer, criterion, i)
    evaluator(model)
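After speedup_model() the network is an ordinary, smaller nn.Module (the masked modules have been physically replaced), so standard PyTorch tooling applies for saving and deployment. The sketch below is not part of the original script; the file names are illustrative and it assumes the model and device defined above.

# Sketch: persist the physically compressed model with plain PyTorch. File names are illustrative.
import torch

dummy_input = torch.rand(1, 3, 32, 32).to(device)
torch.save(model.state_dict(), 'vgg_cifar10_pruned.pth')   # plain checkpoint
traced = torch.jit.trace(model, dummy_input)               # TorchScript module for deployment
traced.save('vgg_cifar10_pruned.pt')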