OpenDAS / nni

Commit e773dfcc, authored Mar 21, 2023 by qianyj

create branch for v2.9

Changes: 633 files. Showing 20 changed files with 1714 additions and 0 deletions (+1714, -0).
examples/feature_engineering/gradient_feature_selector/benchmark_test.py    +148  -0
examples/feature_engineering/gradient_feature_selector/sklearn_test.py       +57  -0
examples/feature_engineering/gradient_feature_selector/test_memory.py        +26  -0
examples/feature_engineering/gradient_feature_selector/test_time.py          +26  -0
examples/model_compress/.gitignore                                            +9  -0
examples/model_compress/auto_compress/torch/auto_compress_module.py         +129  -0
examples/model_compress/auto_compress/torch/auto_compress_torch.py           +50  -0
examples/model_compress/auto_compress/torch/mnist_pretrain_lenet.pth          +0  -0
examples/model_compress/end2end_compression.py                              +300  -0
examples/model_compress/experimental/compression_experiment/demo.py          +43  -0
examples/model_compress/experimental/compression_experiment/vessel.py        +99  -0
examples/model_compress/models/cifar10/resnet.py                            +115  -0
examples/model_compress/models/cifar10/vgg.py                                +63  -0
examples/model_compress/models/mnist/lenet.py                                +29  -0
examples/model_compress/models/mnist/naive.py                                +28  -0
examples/model_compress/models/mobilenet.py                                  +83  -0
examples/model_compress/models/mobilenet_v2.py                              +131  -0
examples/model_compress/pruning/activation_pruning_torch.py                 +142  -0
examples/model_compress/pruning/admm_pruning_torch.py                       +138  -0
examples/model_compress/pruning/amc_pruning_torch.py                         +98  -0
examples/feature_engineering/gradient_feature_selector/benchmark_test.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import bz2
import urllib.request
import numpy as np
import datetime
import line_profiler
profile = line_profiler.LineProfiler()

import os

from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

from nni.algorithms.feature_engineering.gradient_selector import FeatureGradientSelector


class Benchmark():

    def __init__(self, files=None, test_size=0.2):
        self.files = files
        self.test_size = test_size

    def run_all_test(self, pipeline):
        for file_name in self.files:
            file_path = self.files[file_name]
            self.run_test(pipeline, file_name, file_path)

    def run_test(self, pipeline, name, path):
        print("download " + name)
        update_name = self.download(name, path)
        X, y = load_svmlight_file(update_name)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=42)
        pipeline.fit(X_train, y_train)
        print("[Benchmark " + name + " Score]: ", pipeline.score(X_test, y_test))

    def download(self, name, path):
        old_name = name + '_train.bz2'
        update_name = name + '_train.svm'
        if os.path.exists(old_name) and os.path.exists(update_name):
            return update_name
        urllib.request.urlretrieve(path, filename=old_name)
        f_svm = open(update_name, 'wt')
        with bz2.open(old_name, 'rb') as f_zip:
            data = f_zip.read()
            f_svm.write(data.decode('utf-8'))
        f_svm.close()
        return update_name


@profile
def test_memory(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())
    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())
    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    test_benchmark.run_test(pipeline, name, path)
    print("")


def test_time(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())
    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())
    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    print("Used time: ", (endtime - starttime).microseconds / 1000)
    print("")


if __name__ == "__main__":
    LIBSVM_DATA = {
        "rcv1": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2",
        "colon-cancer": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2",
        "gisette": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2",
        "news20.binary": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2",
        "real-sim": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2",
        "webspam": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/webspam_wc_normalized_trigram.svm.bz2",
        "avazu": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2"
    }

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_name', type=str, help='display pipeline_name.')
    parser.add_argument('--name', type=str, help='display name.')
    parser.add_argument('--object', type=str, help='display test object: time or memory.')
    args = parser.parse_args()

    pipeline_name = args.pipeline_name
    name = args.name
    test_object = args.object
    path = LIBSVM_DATA[name]

    if test_object == 'time':
        test_time(pipeline_name, name, path)
    elif test_object == 'memory':
        test_memory(pipeline_name, name, path)
    else:
        print("Not support test object.\t", test_object)

    print("Done.")
examples/feature_engineering/gradient_feature_selector/sklearn_test.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import bz2
import urllib.request
import numpy as np

from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

from nni.algorithms.feature_engineering.gradient_selector import FeatureGradientSelector


def test():
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    f_svm = open('train.svm', 'wt')
    with bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))
    f_svm.close()

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    pipeline.fit(X_train, y_train)

    print("Pipeline Score: ", pipeline.score(X_train, y_train))


if __name__ == "__main__":
    test()
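Note: the pipeline above treats FeatureGradientSelector as an ordinary scikit-learn transformer. Below is a minimal standalone sketch (not part of the committed file) that inspects the selected feature indices directly; it reuses the X_train/y_train split produced in test() and assumes the selector exposes get_selected_features() as described in the NNI feature-engineering documentation.

from nni.algorithms.feature_engineering.gradient_selector import FeatureGradientSelector

# fit the selector on its own, then read back the chosen column indices
fgs = FeatureGradientSelector(n_epochs=1, n_features=10)
fgs.fit(X_train, y_train)            # X_train / y_train as produced in test() above
print(fgs.get_selected_features())   # assumed API: list of selected feature indices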
examples/feature_engineering/gradient_feature_selector/test_memory.py (new file, mode 100644)

import os

LIBSVM_DATA = {
    "rcv1": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2",
    "colon-cancer": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2",
    "gisette": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2",
    "news20.binary": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2",
    "real-sim": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2",
    "avazu": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2",
}

pipeline_name = "Tree"
device = "CUDA_VISIBLE_DEVICES=0 "
script = "setsid python -m memory_profiler benchmark_test.py "
test_object = "memory"

for name in LIBSVM_DATA:
    log_name = "_".join([pipeline_name, name, test_object])
    command = device + script + "--pipeline_name " + pipeline_name + " --name " + name + " --object " + test_object + " >" + log_name + " 2>&1 &"
    print("command is\t", command)
    os.system(command)
    print("log is here\t", log_name)

print("Done.")
examples/feature_engineering/gradient_feature_selector/test_time.py (new file, mode 100644)

import os

LIBSVM_DATA = {
    "rcv1": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2",
    "colon-cancer": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2",
    "gisette": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2",
    "news20.binary": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2",
    "real-sim": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2",
    "avazu": "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2",
}

pipeline_name = "LR"
device = "CUDA_VISIBLE_DEVICES=0 "
script = "setsid python benchmark_test.py "
test_object = "time"

for name in LIBSVM_DATA:
    log_name = "_".join([pipeline_name, name, test_object])
    command = device + script + "--pipeline_name " + pipeline_name + " --name " + name + " --object " + test_object + " >" + log_name + " 2>&1 &"
    print("command is\t", command)
    os.system(command)
    print("log is here\t", log_name)

print("Done.")
examples/model_compress/.gitignore (new file, mode 100644)

.pth
.tar.gz
data/
MNIST/
cifar-10-batches-py/
experiment_data/
pruning/models
pruning/pruning_log
\ No newline at end of file
examples/model_compress/auto_compress/torch/auto_compress_module.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Callable, Optional, Iterable

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms

from nni.algorithms.compression.pytorch.auto_compress import AbstractAutoCompressionModule

torch.manual_seed(1)


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


_use_cuda = torch.cuda.is_available()

_train_kwargs = {'batch_size': 64}
_test_kwargs = {'batch_size': 1000}
if _use_cuda:
    _cuda_kwargs = {'num_workers': 1, 'pin_memory': True, 'shuffle': True}
    _train_kwargs.update(_cuda_kwargs)
    _test_kwargs.update(_cuda_kwargs)

_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

_device = torch.device("cuda" if _use_cuda else "cpu")

_train_loader = None
_test_loader = None


def _train(model, optimizer, criterion, epoch):
    global _train_loader
    if _train_loader is None:
        dataset = datasets.MNIST('./data', train=True, download=True, transform=_transform)
        _train_loader = torch.utils.data.DataLoader(dataset, **_train_kwargs)
    model.train()
    for data, target in _train_loader:
        data, target = data.to(_device), target.to(_device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def _test(model):
    global _test_loader
    if _test_loader is None:
        dataset = datasets.MNIST('./data', train=False, transform=_transform)
        _test_loader = torch.utils.data.DataLoader(dataset, **_test_kwargs)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in _test_loader:
            data, target = data.to(_device), target.to(_device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(_test_loader.dataset)
    acc = 100 * correct / len(_test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(_test_loader.dataset), acc))
    return acc


_model = LeNet().to(_device)
_model.load_state_dict(torch.load('mnist_pretrain_lenet.pth'))


class AutoCompressionModule(AbstractAutoCompressionModule):
    @classmethod
    def model(cls) -> nn.Module:
        return _model

    @classmethod
    def evaluator(cls) -> Callable[[nn.Module], float]:
        return _test

    @classmethod
    def optimizer_factory(cls) -> Optional[Callable[[Iterable], optim.Optimizer]]:
        def _optimizer_factory(params: Iterable):
            return torch.optim.SGD(params, lr=0.01)
        return _optimizer_factory

    @classmethod
    def criterion(cls) -> Optional[Callable]:
        return F.nll_loss

    @classmethod
    def sparsifying_trainer(cls, compress_algorithm_name: str) -> Optional[Callable[[nn.Module, optim.Optimizer, Callable, int], None]]:
        return _train

    @classmethod
    def post_compress_finetuning_trainer(cls, compress_algorithm_name: str) -> Optional[Callable[[nn.Module, optim.Optimizer, Callable, int], None]]:
        return _train

    @classmethod
    def post_compress_finetuning_epochs(cls, compress_algorithm_name: str) -> int:
        return 2
examples/model_compress/auto_compress/torch/auto_compress_torch.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pathlib import Path

from nni.algorithms.compression.pytorch.auto_compress import AutoCompressionExperiment, AutoCompressionSearchSpaceGenerator

from auto_compress_module import AutoCompressionModule

generator = AutoCompressionSearchSpaceGenerator()
generator.add_config('level', [
    {
        "sparsity": {
            "_type": "uniform",
            "_value": [0.01, 0.99]
        },
        'op_types': ['default']
    }
])
generator.add_config('l1', [
    {
        "sparsity": {
            "_type": "uniform",
            "_value": [0.01, 0.99]
        },
        'op_types': ['Conv2d']
    }
])
generator.add_config('qat', [
    {
        'quant_types': ['weight', 'output'],
        'quant_bits': {
            'weight': 8,
            'output': 8
        },
        'op_types': ['Conv2d', 'Linear']
    }])
search_space = generator.dumps()

experiment = AutoCompressionExperiment(AutoCompressionModule, 'local')
experiment.config.experiment_name = 'auto compression torch example'
experiment.config.trial_concurrency = 1
experiment.config.max_trial_number = 10
experiment.config.search_space = search_space
experiment.config.trial_code_directory = Path(__file__).parent
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
experiment.config.training_service.use_active_gpu = True

experiment.run(8088)
examples/model_compress/auto_compress/torch/mnist_pretrain_lenet.pth (new file, mode 100644)

File added (binary).
examples/model_compress/end2end_compression.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
NNI example for combined pruning and quantization to compress a model.
In this example, we show the compression process to first prune a model, then quantize the pruned model.
"""

import argparse
import os
import time

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms

from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

from models.mnist.naive import NaiveModel
from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT


def get_model_time_cost(model, dummy_input):
    model.eval()
    n_times = 100
    time_list = []
    for _ in range(n_times):
        torch.cuda.synchronize()
        tic = time.time()
        _ = model(dummy_input)
        torch.cuda.synchronize()
        time_list.append(time.time() - tic)
    time_list = time_list[10:]
    return sum(time_list) / len(time_list)


def train(args, model, device, train_loader, criterion, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(args, model, device, criterion, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    acc = 100 * correct / len(test_loader.dataset)

    print('Test Loss: {:.6f}  Accuracy: {}%\n'.format(test_loss, acc))

    return acc


def test_trt(engine, test_loader):
    test_loss = 0
    correct = 0
    time_elasped = 0
    for data, target in test_loader:
        output, time = engine.inference(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        time_elasped += time
    test_loss /= len(test_loader.dataset)

    print('Loss: {}  Accuracy: {}%'.format(
        test_loss, 100 * correct / len(test_loader.dataset)))
    print("Inference elapsed_time (whole dataset): {}s".format(time_elasped))


def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs(args.experiment_data_dir, exist_ok=True)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True, transform=transform),
        batch_size=64,)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transform),
        batch_size=1000)

    # Step1. Model Pretraining
    model = NaiveModel().to(device)
    criterion = torch.nn.NLLLoss()
    optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    flops, params, _ = count_flops_params(model, (1, 1, 28, 28), verbose=False)

    if args.pretrained_model_dir is None:
        args.pretrained_model_dir = os.path.join(args.experiment_data_dir, f'pretrained.pth')

        best_acc = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(args, model, device, criterion, test_loader)
            if acc > best_acc:
                best_acc = acc
                state_dict = model.state_dict()

        model.load_state_dict(state_dict)
        torch.save(state_dict, args.pretrained_model_dir)
        print(f'Model saved to {args.pretrained_model_dir}')
    else:
        state_dict = torch.load(args.pretrained_model_dir)
        model.load_state_dict(state_dict)
        best_acc = test(args, model, device, criterion, test_loader)

    dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
    time_cost = get_model_time_cost(model, dummy_input)

    # 125.49 M, 0.85M, 93.29, 1.1012
    print(f'Pretrained model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')

    # Step2. Model Pruning
    config_list = [{
        'sparsity': args.sparsity,
        'op_types': ['Conv2d']
    }]

    kw_args = {}
    if args.dependency_aware:
        dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
        print('Enable the dependency_aware mode')
        # note that not all pruners support the dependency_aware mode
        kw_args['dependency_aware'] = True
        kw_args['dummy_input'] = dummy_input

    pruner = L1FilterPruner(model, config_list, **kw_args)
    model = pruner.compress()
    pruner.get_pruned_weights()

    mask_path = os.path.join(args.experiment_data_dir, 'mask.pth')
    model_path = os.path.join(args.experiment_data_dir, 'pruned.pth')
    pruner.export_model(model_path=model_path, mask_path=mask_path)
    pruner._unwrap_model()  # unwrap all modules to normal state

    # Step3. Model Speedup
    m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
    m_speedup.speedup_model()
    print('model after speedup', model)

    flops, params, _ = count_flops_params(model, dummy_input, verbose=False)
    acc = test(args, model, device, criterion, test_loader)
    time_cost = get_model_time_cost(model, dummy_input)
    print(f'Pruned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {acc: .2f}, Time Cost: {time_cost}')

    # Step4. Model Finetuning
    optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

    best_acc = 0
    for epoch in range(args.finetune_epochs):
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        acc = test(args, model, device, criterion, test_loader)
        if acc > best_acc:
            best_acc = acc
            state_dict = model.state_dict()

    model.load_state_dict(state_dict)
    save_path = os.path.join(args.experiment_data_dir, f'finetuned.pth')
    torch.save(state_dict, save_path)

    flops, params, _ = count_flops_params(model, dummy_input, verbose=True)
    time_cost = get_model_time_cost(model, dummy_input)

    # FLOPs 28.48 M, #Params: 0.18M, Accuracy: 89.03, Time Cost: 1.03
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')
    print(f'Model saved to {save_path}')

    # Step5. Model Quantization via QAT
    config_list = [{
        'quant_types': ['weight', 'output'],
        'quant_bits': {'weight': 8, 'output': 8},
        'op_names': ['conv1']
    }, {
        'quant_types': ['output'],
        'quant_bits': {'output': 8},
        'op_names': ['relu1']
    }, {
        'quant_types': ['weight', 'output'],
        'quant_bits': {'weight': 8, 'output': 8},
        'op_names': ['conv2']
    }, {
        'quant_types': ['output'],
        'quant_bits': {'output': 8},
        'op_names': ['relu2']
    }]

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    quantizer = QAT_Quantizer(model, config_list, optimizer)
    quantizer.compress()

    # Step6. Quantization Aware Training
    best_acc = 0
    for epoch in range(1):
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        acc = test(args, model, device, criterion, test_loader)
        if acc > best_acc:
            best_acc = acc
            state_dict = model.state_dict()

    calibration_path = os.path.join(args.experiment_data_dir, 'calibration.pth')
    calibration_config = quantizer.export_model(model_path, calibration_path)
    print("calibration_config: ", calibration_config)

    # Step7. Model Speedup
    batch_size = 32
    input_shape = (batch_size, 1, 28, 28)
    engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
    engine.compress()

    test_trt(engine, test_loader)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')

    # dataset and model
    # parser.add_argument('--dataset', type=str, default='mnist',
    #                     help='dataset to use, mnist, cifar10 or imagenet')
    # parser.add_argument('--data-dir', type=str, default='./data/',
    #                     help='dataset directory')
    parser.add_argument('--pretrained-model-dir', type=str, default=None,
                        help='path to pretrained model')
    parser.add_argument('--pretrain-epochs', type=int, default=10,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--pretrain-lr', type=float, default=1.0,
                        help='learning rate to pretrain the model')
    parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
                        help='For saving output checkpoints')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    # parser.add_argument('--multi-gpu', action='store_true', default=False,
    #                     help='run on multiple gpus')
    # parser.add_argument('--test-only', action='store_true', default=False,
    #                     help='run test only')

    # pruner
    # parser.add_argument('--pruner', type=str, default='l1filter',
    #                     choices=['level', 'l1filter', 'l2filter', 'slim', 'agp',
    #                              'fpgm', 'mean_activation', 'apoz', 'admm'],
    #                     help='pruner to use')
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='target overall sparsity')
    parser.add_argument('--dependency-aware', action='store_true', default=False,
                        help='toggle dependency-aware mode')

    # finetuning
    parser.add_argument('--finetune-epochs', type=int, default=5,
                        help='epochs to fine tune')
    # parser.add_argument('--kd', action='store_true', default=False,
    #                     help='quickly check a single pass')
    # parser.add_argument('--kd_T', type=float, default=4,
    #                     help='temperature for KD distillation')
    # parser.add_argument('--finetune-lr', type=float, default=0.5,
    #                     help='learning rate to finetune the model')

    # speedup
    # parser.add_argument('--speedup', action='store_true', default=False,
    #                     help='whether to speedup the pruned model')
    # parser.add_argument('--nni', action='store_true', default=False,
    #                     help="whether to tune the pruners using NNI tuners")

    args = parser.parse_args()
    main(args)
examples/model_compress/experimental/compression_experiment/demo.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pathlib import Path

import torch
from torch.optim import Adam

import nni
from nni.compression.experiment.experiment import CompressionExperiment
from nni.compression.experiment.config import CompressionExperimentConfig, TaylorFOWeightPrunerConfig
from vessel import LeNet, finetuner, evaluator, trainer, criterion, device

model = LeNet().to(device)

# pre-training model
finetuner(model)

optimizer = nni.trace(Adam)(model.parameters())
dummy_input = torch.rand(16, 1, 28, 28).to(device)

# normal experiment setting, no need to set search_space and trial_command
config = CompressionExperimentConfig('local')
config.experiment_name = 'auto compression torch example'
config.trial_concurrency = 1
config.max_trial_number = 10
config.trial_code_directory = Path(__file__).parent
config.tuner.name = 'TPE'
config.tuner.class_args['optimize_mode'] = 'maximize'

# compression experiment specific setting
# single float value means the expected remaining ratio upper limit for flops & params, lower limit for metric
config.compression_setting.flops = 0.2
config.compression_setting.params = 0.5
config.compression_setting.module_types = ['Conv2d', 'Linear']
config.compression_setting.exclude_module_names = ['fc2']
config.compression_setting.pruners = [TaylorFOWeightPrunerConfig()]

experiment = CompressionExperiment(config, model, finetuner, evaluator, dummy_input, trainer, optimizer, criterion, device)

experiment.run(8080)
examples/model_compress/experimental/compression_experiment/vessel.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torchvision import datasets, transforms

import nni


@nni.trace
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


_use_cuda = True
device = torch.device("cuda" if _use_cuda else "cpu")

_train_kwargs = {'batch_size': 64}
_test_kwargs = {'batch_size': 1000}
if _use_cuda:
    _cuda_kwargs = {'num_workers': 1, 'pin_memory': True, 'shuffle': True}
    _train_kwargs.update(_cuda_kwargs)
    _test_kwargs.update(_cuda_kwargs)

_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

_train_loader = None
_test_loader = None


def trainer(model, optimizer, criterion):
    global _train_loader
    if _train_loader is None:
        dataset = datasets.MNIST('./data', train=True, download=True, transform=_transform)
        _train_loader = torch.utils.data.DataLoader(dataset, **_train_kwargs)
    model.train()
    for data, target in _train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()


def evaluator(model):
    global _test_loader
    if _test_loader is None:
        dataset = datasets.MNIST('./data', train=False, transform=_transform, download=True)
        _test_loader = torch.utils.data.DataLoader(dataset, **_test_kwargs)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in _test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(_test_loader.dataset)
    acc = 100 * correct / len(_test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(_test_loader.dataset), acc))
    return acc


criterion = F.nll_loss


def finetuner(model: nn.Module):
    optimizer = Adam(model.parameters())
    for i in range(3):
        trainer(model, optimizer, criterion)
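Note: a minimal usage sketch of the callables this module exports (essentially what demo.py does before building the experiment). It is not part of the committed file, only uses names defined above, and assumes MNIST can be downloaded to ./data.

from vessel import LeNet, finetuner, evaluator, device

model = LeNet().to(device)
finetuner(model)        # three epochs of Adam training, as defined above
acc = evaluator(model)  # prints and returns test-set accuracy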
examples/model_compress/models/cifar10/resnet.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        # this layer differs from torchvision.resnet18() since this model is adapted for CIFAR-10
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])
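Note: a quick shape-check sketch for this CIFAR-10 variant (not part of the commit). The stride-1 stem plus F.avg_pool2d(out, 4) expects 32x32 inputs; the import path assumes the models directory is on sys.path, as the pruning examples arrange.

import torch
from cifar10.resnet import ResNet18  # assumes examples/model_compress/models is on sys.path

model = ResNet18()
logits = model(torch.randn(2, 3, 32, 32))  # CIFAR-10 sized input
print(logits.shape)                        # torch.Size([2, 10])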
examples/model_compress/models/cifar10/vgg.py (new file, mode 100644)

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

defaultcfg = {
    11: [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512],
    19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512],
}


class VGG(nn.Module):
    def __init__(self, depth=16):
        super(VGG, self).__init__()
        cfg = defaultcfg[depth]
        self.cfg = cfg
        self.feature = self.make_layers(cfg, True)
        num_classes = 10
        self.classifier = nn.Sequential(
            nn.Linear(cfg[-1], 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )
        self._initialize_weights()

    def make_layers(self, cfg, batch_norm=False):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.feature(x)
        x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
examples/model_compress/models/mnist/lenet.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
examples/model_compress/models/mnist/naive.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce


class NaiveModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
        self.conv2 = torch.nn.Conv2d(20, 50, 5, 1)
        self.fc1 = torch.nn.Linear(4 * 4 * 50, 500)
        self.fc2 = torch.nn.Linear(500, 10)
        self.relu1 = torch.nn.ReLU6()
        self.relu2 = torch.nn.ReLU6()
        self.relu3 = torch.nn.ReLU6()
        self.max_pool1 = torch.nn.MaxPool2d(2, 2)
        self.max_pool2 = torch.nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.relu1(self.conv1(x))
        x = self.max_pool1(x)
        x = self.relu2(self.conv2(x))
        x = self.max_pool2(x)
        x = x.view(-1, x.size()[1:].numel())
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
\ No newline at end of file
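Note: a short shape-trace sketch explaining the 4 * 4 * 50 input size of fc1 (not part of the commit; the import path is the one end2end_compression.py uses, assuming it runs from examples/model_compress).

import torch
from models.mnist.naive import NaiveModel

m = NaiveModel()
x = torch.randn(1, 1, 28, 28)
# conv1 (5x5, no padding): 28 -> 24, max_pool1: 24 -> 12
# conv2 (5x5, no padding): 12 -> 8,  max_pool2: 8 -> 4   => 4 * 4 * 50 features into fc1
print(m(x).shape)  # torch.Size([1, 10])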
examples/model_compress/models/mobilenet.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch.nn as nn
import math


def conv_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True)
    )


def conv_dw(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU(inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True),
    )


class MobileNet(nn.Module):
    def __init__(self, n_class, profile='normal'):
        super(MobileNet, self).__init__()

        # original
        if profile == 'normal':
            in_planes = 32
            cfg = [64, (128, 2), 128, (256, 2), 256, (512, 2), 512, 512, 512, 512, 512, (1024, 2), 1024]
        # 0.5 AMC
        elif profile == '0.5flops':
            in_planes = 24
            cfg = [48, (96, 2), 80, (192, 2), 200, (328, 2), 352, 368, 360, 328, 400, (736, 2), 752]
        else:
            raise NotImplementedError

        self.conv1 = conv_bn(3, in_planes, stride=2)
        self.features = self._make_layers(in_planes, cfg, conv_dw)

        self.classifier = nn.Sequential(
            nn.Linear(cfg[-1], n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.conv1(x)
        x = self.features(x)
        x = x.mean([2, 3])  # global average pooling
        x = self.classifier(x)
        return x

    def _make_layers(self, in_planes, cfg, layer):
        layers = []
        for x in cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(layer(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
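Note: a small sanity-check sketch of the cfg convention used above (plain ints keep stride 1, (channels, 2) tuples downsample). It is not part of the commit, and the import path is assumed relative to examples/model_compress.

import torch
from models.mobilenet import MobileNet  # assumed import path

net = MobileNet(n_class=10, profile='normal')
out = net(torch.randn(1, 3, 224, 224))  # any input size divisible by 32 works thanks to the global average pooling
print(out.shape)                        # torch.Size([1, 10])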
examples/model_compress/models/mobilenet_v2.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch.nn as nn
import math


def conv_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        self.features = [conv_bn(3, input_channel, 2)]
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                if i == 0:
                    self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
                else:
                    self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*self.features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        # equivalent to .mean(3).mean(2), but model speedup only supports
        # the mean option whose output has two dimensions
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
examples/model_compress/pruning/activation_pruning_torch.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for supported ActivationAPoZRank and ActivationMeanRank pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

import nni
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import ActivationAPoZRankPruner, ActivationMeanRankPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)


def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1


def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pruner', type=str, default='apoz',
                        choices=['apoz', 'mean'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'total_sparsity': 0.5,
        'op_types': ['Conv2d'],
    }]

    # make sure you have used nni.trace to wrap the optimizer class before initialization
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    if 'apoz' in args.pruner:
        pruner = ActivationAPoZRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    else:
        pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # The optimizer used in the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)

    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops/1e6:.2f} M, #Params: {pre_params/1e6:.2f}M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}%')
examples/model_compress/pruning/admm_pruning_torch.py
0 → 100644
View file @
e773dfcc
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
NNI example for supported ADMM pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
import sys

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

import nni
from nni.compression.pytorch.speedup import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import ADMMPruner

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

g_epoch = 0

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

def trainer(model, optimizer, criterion):
    global g_epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    g_epoch += 1

def evaluator(model):
    model.eval()
    correct = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc

def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speed up
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'sparsity': 0.8,
        'op_types': ['Conv2d'],
    }]

    # make sure you have used nni.trace to wrap the optimizer class before initializing it
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    pruner = ADMMPruner(model, config_list, trainer, traced_optimizer, criterion, iterations=10, training_epochs=1, granularity='coarse-grained')
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    ModelSpeedup(model, torch.randn([128, 3, 32, 32]).to(device), masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER PRUNING ' + '=' * 50)
    evaluator(model)

    # The optimizer used in the pruner might be patched, so it is recommended to create a new optimizer for the fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)

    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc: .2f}%')
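One detail that differs between the files in this commit is the sparsity key used in config_list: admm_pruning_torch.py above sets a per-layer 'sparsity', while the activation and AMC examples use 'total_sparsity' (AMC additionally caps it with 'max_sparsity_per_layer'). The snippet below only restates those dictionaries side by side; the comments are my reading of the NNI config schema, not text taken from the files.

# Config shapes as used in the examples in this commit (values copied from the code above).
admm_config = [{
    'sparsity': 0.8,                  # target sparsity applied to every matched layer
    'op_types': ['Conv2d'],
}]

activation_config = [{
    'total_sparsity': 0.5,            # overall sparsity budget shared across matched layers
    'op_types': ['Conv2d'],
}]

amc_config = [{
    'op_types': ['Conv2d'],
    'total_sparsity': 0.5,
    'max_sparsity_per_layer': 0.8,    # cap so no single layer is pruned away almost entirely
}]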
examples/model_compress/pruning/amc_pruning_torch.py
0 → 100644
View file @
e773dfcc
import sys
from tqdm import tqdm

import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR

from nni.compression.pytorch.pruning import AMCPruner
from nni.compression.pytorch.utils import count_flops_params

from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)

criterion = torch.nn.CrossEntropyLoss()

def trainer(model, optimizer, criterion, epoch):
    model.train()
    for data, target in tqdm(iterable=train_loader, desc='Epoch {}'.format(epoch)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

def finetuner(model):
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for data, target in tqdm(iterable=train_loader, desc='Epoch PFs'):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc

if __name__ == '__main__':
    # model = MobileNetV2(n_class=10).to(device)
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=[50, 75], gamma=0.1)
    criterion = torch.nn.CrossEntropyLoss()
    for i in range(100):
        trainer(model, optimizer, criterion, i)
    pre_best_acc = evaluator(model)

    dummy_input = torch.rand(10, 3, 32, 32).to(device)
    pre_flops, pre_params, _ = count_flops_params(model, dummy_input)

    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.5, 'max_sparsity_per_layer': 0.8}]

    # If you just want to keep the final result as the best result, you can pass None as the evaluator.
    # Otherwise, the result with the highest evaluator score is kept as the best result.
    ddpg_params = {'hidden1': 300, 'hidden2': 300, 'lr_c': 1e-3, 'lr_a': 1e-4, 'warmup': 100, 'discount': 1., 'bsize': 64,
                   'rmsize': 100, 'window_length': 1, 'tau': 0.01, 'init_delta': 0.5, 'delta_decay': 0.99,
                   'max_episode_length': 1e9, 'epsilon': 50000}
    pruner = AMCPruner(400, model, config_list, dummy_input, evaluator, finetuner=finetuner, ddpg_params=ddpg_params, target='flops')
    pruner.compress()
    _, model, masks, best_acc, _ = pruner.get_best_result()
    flops, params, _ = count_flops_params(model, dummy_input)
    print(f'Pretrained model FLOPs {pre_flops / 1e6:.2f} M, #Params: {pre_params / 1e6:.2f} M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops / 1e6:.2f} M, #Params: {params / 1e6:.2f} M, Accuracy: {best_acc: .2f}%')
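The AMC script relies on two user-supplied callbacks with a simple contract: evaluator(model) returns a single scalar score (here, test accuracy) that guides the search and selects the result returned by get_best_result(), while finetuner(model) briefly retrains the pruned model in place and returns nothing. Below is a bare-bones sketch of that contract with hypothetical placeholder bodies standing in for the CIFAR10 loops defined above; it is not part of the example file.

import torch

def my_evaluator(model: torch.nn.Module) -> float:
    # Must return one scalar score; in this example it is test accuracy,
    # and the run with the highest score is kept by get_best_result().
    return 0.0  # replace with a real validation pass

def my_finetuner(model: torch.nn.Module) -> None:
    # Called to briefly recover accuracy after pruning; it should update
    # the model's weights in place and return nothing.
    pass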