Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
1d9caa61
Unverified
Commit
1d9caa61
authored
Jul 16, 2021
by
lin bin
Committed by
GitHub
Jul 16, 2021
Browse files
[Quantization Speedup] Unit Test (#3862)
parent
7205cff5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
217 additions
and
1 deletion
+217
-1
pipelines/integration-test-trt.yml
pipelines/integration-test-trt.yml
+0
-1
test/nni_test/nnitest/test_quantize_model_speedup.py
test/nni_test/nnitest/test_quantize_model_speedup.py
+217
-0
No files found.
pipelines/integration-test-trt.yml
View file @
1d9caa61
...
@@ -18,7 +18,6 @@ jobs:
...
@@ -18,7 +18,6 @@ jobs:
echo "Working directory: ${PWD}"
echo "Working directory: ${PWD}"
echo "NNI version: ${NNI_RELEASE}"
echo "NNI version: ${NNI_RELEASE}"
echo "Build docker image: $(build_docker_image)"
python3 -m pip install --upgrade pip setuptools
python3 -m pip install --upgrade pip setuptools
displayName
:
Prepare
displayName
:
Prepare
...
...
test/nni_test/nnitest/test_quantize_model_speedup.py
0 → 100644
View file @
1d9caa61
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import
os
import
torch
import
torch.nn.functional
as
F
from
torchvision.models.vgg
import
vgg16
from
torchvision
import
datasets
,
transforms
import
unittest
from
unittest
import
TestCase
,
main
from
nni.algorithms.compression.pytorch.quantization
import
QAT_Quantizer
from
nni.compression.pytorch.quantization_speedup
import
ModelSpeedupTensorRT
# Fix the seed of PyTorch's global random number generator so that randomly
# initialised weights (and other torch-driven randomness) repeat between runs.
torch.manual_seed(0)
class BackboneModel(torch.nn.Module):
    """LeNet-style CNN: two conv/ReLU6/max-pool stages, then two linear layers.

    ``fc1`` takes 4 * 4 * 50 input features, which is what two 5x5 convolutions
    (stride 1) and two 2x2 poolings leave from a single-channel 28x28 image.
    The network emits log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Layers that own parameters.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

        # Each activation is its own module (rather than one shared ReLU6) so
        # every one has a distinct name such as 'relu1' that a per-layer
        # configuration can refer to.
        self.relu1 = nn.ReLU6()
        self.relu2 = nn.ReLU6()
        self.relu3 = nn.ReLU6()

        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Return ``log_softmax`` (over dim 1) of the class scores for ``x``."""
        # Convolutional feature extractor.
        features = self.max_pool1(self.relu1(self.conv1(x)))
        features = self.max_pool2(self.relu2(self.conv2(features)))

        # Collapse every non-batch dimension into a single feature axis.
        flat_width = features.size()[1:].numel()
        flattened = features.view(-1, flat_width)

        # Classifier head.
        hidden = self.relu3(self.fc1(flattened))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)
class QuantizationSpeedupTestCase(TestCase):
    """End-to-end smoke tests for ``ModelSpeedupTensorRT`` on quantized models.

    Each test builds an engine from a model plus a quantization config, runs
    inference through it and finally removes the files produced on the way.
    The MNIST-based tests only print loss/accuracy; they do not assert on them.
    """

    # ModelSpeedupTensorRT is never handed an ONNX path in this file, yet the
    # post-training test has to delete this file afterwards, so it is presumably
    # the engine's default intermediate ONNX export.
    # TODO(review): confirm against ModelSpeedupTensorRT's signature.
    _DEFAULT_ONNX_PATH = "default_model.onnx"

    def __init__(self, methodName: str = 'runTest') -> None:
        """Create the test case and pick the device (CUDA when available).

        ``methodName`` defaults to ``'runTest'`` like ``unittest.TestCase``.
        No dataset is touched here, so merely collecting the tests does no I/O.
        """
        super().__init__(methodName=methodName)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def setUp(self) -> None:
        """Build the MNIST train/test loaders used by the tests."""
        super().setUp()
        trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        self.train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=True, download=True, transform=trans),
            batch_size=64, shuffle=True)
        # The test split is opened without download=True; presumably the call
        # above has already fetched everything it needs - TODO confirm.
        self.test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=False, transform=trans),
            batch_size=1000, shuffle=True)

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path``; a file that was never created is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def _schedule_removal(self, *paths):
        """Register ``paths`` for deletion when the test ends, pass or fail."""
        for path in paths:
            self.addCleanup(self._remove_if_exists, path)

    def _assert_files_exist(self, *paths):
        """Fail the test unless every one of ``paths`` is an existing file."""
        for path in paths:
            self.assertTrue(os.path.isfile(path),
                            'expected file was not created: {}'.format(path))

    def _train(self, model, optimizer):
        """Run one pass over ``self.train_loader`` minimising the NLL loss.

        Progress is printed every 100 batches.
        """
        model.train()
        for batch_idx, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % 100 == 0:
                print('{:2.0f}% Loss {}'.format(
                    100 * batch_idx / len(self.train_loader), loss.item()))

    def _test(self, model):
        """Evaluate ``model`` on ``self.test_loader`` and print loss/accuracy."""
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in self.test_loader:
                data, target = data.to(self.device), target.to(self.device)
                output = model(data)
                # Sum per-sample losses; the mean is taken once at the end.
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(self.test_loader.dataset)

        print('Loss: {} Accuracy: {}%\n'.format(
            test_loss, 100 * correct / len(self.test_loader.dataset)))

    def _test_trt(self, engine):
        """Evaluate ``engine`` on ``self.test_loader``; print metrics and time.

        ``engine.inference(data)`` must return ``(output, elapsed)``; the
        elapsed values are summed over all batches. Batches are passed to the
        engine exactly as the loader yields them (no ``.to(self.device)``).
        """
        test_loss = 0
        correct = 0
        time_elapsed = 0
        for data, target in self.test_loader:
            output, batch_time = engine.inference(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            time_elapsed += batch_time
        test_loss /= len(self.test_loader.dataset)

        print('Loss: {} Accuracy: {}%'.format(
            test_loss, 100 * correct / len(self.test_loader.dataset)))
        print("Inference elapsed_time (whole dataset): {}s".format(time_elapsed))

    def test_post_training_quantization_speedup(self):
        """Train for one epoch, then build an engine with calibration data.

        The config maps layer names to weight/activation bit widths, and the
        training loader doubles as the calibration data loader.
        """
        model = BackboneModel()

        configure_list = {
            'conv1': {'weight_bit': 8, 'activation_bit': 8},
            'conv2': {'weight_bit': 32, 'activation_bit': 32},
            'fc1': {'weight_bit': 16, 'activation_bit': 16},
            'fc2': {'weight_bit': 8, 'activation_bit': 8},
        }

        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

        model.to(self.device)
        for epoch in range(1):
            print('# Epoch {} #'.format(epoch))
            self._train(model, optimizer)
            self._test(model)

        batch_size = 32
        input_shape = (batch_size, 1, 28, 28)

        calibration_path = "calibration.cache"
        onnx_path = self._DEFAULT_ONNX_PATH
        # Registered before the engine is built so a failure below cannot
        # leave the artifacts behind.
        self._schedule_removal(calibration_path, onnx_path)

        engine = ModelSpeedupTensorRT(model, input_shape, config=configure_list,
                                      calib_data_loader=self.train_loader,
                                      batchsize=batch_size)
        engine.compress()
        self._test_trt(engine)

        # Both files must have been produced by the run.
        self._assert_files_exist(calibration_path, onnx_path)

    def test_qat_quantization_speedup(self):
        """Fine-tune with QAT, export its calibration config, build an engine."""
        model = BackboneModel()

        configure_list = [{
            'quant_types': ['weight', 'output'],
            'quant_bits': {'weight': 8, 'output': 8},
            'op_names': ['conv1']
        }, {
            'quant_types': ['output'],
            'quant_bits': {'output': 8},
            'op_names': ['relu1']
        }, {
            'quant_types': ['weight', 'output'],
            'quant_bits': {'weight': 8, 'output': 8},
            'op_names': ['conv2']
        }, {
            'quant_types': ['output'],
            'quant_bits': {'output': 8},
            'op_names': ['relu2']
        }]

        # finetune the model by using QAT
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
        quantizer = QAT_Quantizer(model, configure_list, optimizer)
        quantizer.compress()

        model.to(self.device)
        for epoch in range(1):
            print('# Epoch {} #'.format(epoch))
            self._train(model, optimizer)
            self._test(model)

        model_path = "mnist_model.pth"
        calibration_path = "mnist_calibration.pth"
        # The ONNX file is cleaned up as well; removal tolerates its absence.
        self._schedule_removal(model_path, calibration_path, self._DEFAULT_ONNX_PATH)
        calibration_config = quantizer.export_model(model_path, calibration_path)

        self._test(model)

        print("calibration_config: ", calibration_config)

        batch_size = 32
        input_shape = (batch_size, 1, 28, 28)

        engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config,
                                      batchsize=batch_size)
        engine.compress()

        self._test_trt(engine)

        # The exported model and calibration files must exist.
        self._assert_files_exist(model_path, calibration_path)

    def test_export_load_quantized_model_vgg16(self):
        """Build a VGG16 engine, then export and reload it, checking shapes."""
        model = vgg16()

        configure_list = {
            'features.0': {'weight_bit': 8, 'activation_bit': 8},
            'features.1': {'weight_bit': 32, 'activation_bit': 32},
            'features.2': {'weight_bit': 16, 'activation_bit': 16},
            'features.4': {'weight_bit': 8, 'activation_bit': 8},
            'features.7': {'weight_bit': 8, 'activation_bit': 8},
            'features.8': {'weight_bit': 8, 'activation_bit': 8},
            'features.11': {'weight_bit': 8, 'activation_bit': 8},
        }

        model.to(self.device)
        batch_size = 1
        input_shape = (batch_size, 3, 224, 224)
        dummy_input = torch.randn(input_shape).to(self.device)
        # Only the output shape is compared, so no autograd graph is needed.
        with torch.no_grad():
            output_torch = model(dummy_input)

        export_path = "vgg16_trt.engine"
        calibration_path = "calibration.cache"
        self._schedule_removal(export_path, calibration_path, self._DEFAULT_ONNX_PATH)

        engine = ModelSpeedupTensorRT(model, input_shape, config=configure_list,
                                      calib_data_loader=dummy_input,
                                      batchsize=batch_size)
        engine.compress()
        output, _ = engine.inference(dummy_input)

        # verify result shape
        self.assertEqual(output.shape, output_torch.shape)

        engine.export_quantized_model(export_path)
        engine.load_quantized_model(export_path)
        output, _ = engine.inference(dummy_input)

        # The reloaded engine must produce the same output shape.
        self.assertEqual(output.shape, output_torch.shape)

        # Both files must have been produced by the run.
        self._assert_files_exist(export_path, calibration_path)
# `main` is unittest.main (imported at the top of the file): run this module's
# test cases when the file is executed directly as a script.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment