Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
a9668347
Unverified
Commit
a9668347
authored
Oct 19, 2022
by
J-shang
Committed by
GitHub
Oct 19, 2022
Browse files
[BugFix] fix compression bugs (#5140)
parent
56c6cfea
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
31 additions
and
27 deletions
+31
-27
docs/source/tutorials/quantization_speedup.py
docs/source/tutorials/quantization_speedup.py
+1
-1
docs/source/tutorials/quantization_speedup.py.md5
docs/source/tutorials/quantization_speedup.py.md5
+1
-1
docs/source/tutorials/quantization_speedup.rst
docs/source/tutorials/quantization_speedup.rst
+8
-8
docs/source/tutorials/quantization_speedup_codeobj.pickle
docs/source/tutorials/quantization_speedup_codeobj.pickle
+0
-0
docs/source/tutorials/sg_execution_times.rst
docs/source/tutorials/sg_execution_times.rst
+5
-5
examples/model_compress/end2end_compression.py
examples/model_compress/end2end_compression.py
+1
-1
examples/model_compress/quantization/BNN_quantizer_cifar10.py
...ples/model_compress/quantization/BNN_quantizer_cifar10.py
+2
-2
examples/tutorials/pruning_quick_start_mnist.py
examples/tutorials/pruning_quick_start_mnist.py
+1
-1
examples/tutorials/pruning_speedup.py
examples/tutorials/pruning_speedup.py
+1
-1
examples/tutorials/quantization_quick_start_mnist.py
examples/tutorials/quantization_quick_start_mnist.py
+1
-1
examples/tutorials/quantization_speedup.py
examples/tutorials/quantization_speedup.py
+1
-1
nni/compression/pytorch/utils/shape_dependency.py
nni/compression/pytorch/utils/shape_dependency.py
+6
-0
nni/compression/pytorch/utils/utils.py
nni/compression/pytorch/utils/utils.py
+1
-1
nni_assets/compression/__init__.py
nni_assets/compression/__init__.py
+0
-0
nni_assets/compression/mnist_model.py
nni_assets/compression/mnist_model.py
+2
-4
No files found.
docs/source/tutorials/quantization_speedup.py
View file @
a9668347
...
...
@@ -64,7 +64,7 @@ Usage
import
torch
import
torch.nn.functional
as
F
from
torch.optim
import
SGD
from
scrip
ts.compression
_
mnist_model
import
TorchModel
,
device
,
trainer
,
evaluator
,
test_trt
from
nni_asse
ts.compression
.
mnist_model
import
TorchModel
,
device
,
trainer
,
evaluator
,
test_trt
config_list
=
[{
'quant_types'
:
[
'input'
,
'weight'
],
...
...
docs/source/tutorials/quantization_speedup.py.md5
View file @
a9668347
2404b8d0c3958a0191b77bbe882456e4
\ No newline at end of file
06c37bd5c886478ae20a1fc552af729a
\ No newline at end of file
docs/source/tutorials/quantization_speedup.rst
View file @
a9668347
...
...
@@ -84,7 +84,7 @@ Usage
import torch
import torch.nn.functional as F
from torch.optim import SGD
from
scrip
ts.compression
_
mnist_model import TorchModel, device, trainer, evaluator, test_trt
from
nni_asse
ts.compression
.
mnist_model import TorchModel, device, trainer, evaluator, test_trt
config_list = [{
'
quant_types
': ['
input
', '
weight
'],
...
...
@@ -174,9 +174,9 @@ finetuning the model by using QAT
.. code-block:: none
Average test loss: 0.
5386
, Accuracy: 8
619
/10000 (8
6
%)
Average test loss: 0.155
3
, Accuracy: 95
21
/10000 (95%)
Average test loss: 0.
1001
, Accuracy: 9
686
/10000 (97%)
Average test loss: 0.
6058
, Accuracy: 8
534
/10000 (8
5
%)
Average test loss: 0.15
8
5, Accuracy: 95
08
/10000 (95%)
Average test loss: 0.
0920
, Accuracy: 9
717
/10000 (97%)
...
...
@@ -207,7 +207,7 @@ export model and get calibration_config
.. code-block:: none
calibration_config: {'
conv1
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0029], device='
cuda
:
0
'), '
weight_zero_point
': tensor([9
8
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': -0.4242129623889923, '
tracked_max_input
': 2.821486711502075}, '
conv2
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0017], device='
cuda
:
0
'), '
weight_zero_point
': tensor([1
24
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
':
8.848002433776855
}, '
fc1
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0010], device='
cuda
:
0
'), '
weight_zero_point
': tensor([1
34
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
': 1
4.64758586883545
}, '
fc2
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.001
3
], device='
cuda
:
0
'), '
weight_zero_point
': tensor([12
1
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
': 1
5.807988166809082
}, '
relu1
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
':
9.041301727294922
}, '
relu2
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
': 1
5.143928527832031
}, '
relu3
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
': 1
6.151935577392578
}, '
relu4
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
':
11.749024391174316
}}
calibration_config: {'
conv1
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0029], device='
cuda
:
0
'), '
weight_zero_point
': tensor([9
7
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': -0.4242129623889923, '
tracked_max_input
': 2.821486711502075}, '
conv2
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0017], device='
cuda
:
0
'), '
weight_zero_point
': tensor([1
15
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
':
7.800363063812256
}, '
fc1
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.0010], device='
cuda
:
0
'), '
weight_zero_point
': tensor([1
21
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
': 1
3.914573669433594
}, '
fc2
': {'
weight_bits
': 8, '
weight_scale
': tensor([0.001
2
], device='
cuda
:
0
'), '
weight_zero_point
': tensor([12
5
.], device='
cuda
:
0
'), '
input_bits
': 8, '
tracked_min_input
': 0.0, '
tracked_max_input
': 1
1.657418251037598
}, '
relu1
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
':
7.897384166717529
}, '
relu2
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
': 1
4.337020874023438
}, '
relu3
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
': 1
1.884227752685547
}, '
relu4
': {'
output_bits
': 8, '
tracked_min_output
': 0.0, '
tracked_max_output
':
9.330422401428223
}}
...
...
@@ -237,8 +237,8 @@ build tensorRT engine to make a real speedup
.. code-block:: none
Loss: 0.
10061546401977539
Accuracy: 9
6.83
%
Inference elapsed_time (whole dataset): 0.0
432267189025878
9s
Loss: 0.
09235906448364258
Accuracy: 9
7.19
%
Inference elapsed_time (whole dataset): 0.0
363299846649169
9s
...
...
@@ -300,7 +300,7 @@ input tensor: ``torch.randn(128, 3, 32, 32)``
.. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 1 minutes
4.509
seconds)
**Total running time of the script:** ( 1 minutes
13.658
seconds)
.. _sphx_glr_download_tutorials_quantization_speedup.py:
...
...
docs/source/tutorials/quantization_speedup_codeobj.pickle
View file @
a9668347
No preview for this file type
docs/source/tutorials/sg_execution_times.rst
View file @
a9668347
...
...
@@ -5,17 +5,19 @@
Computation times
=================
**0
0:20.822
** total execution time for **tutorials** files:
**0
1:39.686
** total execution time for **tutorials** files:
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_
pruning_bert_glue.py` (``pruning_bert_glue.py``) | 00:20.822
| 0.0 MB |
| :ref:`sphx_glr_tutorials_
quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 01:39.686
| 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_darts.py` (``darts.py``) | 0
1:51.71
0 | 0.0 MB |
| :ref:`sphx_glr_tutorials_darts.py` (``darts.py``) | 0
0:00.00
0 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_nasbench_as_dataset.py` (``nasbench_as_dataset.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_bert_glue.py` (``pruning_bert_glue.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 00:00.000 | 0.0 MB |
...
...
@@ -24,7 +26,5 @@ Computation times
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
examples/model_compress/end2end_compression.py
View file @
a9668347
...
...
@@ -217,7 +217,7 @@ def main(args):
}]
optimizer
=
torch
.
optim
.
SGD
(
model
.
parameters
(),
lr
=
0.01
,
momentum
=
0.5
)
quantizer
=
QAT_Quantizer
(
model
,
config_list
,
optimizer
)
quantizer
=
QAT_Quantizer
(
model
,
config_list
,
optimizer
,
dummy_input
)
quantizer
.
compress
()
# Step6. Quantization Aware Training
...
...
examples/model_compress/quantization/BNN_quantizer_cifar10.py
View file @
a9668347
...
...
@@ -134,11 +134,11 @@ def main():
'op_names'
:
[
'features.6'
,
'features.9'
,
'features.13'
,
'features.16'
,
'features.20'
,
'classifier.2'
,
'classifier.5'
]
}]
quantizer
=
BNNQuantizer
(
model
,
configure_list
)
optimizer
=
torch
.
optim
.
Adam
(
model
.
parameters
(),
lr
=
1e-2
)
quantizer
=
BNNQuantizer
(
model
,
configure_list
,
optimizer
)
model
=
quantizer
.
compress
()
print
(
'='
*
10
+
'train'
+
'='
*
10
)
optimizer
=
torch
.
optim
.
Adam
(
model
.
parameters
(),
lr
=
1e-2
)
best_top1
=
0
for
epoch
in
range
(
400
):
print
(
'# Epoch {} #'
.
format
(
epoch
))
...
...
examples/tutorials/pruning_quick_start_mnist.py
View file @
a9668347
...
...
@@ -29,7 +29,7 @@ import torch
import
torch.nn.functional
as
F
from
torch.optim
import
SGD
from
scrip
ts.compression
_
mnist_model
import
TorchModel
,
trainer
,
evaluator
,
device
from
nni_asse
ts.compression
.
mnist_model
import
TorchModel
,
trainer
,
evaluator
,
device
# define the model
model
=
TorchModel
().
to
(
device
)
...
...
examples/tutorials/pruning_speedup.py
View file @
a9668347
...
...
@@ -43,7 +43,7 @@ Usage
# But in fact ``ModelSpeedup`` is a relatively independent tool, so you can use it independently.
import
torch
from
scrip
ts.compression
_
mnist_model
import
TorchModel
,
device
from
nni_asse
ts.compression
.
mnist_model
import
TorchModel
,
device
model
=
TorchModel
().
to
(
device
)
# masks = {layer_name: {'weight': weight_mask, 'bias': bias_mask}}
...
...
examples/tutorials/quantization_quick_start_mnist.py
View file @
a9668347
...
...
@@ -24,7 +24,7 @@ import torch
import
torch.nn.functional
as
F
from
torch.optim
import
SGD
from
scrip
ts.compression
_
mnist_model
import
TorchModel
,
trainer
,
evaluator
,
device
,
test_trt
from
nni_asse
ts.compression
.
mnist_model
import
TorchModel
,
trainer
,
evaluator
,
device
,
test_trt
# define the model
model
=
TorchModel
().
to
(
device
)
...
...
examples/tutorials/quantization_speedup.py
View file @
a9668347
...
...
@@ -64,7 +64,7 @@ Usage
import
torch
import
torch.nn.functional
as
F
from
torch.optim
import
SGD
from
scrip
ts.compression
_
mnist_model
import
TorchModel
,
device
,
trainer
,
evaluator
,
test_trt
from
nni_asse
ts.compression
.
mnist_model
import
TorchModel
,
device
,
trainer
,
evaluator
,
test_trt
config_list
=
[{
'quant_types'
:
[
'input'
,
'weight'
],
...
...
nni/compression/pytorch/utils/shape_dependency.py
View file @
a9668347
...
...
@@ -152,8 +152,12 @@ class ChannelDependency(Dependency):
parent_layers
=
[]
queue
=
[]
queue
.
append
(
node
)
visited_set
=
set
()
while
queue
:
curnode
=
queue
.
pop
(
0
)
if
curnode
in
visited_set
:
continue
visited_set
.
add
(
curnode
)
if
curnode
.
op_type
in
self
.
target_types
:
# find the first met conv
parent_layers
.
append
(
curnode
.
name
)
...
...
@@ -164,6 +168,8 @@ class ChannelDependency(Dependency):
parents
=
self
.
graph
.
find_predecessors
(
curnode
.
unique_name
)
parents
=
[
self
.
graph
.
name_to_node
[
name
]
for
name
in
parents
]
for
parent
in
parents
:
if
parent
in
visited_set
:
continue
queue
.
append
(
parent
)
return
parent_layers
...
...
nni/compression/pytorch/utils/utils.py
View file @
a9668347
...
...
@@ -56,7 +56,7 @@ def rand_like_with_shape(shape, ori_t):
higher_bound
=
torch
.
max
(
ori_t
)
if
dtype
in
[
torch
.
uint8
,
torch
.
int16
,
torch
.
short
,
torch
.
int16
,
torch
.
long
,
torch
.
bool
]:
return
torch
.
randint
(
lower_bound
,
higher_bound
+
1
,
shape
,
dtype
=
dtype
,
device
=
device
)
return
torch
.
randint
(
lower_bound
.
long
()
,
higher_bound
.
long
()
+
1
,
shape
,
dtype
=
dtype
,
device
=
device
)
else
:
return
torch
.
rand
(
shape
,
dtype
=
dtype
,
device
=
device
,
requires_grad
=
require_grad
)
...
...
nni_assets/compression/__init__.py
0 → 100644
View file @
a9668347
examples/tutorials/scrip
ts/compression
_
mnist_model.py
→
nni_asse
ts/compression
/
mnist_model.py
View file @
a9668347
from
pathlib
import
Path
root_path
=
Path
(
__file__
).
parent
.
parent
# define the model
import
torch
from
torch
import
nn
...
...
@@ -38,13 +36,13 @@ device = torch.device("cuda" if use_cuda else "cpu")
from
torchvision
import
datasets
,
transforms
train_loader
=
torch
.
utils
.
data
.
DataLoader
(
datasets
.
MNIST
(
root_path
/
'data'
,
train
=
True
,
download
=
True
,
transform
=
transforms
.
Compose
([
datasets
.
MNIST
(
'data'
,
train
=
True
,
download
=
True
,
transform
=
transforms
.
Compose
([
transforms
.
ToTensor
(),
transforms
.
Normalize
((
0.1307
,),
(
0.3081
,))
])),
batch_size
=
128
,
shuffle
=
True
)
test_loader
=
torch
.
utils
.
data
.
DataLoader
(
datasets
.
MNIST
(
root_path
/
'data'
,
train
=
False
,
transform
=
transforms
.
Compose
([
datasets
.
MNIST
(
'data'
,
train
=
False
,
transform
=
transforms
.
Compose
([
transforms
.
ToTensor
(),
transforms
.
Normalize
((
0.1307
,),
(
0.3081
,))
])),
batch_size
=
1000
,
shuffle
=
True
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment