OpenDAS / nni · Commits · e483aa01

Unverified commit e483aa01, authored Feb 11, 2022 by lin bin, committed by GitHub on Feb 11, 2022
[Model Compression] Add bank pruning for level pruner (#4481)
parent b8b7ed0e

Showing 4 changed files with 121 additions and 3 deletions (+121 −3)
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py  (+55 −3)
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py  (+1 −0)
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py  (+55 −0)
test/ut/compression/v2/test_pruner_torch.py  (+10 −0)
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py

@@ -36,6 +36,7 @@ from .tools import (
 from .tools import (
     SparsityAllocator,
     NormalSparsityAllocator,
+    BankSparsityAllocator,
     GlobalSparsityAllocator,
     Conv2dDependencyAwareAllocator
 )
@@ -137,9 +138,55 @@ class LevelPruner(BasicPruner):
             - op_names : Operation names to be pruned.
             - op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
             - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+    mode : str
+        'normal' or 'balance'.
+        In 'normal' mode, the target tensor is pruned with fine-grained pruning.
+        In 'balance' mode, a special sparsity pattern is chosen by the pruner. Taking a linear
+        operation as an example, the weight tensor is split into sub-blocks whose shapes are aligned
+        to balance_gran, and fine-grained pruning is then applied inside each sub-block. This sparsity
+        pattern has a better chance of achieving a good trade-off between model performance and
+        hardware acceleration. Please refer to the related paper 'Balanced Sparsity for Efficient
+        DNN Inference on GPU' (https://arxiv.org/pdf/1811.00206.pdf) for further information.
+    balance_gran : list
+        balance_gran sets the block granularity for the balanced sparsity pattern. The default value
+        is None, which means pruning without balance awareness, i.e., normal fine-grained pruning.
+        If a list of int is passed, LevelPruner prunes the model at the granularity of multi-dimensional
+        blocks. Note that the length of balance_gran should be no larger than the dimension of the
+        pruned tensor. For instance, in a Linear operation the length of balance_gran should be at
+        most two, since the pruned weight is two-dimensional. With balance_gran = [5, 5] and
+        sparsity = 0.6, the pruner divides the pruned parameters into blocks with tile size (5, 5);
+        each bank holds 5 * 5 = 25 values, of which 10 are kept after pruning. Fine-grained pruning
+        is applied at the granularity of a block, so every block keeps the same number of non-zero
+        values after pruning. Such pruning "balances" the non-zero values across the tensor, which
+        creates opportunities for better hardware acceleration.
+        Note: if the length of balance_gran is smaller than the length of the pruned tensor's shape,
+        it is padded on the left with ones, i.e., right-aligned (see example 1).
+        example 1:
+            operation: Linear
+            pruning tensor: weight
+            pruning tensor shape: [32, 32]
+            sparsity: 50%
+            balance_gran: [4]
+            pruning result: The weight tensor of shape [32, 32] is split into 256 [1, 4] sub-blocks,
+                and 2 values are pruned in each sub-block.
+        example 2:
+            operation: Linear
+            pruning tensor: weight
+            pruning tensor shape: [64, 64]
+            sparsity: 25%
+            balance_gran: [32, 32]
+            pruning result: The weight tensor of shape [64, 64] is split into 4 [32, 32] sub-blocks,
+                and 256 values are pruned in each sub-block.
     """
-    def __init__(self, model: Module, config_list: List[Dict]):
+    def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None):
+        self.mode = mode
+        self.balance_gran = balance_gran
         super().__init__(model, config_list)
 
     def _validate_config_before_canonical(self, model: Module, config_list: List[Dict]):
@@ -155,8 +202,13 @@ class LevelPruner(BasicPruner):
         if self.metrics_calculator is None:
             self.metrics_calculator = NormMetricsCalculator()
         if self.sparsity_allocator is None:
-            self.sparsity_allocator = NormalSparsityAllocator(self)
+            if self.mode == "normal":
+                self.sparsity_allocator = NormalSparsityAllocator(self)
+            elif self.mode == "balance":
+                assert self.balance_gran is not None, 'balance_gran should be passed as param in balance mode'
+                self.sparsity_allocator = BankSparsityAllocator(self, self.balance_gran)
+            else:
+                raise NotImplementedError('Only support mode `normal` and `balance`')
 
 
 class NormPruner(BasicPruner):
     """
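For orientation, the new mode is driven from user code roughly as follows. A minimal usage sketch, assuming LevelPruner is importable from the v2 pruning package whose paths appear in this commit; the toy model and granularity are illustrative only, not part of the commit:

from torch import nn
from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner

model = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 16))
config_list = [{'op_types': ['Linear'], 'sparsity': 0.5}]
# balance_gran=[4] is right-aligned against each 2-D weight shape,
# so every [1, 4] bank keeps exactly 2 of its 4 values.
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[4])
pruned_model, masks = pruner.compress()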
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py

@@ -20,6 +20,7 @@ from .metrics_calculator import (
 )
 from .sparsity_allocator import (
     NormalSparsityAllocator,
+    BankSparsityAllocator,
     GlobalSparsityAllocator,
     Conv2dDependencyAwareAllocator
 )
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py

@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import math
+import itertools
 from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
@@ -40,6 +41,60 @@ class NormalSparsityAllocator(SparsityAllocator):
                 masks[name]['weight'] *= wrapper.weight_mask
         return masks
+
+
+class BankSparsityAllocator(SparsityAllocator):
+    """
+    In the bank pruner, all values in a weight tensor are divided into sub-blocks whose shapes are
+    aligned with balance_gran. Each sub-block has the same sparsity, equal to the overall sparsity.
+    This allocator prunes the weight at the granularity of a block.
+    """
+
+    def __init__(self, pruner: Pruner, balance_gran: list):
+        super().__init__(pruner)
+        self.balance_gran = balance_gran
+        for gran in self.balance_gran:
+            assert isinstance(gran, int) and gran > 0, 'All values in list balance_gran \
+                should be type int and bigger than zero'
+
+    def generate_sparsity(self, metrics: Dict[str, Tensor]) -> Dict[str, Dict[str, Tensor]]:
+        masks = {}
+        for name, wrapper in self.pruner.get_modules_wrapper().items():
+            sparsity_rate = wrapper.config['total_sparsity']
+            assert name in metrics, 'Metric of {} is not calculated.'.format(name)
+            # We assume the metric values are all positive right now.
+            metric = metrics[name]
+            if self.continuous_mask:
+                metric *= self._compress_mask(wrapper.weight_mask)
+            n_dim = len(metric.shape)
+            assert n_dim >= len(self.balance_gran), 'Dimension of balance_gran should be smaller than metric'
+            # pad balance_gran on the left so it is right-aligned with the metric shape
+            balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
+            for i, j in zip(metric.shape, balance_gran):
+                assert i % j == 0, 'Length of {} weight is not \
+                    aligned with balance granularity'.format(name)
+            mask = torch.zeros(metric.shape).type_as(metric)
+            loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)]
+            for iter_params in itertools.product(*loop_iters):
+                index_str_list = [f"{iter_param * gran}:{(iter_param + 1) * gran}"
+                                  for iter_param, gran in zip(iter_params, balance_gran)]
+                index_str = ",".join(index_str_list)
+                sub_metric_str = "metric[{}]".format(index_str)
+                sub_mask_str = "mask[{}] = mask_bank".format(index_str)
+                metric_bank = eval(sub_metric_str)
+                prune_num = int(sparsity_rate * metric_bank.numel())
+                if prune_num == 0:
+                    threshold = metric_bank.min() - 1
+                else:
+                    threshold = torch.topk(metric_bank.reshape(-1), prune_num, largest=False)[0].max()
+                # mask_bank is consumed by exec(sub_mask_str) below
+                mask_bank = torch.gt(metric_bank, threshold).type_as(metric_bank)
+                exec(sub_mask_str)
+            masks[name] = self._expand_mask(name, mask)
+            if self.continuous_mask:
+                masks[name]['weight'] *= wrapper.weight_mask
+        return masks
+
+
 class GlobalSparsityAllocator(SparsityAllocator):
     """
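The allocator above builds NumPy-style index strings and applies them with eval/exec. The same per-bank top-k logic can be expressed with plain slice objects; below is a minimal, self-contained sketch of the idea, where bank_mask is a hypothetical helper written for illustration, not code from this commit:

import itertools

import torch

def bank_mask(metric: torch.Tensor, sparsity: float, balance_gran: list) -> torch.Tensor:
    # Pad balance_gran on the left with ones so it is right-aligned with metric.shape.
    gran = [1] * (metric.dim() - len(balance_gran)) + balance_gran
    mask = torch.zeros_like(metric)
    bank_counts = [s // g for s, g in zip(metric.shape, gran)]
    for idx in itertools.product(*[range(n) for n in bank_counts]):
        # Build the slice tuple for this bank instead of eval()-ing an index string.
        region = tuple(slice(i * g, (i + 1) * g) for i, g in zip(idx, gran))
        bank = metric[region]
        prune_num = int(sparsity * bank.numel())
        if prune_num == 0:
            threshold = bank.min() - 1
        else:
            threshold = torch.topk(bank.reshape(-1), prune_num, largest=False)[0].max()
        mask[region] = torch.gt(bank, threshold).type_as(bank)
    return mask

# Every [1, 4] bank of a random positive 8x8 metric keeps exactly 2 of its 4 values.
m = bank_mask(torch.rand(8, 8), sparsity=0.5, balance_gran=[4])
assert all(int(row.sum()) == 2 for row in m.reshape(-1, 4))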
test/ut/compression/v2/test_pruner_torch.py

@@ -72,6 +72,16 @@ class PrunerTestCase(unittest.TestCase):
         sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
         assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82
+    def test_level_pruner_bank(self):
+        model = TorchModel()
+        config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.7}]
+        pruner = LevelPruner(model=model, config_list=config_list, mode='balance', balance_gran=[5])
+        pruned_model, masks = pruner.compress()
+        pruner._unwrap_model()
+        sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
+        # int(0.7 * 5) = 3 values are pruned per 5-value bank, so rounding down yields a lower sparsity of 3 / 5 = 0.6
+        assert sparsity_list[0]['total_sparsity'] == 0.6
+
     def test_l1_norm_pruner(self):
         model = TorchModel()
         config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]