OpenDAS / nni · Commits · f24dc27b

Unverified commit f24dc27b, authored Jun 30, 2022 by J-shang, committed by GitHub on Jun 30, 2022.

[Compression] block sparse refactor (#4932)

parent 00e4debb

Showing 10 changed files with 619 additions and 477 deletions (+619 -477).
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py              +24   -23
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py             +1    -1
nni/algorithms/compression/v2/pytorch/pruning/tools/base.py               +112  -149
nni/algorithms/compression/v2/pytorch/pruning/tools/metrics_calculator.py  +58  -111
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py +156  -187
nni/algorithms/compression/v2/pytorch/utils/__init__.py                     +6    -1
nni/algorithms/compression/v2/pytorch/utils/attr.py                        +32    -0
nni/algorithms/compression/v2/pytorch/utils/scaling.py                    +195    -0
test/algo/compression/v2/test_pruning_tools_torch.py                        +6    -5
test/algo/compression/v2/test_scaling.py                                   +29    -0
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py

@@ -13,8 +13,7 @@ from torch.nn import Module
 from torch.optim import Optimizer

 from nni.common.serializer import Traceable
-from nni.algorithms.compression.v2.pytorch.base.pruner import Pruner
-from nni.algorithms.compression.v2.pytorch.utils import CompressorSchema, config_list_canonical, OptimizerConstructHelper
+from ..base import Pruner
 from .tools import (
     DataCollector,

@@ -38,9 +37,11 @@ from .tools import (
     NormalSparsityAllocator,
     BankSparsityAllocator,
     GlobalSparsityAllocator,
-    Conv2dDependencyAwareAllocator
+    DependencyAwareAllocator
 )
+from ..utils import CompressorSchema, config_list_canonical, OptimizerConstructHelper, Scaling

 _logger = logging.getLogger(__name__)

 __all__ = ['LevelPruner', 'L1NormPruner', 'L2NormPruner', 'FPGMPruner', 'SlimPruner', 'ActivationPruner',

@@ -275,12 +276,12 @@ class NormPruner(BasicPruner):
         else:
             self.data_collector.reset()
         if self.metrics_calculator is None:
-            self.metrics_calculator = NormMetricsCalculator(p=self.p, dim=0)
+            self.metrics_calculator = NormMetricsCalculator(p=self.p, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

@@ -440,12 +441,12 @@ class FPGMPruner(BasicPruner):
         else:
             self.data_collector.reset()
         if self.metrics_calculator is None:
-            self.metrics_calculator = DistMetricsCalculator(p=2, dim=0)
+            self.metrics_calculator = DistMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

@@ -688,16 +689,16 @@ class ActivationPruner(BasicPruner):
         else:
             self.data_collector.reset(collector_infos=[collector_info])  # type: ignore
         if self.metrics_calculator is None:
-            self.metrics_calculator = self._get_metrics_calculator()
+            self.metrics_calculator = self._create_metrics_calculator()
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

-    def _get_metrics_calculator(self) -> MetricsCalculator:
+    def _create_metrics_calculator(self) -> MetricsCalculator:
         raise NotImplementedError()

@@ -782,8 +783,8 @@ class ActivationAPoZRankPruner(ActivationPruner):
         # return a matrix that the position of zero in `output` is one, others is zero.
         return torch.eq(self._activation(output.detach()), torch.zeros_like(output)).type_as(output)

-    def _get_metrics_calculator(self) -> MetricsCalculator:
-        return APoZRankMetricsCalculator(dim=1)
+    def _create_metrics_calculator(self) -> MetricsCalculator:
+        return APoZRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))


 class ActivationMeanRankPruner(ActivationPruner):

@@ -865,8 +866,8 @@ class ActivationMeanRankPruner(ActivationPruner):
         # return the activation of `output` directly.
         return self._activation(output.detach())

-    def _get_metrics_calculator(self) -> MetricsCalculator:
-        return MeanRankMetricsCalculator(dim=1)
+    def _create_metrics_calculator(self) -> MetricsCalculator:
+        return MeanRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))


 class TaylorFOWeightPruner(BasicPruner):

@@ -1009,14 +1010,14 @@ class TaylorFOWeightPruner(BasicPruner):
         else:
             self.data_collector.reset(collector_infos=[collector_info])  # type: ignore
         if self.metrics_calculator is None:
-            self.metrics_calculator = MultiDataNormMetricsCalculator(p=1, dim=0)
+            self.metrics_calculator = MultiDataNormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'global':
-                self.sparsity_allocator = GlobalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = GlobalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal`, `global` and `dependency_aware`')

@@ -1146,12 +1147,12 @@ class ADMMPruner(BasicPruner):
             if self.granularity == 'fine-grained':
                 self.metrics_calculator = NormMetricsCalculator(p=1)
             elif self.granularity == 'coarse-grained':
-                self.metrics_calculator = NormMetricsCalculator(dim=0, p=1)
+                self.metrics_calculator = NormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.granularity == 'fine-grained':
                 self.sparsity_allocator = NormalSparsityAllocator(self)
             elif self.granularity == 'coarse-grained':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))

     def compress(self) -> Tuple[Module, Dict]:
         """
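For reference, a minimal sketch of why `Scaling(kernel_size=[1], kernel_padding_mode='back')` stands in for the old `dim=0` argument in the calls above. This assumes the nni code from this commit is importable; the (32, 16, 3, 3) weight shape is illustrative, borrowed from the old docstring example rather than from any concrete model.

    # Sketch: the new per-filter scaler reproduces the old `dim=0` metric.
    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    weight = torch.randn(32, 16, 3, 3)  # (out_channels, in_channels, kernel_h, kernel_w)

    # Old behaviour: keep dim 0, reduce every other dim with an L2 norm.
    old_metric = weight.reshape(weight.shape[0], -1).norm(p=2, dim=-1)

    # New behaviour: kernel_size=[1] with 'back' padding canonicalizes to [1, -1, -1, -1],
    # i.e. one metric value per output channel, all remaining dims flattened and reduced.
    scaler = Scaling(kernel_size=[1], kernel_padding_mode='back')
    new_metric = scaler.shrink(weight, reduce_func=lambda t: t.norm(p=2, dim=-1))

    assert new_metric.shape == old_metric.shape == (32,)
    assert torch.allclose(new_metric, old_metric)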
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py

@@ -25,7 +25,7 @@ from .sparsity_allocator import (
     NormalSparsityAllocator,
     BankSparsityAllocator,
     GlobalSparsityAllocator,
-    Conv2dDependencyAwareAllocator
+    DependencyAwareAllocator
 )
 from .task_generator import (
     AGPTaskGenerator,
nni/algorithms/compression/v2/pytorch/pruning/tools/base.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+from __future__ import annotations
+
 from datetime import datetime
 import logging
 from pathlib import Path

@@ -13,12 +14,24 @@ from torch import Tensor
 from torch.nn import Module
 from torch.optim import Optimizer

-from nni.algorithms.compression.v2.pytorch.base import Pruner, LayerInfo, Task, TaskResult
-from nni.algorithms.compression.v2.pytorch.utils import OptimizerConstructHelper
+from ...base import Pruner, LayerInfo, Task, TaskResult
+from ...utils import OptimizerConstructHelper, Scaling

 _logger = logging.getLogger(__name__)


+def _get_scaler(scalers: Dict[str, Dict[str, Scaling]] | None, module_name: str, target_name: str) -> Scaling | None:
+    # Get scaler for the specific target in the specific module. Return None if don't find it.
+    # `module_name` is not used in current nni version, will support different modules using different scalers in the future.
+    if scalers:
+        default_module_scalers = scalers.get('_default', {})
+        default_target_scaler = default_module_scalers.get(target_name, default_module_scalers.get('_default', None))
+        module_scalers = scalers.get(module_name, {})
+        return module_scalers.get(target_name, module_scalers.get('_default', default_target_scaler))
+    else:
+        return None
+
+
 class DataCollector:
     """
     An abstract class for collect the data needed by the compressor.

@@ -245,49 +258,21 @@ class MetricsCalculator:
     Parameters
     ----------
-    dim
-        The dimensions that corresponding to the under pruning weight dimensions in collected data.
-        None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-        Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-        Example:
-
-        If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-        Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-            Case 1: Directly collect the conv module weight as data to calculate the metric.
-            Then the data has size (32, 16, 3, 3).
-            Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-            So in this case, `dim=0` will set in `__init__`.
-
-            Case 2: Use the output of the conv module as data to calculate the metric.
-            Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-            Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-            So in this case, `dim=1` will set in `__init__`.
-
-        In both of these two case, the metric of this module has size (32,).
-    block_sparse_size
-        This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
-        Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
-
-        Example:
-
-        The under pruning weight size is (768, 768), and you want to apply a block sparse on dim=[0] with block size [64, 768],
-        then you can set block_sparse_size=[64]. The final metric size is (12,).
+    scalers
+        Scaler is used to scale the metrics' size. It scaling metric to the same size as the shrinked mask in the sparsity allocator.
+        If you want to use different scalers for different pruning targets in different modules, please use a dict `{module_name: {target_name: scaler}}`.
+        If allocator meets an unspecified module name, it will try to use `scalers['_default'][target_name]` to scale its mask.
+        If allocator meets an unspecified target name, it will try to use `scalers[module_name]['_default']` to scale its mask.
+        Passing in a scaler instead of a `dict` of scalers will be treated as passed in `{'_default': {'_default': scalers}}`.
+        Passing in `None` means no need to scale.
     """

-    def __init__(self, dim: Optional[Union[int, List[int]]] = None,
-                 block_sparse_size: Optional[Union[int, List[int]]] = None):
-        self.dim = dim if not isinstance(dim, int) else [dim]
-        self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
-        if self.block_sparse_size is not None:
-            assert all(i >= 1 for i in self.block_sparse_size)
-        elif self.dim is not None:
-            self.block_sparse_size = [1] * len(self.dim)
-        if self.dim is not None:
-            assert all(i >= 0 for i in self.dim)
-            self.dim, self.block_sparse_size = (list(t) for t in zip(*sorted(zip(self.dim, self.block_sparse_size))))  # type: ignore
+    def __init__(self, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        self.scalers: Dict[str, Dict[str, Scaling]] | None = scalers if isinstance(scalers, (dict, type(None))) else {'_default': {'_default': scalers}}  # type: ignore
+
+    def _get_scaler(self, module_name: str, target_name: str) -> Scaling:
+        scaler = _get_scaler(self.scalers, module_name, target_name)
+        return scaler if scaler else Scaling([1])

     def calculate_metrics(self, data: Dict) -> Dict[str, Tensor]:
         """

@@ -307,142 +292,120 @@ class MetricsCalculator:
 class SparsityAllocator:
     """
-    An abstract class for allocate mask based on metrics.
+    A base class for allocating mask based on metrics.

     Parameters
     ----------
     pruner
         The pruner that binded with this `SparsityAllocator`.
-    dim
-        The under pruning weight dimensions, which metric size should equal to the under pruning weight size on these dimensions.
-        None means one-to-one correspondence between pruned dimensions and metric, which equal to set `dim` as all under pruning weight dimensions.
-        The mask will expand to the weight size depend on `dim`.
-
-        Example:
-
-        The under pruning weight has size (2, 3, 4), and `dim=1` means the under pruning weight dimension is 1.
-        Then the metric should have a size (3,), i.e., `metric=[0.9, 0.1, 0.8]`.
-        Assuming by some kind of `SparsityAllocator` get the mask on weight dimension 1 `mask=[1, 0, 1]`,
-        then the dimension mask will expand to the final mask `[[[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]]]`.
-    block_sparse_size
-        This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
-        Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
-
-        Example:
-
-        The metric size is (12,), and block_sparse_size=[64], then the mask will expand to (768,) at first before expand with `dim`.
+    scalers
+        Scaler is used to scale the masks' size. It shrinks the mask of the same size as the pruning target to the same size as the metric,
+        or expands the mask of the same size as the metric to the same size as the pruning target.
+        If you want to use different scalers for different pruning targets in different modules, please use a dict `{module_name: {target_name: scaler}}`.
+        If allocator meets an unspecified module name, it will try to use `scalers['_default'][target_name]` to scale its mask.
+        If allocator meets an unspecified target name, it will try to use `scalers[module_name]['_default']` to scale its mask.
+        Passing in a scaler instead of a `dict` of scalers will be treated as passed in `{'_default': {'_default': scalers}}`.
+        Passing in `None` means no need to scale.
     continuous_mask
-        Inherit the mask already in the wrapper if set True.
+        If set True, the part that has been masked will be masked first.
+        If set False, the part that has been masked may be unmasked due to the increase of its corresponding metric.
     """

-    def __init__(self, pruner: Pruner, dim: Optional[Union[int, List[int]]] = None,
-                 block_sparse_size: Optional[Union[int, List[int]]] = None, continuous_mask: bool = True):
+    def __init__(self, pruner: Pruner, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None,
+                 continuous_mask: bool = True):
         self.pruner = pruner
-        self.dim = dim if not isinstance(dim, int) else [dim]
-        self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
-        if self.block_sparse_size is not None:
-            assert all(i >= 1 for i in self.block_sparse_size)
-        elif self.dim is not None:
-            self.block_sparse_size = [1] * len(self.dim)
-        if self.dim is not None:
-            assert all(i >= 0 for i in self.dim)
-            self.dim, self.block_sparse_size = (list(t) for t in zip(*sorted(zip(self.dim, self.block_sparse_size))))  # type: ignore
+        self.scalers: Dict[str, Dict[str, Scaling]] | None = scalers if isinstance(scalers, (dict, type(None))) else {'_default': {'_default': scalers}}  # type: ignore
         self.continuous_mask = continuous_mask

-    def generate_sparsity(self, metrics: Dict) -> Dict[str, Dict[str, Tensor]]:
-        """
-        Parameters
-        ----------
-        metrics
-            A metric dict. The key is the name of layer, the value is its metric.
-        """
-        raise NotImplementedError()
-
-    def _expand_mask(self, name: str, mask: Tensor) -> Dict[str, Tensor]:
-        """
-        Parameters
-        ----------
-        name
-            The masked module name.
-        mask
-            The reduced mask with `self.dim` and `self.block_sparse_size`.
-
-        Returns
-        -------
-        Dict[str, Tensor]
-            The key is `weight` or `bias`, value is the final mask.
-        """
-        weight_mask = mask.clone()
-        if self.block_sparse_size is not None:
-            # expend mask with block_sparse_size
-            expand_size = list(weight_mask.size())
-            reshape_size = list(weight_mask.size())
-            for i, block_width in reversed(list(enumerate(self.block_sparse_size))):
-                weight_mask = weight_mask.unsqueeze(i + 1)
-                expand_size.insert(i + 1, block_width)
-                reshape_size[i] *= block_width
-            weight_mask = weight_mask.expand(expand_size).reshape(reshape_size)
-        wrapper = self.pruner.get_modules_wrapper()[name]
-        weight_size = wrapper.weight.data.size()  # type: ignore
-        if self.dim is None:
-            assert weight_mask.size() == weight_size
-            expand_mask = {'weight': weight_mask}
-        else:
-            # expand mask to weight size with dim
-            assert len(weight_mask.size()) == len(self.dim)
-            assert all(weight_size[j] == weight_mask.size(i) for i, j in enumerate(self.dim))
-            idxs = list(range(len(weight_size)))
-            [idxs.pop(i) for i in reversed(self.dim)]
-            for i in idxs:
-                weight_mask = weight_mask.unsqueeze(i)
-            expand_mask = {'weight': weight_mask.expand(weight_size).clone()}
-            # NOTE: assume we only mask output, so the mask and bias have a one-to-one correspondence.
-            # If we support more kind of masks, this place need refactor.
-            if wrapper.bias_mask is not None and weight_mask.size() == wrapper.bias_mask.size():  # type: ignore
-                expand_mask['bias'] = weight_mask.clone()
-        return expand_mask
-
-    def _compress_mask(self, mask: Tensor) -> Tensor:
-        """
-        This function will reduce the mask with `self.dim` and `self.block_sparse_size`.
-        e.g., a mask tensor with size [50, 60, 70], self.dim is (0, 1), self.block_sparse_size is [10, 10].
-        Then, the reduced mask size is [50 / 10, 60 / 10] => [5, 6].
-
-        Parameters
-        ----------
-        name
-            The masked module name.
-        mask
-            The entire mask has the same size with weight.
-
-        Returns
-        -------
-        Tensor
-            Reduced mask.
-        """
-        if self.dim is None or len(mask.size()) == 1:
-            mask = mask.clone()
-        else:
-            mask_dim = list(range(len(mask.size())))
-            for dim in self.dim:
-                mask_dim.remove(dim)
-            mask = torch.sum(mask, dim=mask_dim)
-        if self.block_sparse_size is not None:
-            # operation like pooling
-            lower_case_letters = 'abcdefghijklmnopqrstuvwxyz'
-            ein_expression = ''
-            for i, step in enumerate(self.block_sparse_size):
-                mask = mask.unfold(i, step, step)
-                ein_expression += lower_case_letters[i]
-            ein_expression = '...{},{}'.format(ein_expression, ein_expression)
-            mask = torch.einsum(ein_expression, mask, torch.ones(self.block_sparse_size).to(mask.device))
-        return (mask != 0).type_as(mask)
+    def _get_scaler(self, module_name: str, target_name: str) -> Scaling | None:
+        return _get_scaler(self.scalers, module_name, target_name)
+
+    def _expand_mask(self, module_name: str, target_name: str, mask: Tensor) -> Tensor:
+        # Expand the shrinked mask to the pruning target size.
+        scaler = self._get_scaler(module_name=module_name, target_name=target_name)
+        if scaler:
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            return scaler.expand(mask, getattr(wrapper, f'{target_name}_mask').shape)
+        else:
+            return mask.clone()
+
+    def _shrink_mask(self, module_name: str, target_name: str, mask: Tensor) -> Tensor:
+        # Shrink the mask by scaler, shrinked mask usually has the same size with metric.
+        scaler = self._get_scaler(module_name=module_name, target_name=target_name)
+        if scaler:
+            mask = (scaler.shrink(mask) != 0).type_as(mask)
+        return mask
+
+    def _continuous_mask(self, new_masks: Dict[str, Dict[str, Tensor]]) -> Dict[str, Dict[str, Tensor]]:
+        # Set the already masked part in the metric to the minimum value.
+        target_name = 'weight'
+        for module_name, target_mask in new_masks.items():
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            old_target_mask = getattr(wrapper, f'{target_name}_mask', None)
+            if old_target_mask is not None:
+                new_masks[module_name][target_name] = torch.min(target_mask[target_name], old_target_mask)
+        return new_masks
+
+    def common_target_masks_generation(self, metrics: Dict[str, Tensor]) -> Dict[str, Dict[str, Tensor]]:
+        """
+        Generate masks for metrics-dependent targets.
+
+        Parameters
+        ----------
+        metrics
+            The format is {module_name: weight_metric}.
+            The metric of `weight` usually has the same size with shrinked mask.
+
+        Return
+        ------
+        Dict[str, Dict[str, Tensor]]
+            The format is {module_name: {target_name: mask}}.
+            Return the masks of the same size as its target.
+        """
+        raise NotImplementedError()
+
+    def special_target_masks_generation(self, masks: Dict[str, Dict[str, Tensor]]) -> Dict[str, Dict[str, Tensor]]:
+        """
+        Some pruning targets' mask generation depends on other targets, i.e., bias mask depends on weight mask.
+        This function is used to generate these masks, and it be called at the end of `generate_sparsity`.
+
+        Parameters
+        ----------
+        masks
+            The format is {module_name: {target_name: mask}}.
+            It is usually the return value of `common_target_masks_generation`.
+        """
+        for module_name, module_masks in masks.items():
+            # generate bias mask, this may move to wrapper in the future
+            weight_mask = module_masks.get('weight', None)
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            old_bias_mask = getattr(wrapper, 'bias_mask', None)
+            if weight_mask is not None and old_bias_mask is not None and weight_mask.shape[0] == old_bias_mask.shape[0]:
+                # keep dim 0 and reduce all other dims by sum
+                reduce_dims = [reduce_dim for reduce_dim in range(1, len(weight_mask.shape))]
+                # count unmasked number of values on dim 0 (output channel) of weight
+                unmasked_num_on_dim0 = weight_mask.sum(reduce_dims) if reduce_dims else weight_mask
+                module_masks['bias'] = (unmasked_num_on_dim0 != 0).type_as(old_bias_mask)
+        return masks
+
+    def generate_sparsity(self, metrics: Dict) -> Dict[str, Dict[str, Tensor]]:
+        """
+        The main function of `SparsityAllocator`, generate a set of masks based on the given metrics.
+
+        Parameters
+        ----------
+        metrics
+            A metric dict with format {module_name: weight_metric}
+
+        Returns
+        -------
+        Dict[str, Dict[str, Tensor]]
+            The masks format is {module_name: {target_name: mask}}.
+        """
+        masks = self.common_target_masks_generation(metrics)
+        masks = self.special_target_masks_generation(masks)
+        if self.continuous_mask:
+            masks = self._continuous_mask(masks)
+        return masks


 class TaskGenerator:
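The `scalers` argument documented above accepts either a single `Scaling` or a nested dict. A small sketch of the lookup order implemented by the module-level `_get_scaler` helper in this file; the module and target names ('conv1', 'weight') are purely illustrative.

    # Sketch of the scaler lookup order; runnable as-is against this commit's code.
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    per_filter = Scaling(kernel_size=[1], kernel_padding_mode='back')    # one value per output channel
    per_block64 = Scaling(kernel_size=[64], kernel_padding_mode='back')  # 64-wide blocks on dim 0

    scalers = {
        '_default': {'_default': per_filter},   # fallback for any unspecified module / target
        'conv1': {'weight': per_block64},       # hypothetical override for one module's weight
    }

    # Lookup order for a (module_name, target_name) pair:
    #   scalers[module_name][target_name]
    #   -> scalers[module_name]['_default']
    #   -> scalers['_default'][target_name]
    #   -> scalers['_default']['_default']
    # So ('conv1', 'weight') resolves to per_block64, while e.g. ('conv2', 'weight')
    # falls back to per_filter.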
nni/algorithms/compression/v2/pytorch/pruning/tools/metrics_calculator.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

-from typing import Dict, List, Optional, Union
+from __future__ import annotations
+
+from typing import Dict, List

 import torch
 from torch import Tensor

 from .base import MetricsCalculator
+from ...utils import Scaling

 __all__ = ['NormMetricsCalculator', 'MultiDataNormMetricsCalculator', 'DistMetricsCalculator',
            'APoZRankMetricsCalculator', 'MeanRankMetricsCalculator', 'StraightMetricsCalculator']

@@ -28,49 +31,28 @@ class NormMetricsCalculator(MetricsCalculator):
     """
     Calculate the specify norm for each tensor in data.
     L1, L2, Level, Slim pruner use this to calculate metric.
+
+    Parameters
+    ----------
+    p
+        The order of norm. None means Frobenius norm.
+    scalers
+        Please view the base class `MetricsCalculator` docstring.
     """

-    def __init__(self, dim: Optional[Union[int, List[int]]] = None,
-                 p: Optional[Union[int, float]] = None):
-        """
-        Parameters
-        ----------
-        dim
-            The dimensions that corresponding to the under pruning weight dimensions in collected data.
-            None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-            Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-            Example:
-
-            If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-            Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-                Case 1: Directly collect the conv module weight as data to calculate the metric.
-                Then the data has size (32, 16, 3, 3).
-                Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=0` will set in `__init__`.
-
-                Case 2: Use the output of the conv module as data to calculate the metric.
-                Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-                Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=1` will set in `__init__`.
-
-            In both of these two case, the metric of this module has size (32,).
-        p
-            The order of norm. None means Frobenius norm.
-        """
-        super().__init__(dim=dim)
+    def __init__(self, p: int | float | None = None, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        super().__init__(scalers=scalers)
         self.p = p if p is not None else 'fro'

     def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return t.norm(p=self.p, dim=-1)  # type: ignore
+
         metrics = {}
-        for name, tensor in data.items():
-            keeped_dim = list(range(len(tensor.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(tensor.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            if len(across_dim) == 0:
-                metrics[name] = tensor.abs()
-            else:
-                metrics[name] = tensor.norm(p=self.p, dim=across_dim)  # type: ignore
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -90,66 +72,32 @@ class DistMetricsCalculator(MetricsCalculator):
     """
     Calculate the sum of specify distance for each element with all other elements in specify `dim` in each tensor in data.
     FPGM pruner uses this to calculate metric.
+
+    Parameters
+    ----------
+    p
+        The order of norm. None means Frobenius norm.
+    scalers
+        Please view the base class `MetricsCalculator` docstring.
     """

-    def __init__(self, p: float, dim: Union[int, List[int]]):
-        """
-        Parameters
-        ----------
-        dim
-            The dimensions that corresponding to the under pruning weight dimensions in collected data.
-            None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-            Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-            Example:
-
-            If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-            Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-                Case 1: Directly collect the conv module weight as data to calculate the metric.
-                Then the data has size (32, 16, 3, 3).
-                Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=0` will set in `__init__`.
-
-                Case 2: Use the output of the conv module as data to calculate the metric.
-                Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-                Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=1` will set in `__init__`.
-
-            In both of these two case, the metric of this module has size (32,).
-        p
-            The order of norm.
-        """
-        super().__init__(dim=dim)
-        self.p = p
+    def __init__(self, p: int | float | None = None, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        super().__init__(scalers=scalers)
+        self.p = p if p is not None else 'fro'

     def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            reshape_data = t.reshape(-1, t.shape[-1])
+            metric = torch.zeros(reshape_data.shape[0], device=reshape_data.device)
+            for i in range(reshape_data.shape[0]):
+                metric[i] = (reshape_data - reshape_data[i]).norm(p=self.p, dim=-1).sum()  # type: ignore
+            return metric.reshape(t.shape[:-1])
+
         metrics = {}
-        for name, tensor in data.items():
-            keeped_dim = list(range(len(tensor.size()))) if self.dim is None else self.dim
-            reorder_dim = list(keeped_dim)
-            reorder_dim.extend([i for i in range(len(tensor.size())) if i not in keeped_dim])
-            reorder_tensor = tensor.permute(*reorder_dim).clone()
-            metric = torch.ones(*reorder_tensor.size()[:len(keeped_dim)], device=reorder_tensor.device)
-            across_dim = list(range(len(keeped_dim), len(reorder_dim)))
-            idxs = metric.nonzero(as_tuple=False)
-            for idx in idxs:
-                other = reorder_tensor
-                for i in idx:
-                    other = other[i]
-                other = other.clone()
-                if len(across_dim) == 0:
-                    dist_sum = torch.abs(reorder_tensor - other).sum()
-                else:
-                    dist_sum = torch.norm((reorder_tensor - other), p=self.p, dim=across_dim).sum()  # type: ignore
-                # NOTE: this place need refactor when support layer level pruning.
-                tmp_metric = metric
-                for i in idx[:-1]:
-                    tmp_metric = tmp_metric[i]
-                tmp_metric[idx[-1]] = dist_sum
-            metrics[name] = metric
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -161,19 +109,15 @@ class APoZRankMetricsCalculator(MetricsCalculator):
     APoZRank pruner uses this to calculate metric.
     """

     def calculate_metrics(self, data: Dict[str, List]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return 1 - t.mean(dim=-1)
+
         metrics = {}
-        for name, (num, zero_counts) in data.items():
-            keeped_dim = list(range(len(zero_counts.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(zero_counts.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            # The element number on each keeped_dim in zero_counts
-            total_size = num
-            for dim, dim_size in enumerate(zero_counts.size()):
-                if dim not in keeped_dim:
-                    total_size *= dim_size
-            _apoz = torch.sum(zero_counts, dim=across_dim).type_as(zero_counts) / total_size
-            # NOTE: the metric is (1 - apoz) because we assume the smaller metric value is more needed to be pruned.
-            metrics[name] = torch.ones_like(_apoz) - _apoz
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            target_data = target_data[1] / target_data[0]
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -183,11 +127,14 @@ class MeanRankMetricsCalculator(MetricsCalculator):
     This metric simply calculate the average on `self.dim`, then divide by the batch_number.
     MeanRank pruner uses this to calculate metric.
     """

-    def calculate_metrics(self, data: Dict[str, List[Tensor]]) -> Dict[str, Tensor]:
+    def calculate_metrics(self, data: Dict[str, List]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return t.mean(dim=-1)
+
         metrics = {}
-        for name, (num, activation_sum) in data.items():
-            keeped_dim = list(range(len(activation_sum.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(activation_sum.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            metrics[name] = torch.mean(activation_sum, across_dim) / num
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            target_data = target_data[1] / target_data[0]
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics
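The rank calculators above now feed `target_data[1] / target_data[0]` through `scaler.shrink(...)`. A hedged sketch with the same toy data as the unit test further down, showing how `kernel_size=[-1, 1]` with 'back' padding collapses the first (batch-like) dimension and keeps the second, mirroring the old `dim=1` behaviour; the shapes here are illustrative, not taken from a real collector.

    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    num, activation_sum = 2, torch.tensor([[0., 1.], [1., 0.]])   # as in the MeanRank test data below
    scaler = Scaling(kernel_size=[-1, 1], kernel_padding_mode='back')

    # -1 covers the whole first dimension; 1 keeps the second dimension one-to-one.
    metric = scaler.shrink(activation_sum / num, reduce_func=lambda t: t.mean(dim=-1))
    assert metric.shape == (2,)                                    # one value per kept position
    assert torch.allclose(metric, torch.tensor([0.25, 0.25]))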
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py  (+156 -187)
This diff is collapsed.
nni/algorithms/compression/v2/pytorch/utils/__init__.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+from .attr import (
+    get_nested_attr,
+    set_nested_attr
+)
 from .config_validation import CompressorSchema
+from .constructor_helper import *
 from .pruning import (
     config_list_canonical,
     unfold_config_list,

@@ -12,4 +17,4 @@ from .pruning import (
     get_model_weights_numel,
     get_module_by_name
 )
-from .constructor_helper import *
+from .scaling import Scaling
nni/algorithms/compression/v2/pytorch/utils/attr.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from functools import reduce
from typing import Any, overload


@overload
def get_nested_attr(__o: object, __name: str) -> Any:
    ...


@overload
def get_nested_attr(__o: object, __name: str, __default: Any) -> Any:
    ...


def get_nested_attr(__o: object, __name: str, *args) -> Any:
    """
    Get a nested named attribute from an object by a `.` separated name.
    rgetattr(x, 'y.z') is equivalent to getattr(getattr(x, 'y'), 'z') and x.y.z.
    """
    def _getattr(__o, __name):
        return getattr(__o, __name, *args)
    return reduce(_getattr, [__o] + __name.split('.'))  # type: ignore


def set_nested_attr(__obj: object, __name: str, __value: Any):
    """
    Set the nested named attribute on the given object to the specified value by a `.` separated name.
    set_nested_attr(x, 'y.z', v) is equivalent to setattr(getattr(x, 'y'), 'z', v) and x.y.z = v.
    """
    pre, _, post = __name.rpartition('.')
    return setattr(get_nested_attr(__obj, pre) if pre else __obj, post, __value)
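A short usage sketch for the two helpers above; the nested object is a stand-in, not part of this commit.

    from types import SimpleNamespace
    from nni.algorithms.compression.v2.pytorch.utils.attr import get_nested_attr, set_nested_attr

    cfg = SimpleNamespace(pruner=SimpleNamespace(mode='normal'))

    assert get_nested_attr(cfg, 'pruner.mode') == 'normal'       # same as cfg.pruner.mode
    set_nested_attr(cfg, 'pruner.mode', 'dependency_aware')      # same as cfg.pruner.mode = ...
    assert cfg.pruner.mode == 'dependency_aware'
    assert get_nested_attr(cfg, 'pruner.missing', None) is None  # default value, like getattr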
nni/algorithms/compression/v2/pytorch/utils/scaling.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from __future__ import annotations

from functools import reduce
from typing import Callable, List, overload

from typing_extensions import Literal

import torch
from torch import Tensor


class Scaling:
    """
    In the process of generating masks, a large number of operations like pooling or upsampling are involved.
    This class provides tensor-related scaling functions for a given scaling kernel.
    Similar to the concept of convolutional kernel, the scaling kernel also moves over the tensor and does operations.
    The scaling kernel in this class is defined by two parts, kernel size and scaling function (shrink and expand).

    Parameters
    ----------
    kernel_size
        kernel_size is the scale, which determines how large a range in a tensor should shrink to a value,
        or how large a value in a tensor should expand.
        `-1` can be used to indicate that it is a full step in this dimension,
        and the dimension where -1 is located will be reduced or unsqueezed during scaling.

        Example::

            kernel_size = [2, -1]
            # For a given 2D-tensor with size (4, 3),
            [[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9],
             [10, 11, 12]]
            # shrinking it by shrink function, its size becomes (2,) after shrinking:
            [shrink([[1, 2, 3], [4, 5, 6]]), shrink([[7, 8, 9], [10, 11, 12]])]
            # expanding it by expand function with a given expand size,
            # if the expand function is repeating the values, and the expand size is (4, 6, 2):
            [[[1, 1],
              [1, 1],
              [2, 2],
              [2, 2],
              [3, 3],
              [3, 3]],
              ...
              [9, 9]]]
            # note that the original tensor with size (4, 3) will unsqueeze to size (4, 3, 1) at first
            # for the `-1` in kernel_size, then expand size (4, 3, 1) to size (4, 6, 2).
    kernel_padding_mode
        'front' or 'back', default is 'front'.
        If set 'front', for a given tensor when shrinking, padding `1` at front of kernel_size until `len(tensor.shape) == len(kernel_size)`;
        for a given expand size when expanding, padding `1` at front of kernel_size until `len(expand_size) == len(kernel_size)`.
        If set 'back', for a given tensor when shrinking, padding `-1` at back of kernel_size until `len(tensor.shape) == len(kernel_size)`;
        for a given expand size when expanding, padding `-1` at back of kernel_size until `len(expand_size) == len(kernel_size)`.
    """

    def __init__(self, kernel_size: List[int], kernel_padding_mode: Literal['front', 'back'] = 'front') -> None:
        self.kernel_size = kernel_size
        assert kernel_padding_mode in ['front', 'back'], \
            f"kernel_padding_mode should be one of ['front', 'back'], but get kernel_padding_mode={kernel_padding_mode}."
        self.kernel_padding_mode = kernel_padding_mode

    def _padding(self, _list: List[int], length: int, padding_value: int = -1,
                 padding_mode: Literal['front', 'back'] = 'back') -> List[int]:
        """
        Padding the `_list` to a specific length with `padding_value`.

        Parameters
        ----------
        _list
            The list of int value to be padding.
        length
            The length to pad to.
        padding_value
            Padding value, should be a int.
        padding_mode
            If `padding_mode` is `'front'`, then the padding applied on the front of the size list.
            If `padding_mode` is `'back'`, then the padding applied on the back of the size list.

        Returns
        -------
        List[int]
            The padded list.
        """
        assert len(_list) <= length
        padding = [padding_value for _ in range(length - len(_list))]
        if padding_mode == 'front':
            new_list = padding + list(_list)
        elif padding_mode == 'back':
            new_list = list(_list) + padding
        else:
            raise ValueError(f'Unsupported padding mode: {padding_mode}.')
        return new_list

    def _shrink(self, target: Tensor, kernel_size: List[int], reduce_func: Callable[[Tensor], Tensor] | None = None) -> Tensor:
        """
        Main logic about how to shrink target. Subclass could override this function to customize.
        Sum all values covered by the kernel as a simple implementation.
        """
        # step 1: put the part covered by the kernel to the end of the converted target.
        # e.g., target size is [10, 20], kernel_size is [2, 4], then new_target size is [5, 5, 8].
        reshape_size = []
        final_size = []
        reduced_dims = []
        for (dim, step) in enumerate(kernel_size):
            if step == -1:
                step = target.shape[dim]
                reduced_dims.insert(0, dim)
            assert target.shape[dim] % step == 0
            reshape_size.append(target.shape[dim] // step)
            final_size.append(target.shape[dim] // step)
            reshape_size.append(step)
        permute_dims = [2 * _ for _ in range(len(kernel_size))] + [2 * _ + 1 for _ in range(len(kernel_size))]
        converted_target = target.reshape(reshape_size).permute(permute_dims).reshape(final_size + [-1])
        # step 2: reduce the converted_target last dim with a certain way, by default is converted_target.sum(-1).
        result = reduce_func(converted_target) if reduce_func else converted_target.sum(-1)
        # step 3: reduce the dims where kernel_size is -1.
        # e.g., target size is [10, 40], kernel_size is [-1, 4], result size is [1, 10], then reduce result to size [10].
        result = reduce(lambda t, dim: t.squeeze(dim), [result] + reduced_dims)  # type: ignore
        return result

    def _expand(self, target: Tensor, kernel_size: List[int], expand_size: List[int]) -> Tensor:
        """
        Main logic about how to expand target to a specific size. Subclass could override this function to customize.
        Repeat each value to reach the kernel size as a simple implementation.
        """
        # step 1: unsqueeze the target tensor where -1 is located in kernel_size.
        unsqueezed_dims = [dim for (dim, step) in enumerate(kernel_size) if step == -1]
        new_target: Tensor = reduce(lambda t, dim: t.unsqueeze(dim), [target] + unsqueezed_dims)  # type: ignore
        # step 2: build the _expand_size and unsqueeze target tensor on each dim
        _expand_size = []
        for a, b in zip(kernel_size, expand_size):
            if a == -1:
                _expand_size.append(1)
                _expand_size.append(b)
            else:
                assert b % a == 0, f'Can not expand tensor with {target.shape} to {expand_size} with kernel size {kernel_size}.'
                _expand_size.append(b // a)
                _expand_size.append(a)
        new_target: Tensor = reduce(lambda t, dim: t.unsqueeze(dim), [new_target] + [2 * _ + 1 for _ in range(len(expand_size))])  # type: ignore
        # step 3: expanding the new target to _expand_size and reshape to expand_size.
        # Note that we can also give an interface for how to expand the tensor, like `reduce_func` in `_shrink`, currently we don't have that need.
        result = new_target.expand(_expand_size).reshape(expand_size).clone()
        return result

    def shrink(self, target: Tensor, reduce_func: Callable[[Tensor], Tensor] | None = None) -> Tensor:
        # Canonicalize kernel_size to target size length at first.
        # If kernel_padding_mode is 'front', padding 1 at the front of `self.kernel_size`.
        # e.g., padding kernel_size [2, 2] to [1, 2, 2] when target size length is 3.
        # If kernel_padding_mode is 'back', padding -1 at the back of `self.kernel_size`.
        # e.g., padding kernel_size [1] to [1, -1, -1] when target size length is 3.
        if self.kernel_padding_mode == 'front':
            kernel_size = self._padding(self.kernel_size, len(target.shape), 1, 'front')
        elif self.kernel_padding_mode == 'back':
            kernel_size = self._padding(self.kernel_size, len(target.shape), -1, 'back')
        else:
            raise ValueError(f'Unsupported kernel padding mode: {self.kernel_padding_mode}.')
        return self._shrink(target, kernel_size, reduce_func)

    def expand(self, target: Tensor, expand_size: List[int]):
        # Similar with `self.shrink`, canonicalize kernel_size to expand_size length at first.
        if self.kernel_padding_mode == 'front':
            kernel_size = self._padding(self.kernel_size, len(expand_size), 1, 'front')
        elif self.kernel_padding_mode == 'back':
            kernel_size = self._padding(self.kernel_size, len(expand_size), -1, 'back')
        else:
            raise ValueError(f'Unsupported kernel padding mode: {self.kernel_padding_mode}.')
        return self._expand(target, kernel_size, expand_size)

    @overload
    def validate(self, target: List[int]):
        ...

    @overload
    def validate(self, target: Tensor):
        ...

    def validate(self, target: List[int] | Tensor):
        """
        Validate the target tensor can be shape-lossless scaling.
        That means the shape will not change after `shrink` then `expand`.
        """
        target = target if isinstance(target, Tensor) else torch.rand(target)
        if self.expand((self.shrink(target)), list(target.shape)).shape != target.shape:
            raise ValueError(f'The tensor with shape {target.shape}, can not shape-lossless scaling with ' +
                             f'kernel size is {self.kernel_size} and kernel_padding_mode is {self.kernel_padding_mode}.')
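A worked instance of the `kernel_size = [2, -1]` example from the `Scaling` docstring above, as a hedged sketch assuming this commit's module path; the default shrink sums each kernel block and the default expand repeats values.

    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    t = torch.arange(1, 13).reshape(4, 3)      # [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    scaler = Scaling(kernel_size=[2, -1])       # kernel_padding_mode defaults to 'front'

    shrunk = scaler.shrink(t)                   # default reduce: sum over each (2, 3) block
    assert shrunk.tolist() == [21, 57]          # 1+2+...+6 and 7+8+...+12

    expanded = scaler.expand(t, [4, 6, 2])      # repeat values, unsqueezing for the -1 dim
    assert expanded.shape == (4, 6, 2)
    assert expanded[0].tolist() == [[1, 1], [1, 1], [2, 2], [2, 2], [3, 3], [3, 3]]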
test/algo/compression/v2/test_pruning_tools_torch.py

@@ -26,6 +26,7 @@ from nni.algorithms.compression.v2.pytorch.pruning.tools import (
 )
 from nni.algorithms.compression.v2.pytorch.pruning.tools.base import HookCollectorInfo
 from nni.algorithms.compression.v2.pytorch.utils import get_module_by_name
+from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling
 from nni.algorithms.compression.v2.pytorch.utils.constructor_helper import OptimizerConstructHelper

@@ -112,7 +113,7 @@ class PruningToolsTestCase(unittest.TestCase):
     def test_metrics_calculator(self):
         # Test NormMetricsCalculator
-        metrics_calculator = NormMetricsCalculator(dim=0, p=2)
+        metrics_calculator = NormMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': torch.ones(3, 3, 3),
             '2': torch.ones(4, 4) * 2

@@ -125,7 +126,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test DistMetricsCalculator
-        metrics_calculator = DistMetricsCalculator(dim=0, p=2)
+        metrics_calculator = DistMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': torch.tensor([[1, 2], [4, 6]], dtype=torch.float32),
             '2': torch.tensor([[0, 0], [1, 1]], dtype=torch.float32)

@@ -138,7 +139,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test MultiDataNormMetricsCalculator
-        metrics_calculator = MultiDataNormMetricsCalculator(dim=0, p=1)
+        metrics_calculator = MultiDataNormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.ones(3, 3, 3) * 2],
             '2': [2, torch.ones(4, 4) * 2]

@@ -151,7 +152,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test APoZRankMetricsCalculator
-        metrics_calculator = APoZRankMetricsCalculator(dim=1)
+        metrics_calculator = APoZRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.tensor([[1, 1], [1, 1]], dtype=torch.float32)],
             '2': [2, torch.tensor([[0, 0, 1], [0, 0, 0]], dtype=torch.float32)]

@@ -164,7 +165,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test MeanRankMetricsCalculator
-        metrics_calculator = MeanRankMetricsCalculator(dim=1)
+        metrics_calculator = MeanRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.tensor([[0, 1], [1, 0]], dtype=torch.float32)],
             '2': [2, torch.tensor([[0, 0, 1], [0, 0, 0]], dtype=torch.float32)]
test/algo/compression/v2/test_scaling.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pytest
import torch

from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling


def test_scaling():
    data = torch.tensor([_ for _ in range(100)]).reshape(10, 10)

    scaler = Scaling([5], kernel_padding_mode='front')
    shrinked_data = scaler.shrink(data)
    assert list(shrinked_data.shape) == [10, 2]
    expanded_data = scaler.expand(data, [10, 50])
    assert list(expanded_data.shape) == [10, 50]

    scaler = Scaling([5, 5], kernel_padding_mode='back')
    shrinked_data = scaler.shrink(data)
    assert list(shrinked_data.shape) == [2, 2]
    expanded_data = scaler.expand(data, [50, 50, 10])
    assert list(expanded_data.shape) == [50, 50, 10]
    scaler.validate([10, 10, 10])


if __name__ == '__main__':
    test_scaling()