Commit 070df4a0 (unverified), authored Nov 04, 2021 by liuzhe-lz, committed by GitHub on Nov 04, 2021

Merge pull request #4291 from microsoft/v2.5

merge v2.5 back to master

Parents: 821706b8, 6a082fe9

Changes: 63
Showing 20 changed files with 487 additions and 466 deletions.
examples/model_compress/quantization/mixed_precision_speedup_mnist.py  +4 -4
examples/trials/benchmarking/automlbenchmark/setup.sh  +1 -1
nni/algorithms/compression/pytorch/pruning/iterative_pruner.py  +1 -0
nni/algorithms/compression/pytorch/quantization/quantizers.py  +27 -17
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py  +321 -323
nni/algorithms/compression/v2/pytorch/pruning/basic_scheduler.py  +20 -20
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py  +1 -1
nni/compression/pytorch/quantization/utils.py  +1 -1
nni/compression/pytorch/speedup/compress_modules.py  +8 -4
nni/compression/pytorch/speedup/compressor.py  +17 -18
nni/nas/benchmarks/constants.py  +2 -1
nni/nas/benchmarks/download.py  +10 -0
nni/retiarii/converter/graph_gen.py  +20 -35
nni/retiarii/evaluator/pytorch/cgo/accelerator.py  +12 -11
nni/retiarii/evaluator/pytorch/cgo/evaluator.py  +4 -4
nni/retiarii/evaluator/pytorch/lightning.py  +4 -3
nni/retiarii/experiment/pytorch.py  +4 -4
nni/retiarii/operation_def/torch_op_def.py  +9 -1
nni/tools/nnictl/launcher_utils.py  +4 -2
test/nni_test/nnitest/test_quantize_model_speedup.py  +17 -16
examples/model_compress/quantization/mixed_precision_speedup_mnist.py
@@ -83,16 +83,16 @@ def quantization_aware_training_example(train_loader, test_loader, device):
     model = NaiveModel()

     configure_list = [{
-        'quant_types': ['weight', 'output'],
-        'quant_bits': {'weight': 8, 'output': 8},
+        'quant_types': ['input', 'weight'],
+        'quant_bits': {'input': 8, 'weight': 8},
         'op_names': ['conv1']
     }, {
         'quant_types': ['output'],
         'quant_bits': {'output': 8},
         'op_names': ['relu1']
     }, {
-        'quant_types': ['weight', 'output'],
-        'quant_bits': {'weight': 8, 'output': 8},
+        'quant_types': ['input', 'weight'],
+        'quant_bits': {'input': 8, 'weight': 8},
         'op_names': ['conv2']
     }, {
         'quant_types': ['output'],
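For readers running this example end to end, here is a minimal sketch of how a configure_list like the one above is handed to NNI's QAT quantizer. NaiveModel, the optimizer and the training loop come from the surrounding script; the file paths and dummy-input shape below are illustrative assumptions, not part of the diff.

    import torch
    from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

    model = NaiveModel()                                   # defined earlier in this example
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    dummy_input = torch.rand(32, 1, 28, 28)                # MNIST-shaped batch (illustrative)

    # Wrap the model: fake quantization is simulated for the ops selected in configure_list.
    quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input=dummy_input)
    quantizer.compress()

    # ... run the usual quantization-aware training loop here ...

    # Export the quantized weights and the calibration parameters collected during training.
    calibration_config = quantizer.export_model('mnist_model.pth', 'mnist_calibration.pth')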
examples/trials/benchmarking/automlbenchmark/setup.sh
@@ -2,7 +2,7 @@

 # download automlbenchmark repository
 if [ ! -d './automlbenchmark' ]; then
-    git clone https://github.com/openml/automlbenchmark.git --branch stable --depth 1
+    git clone https://github.com/openml/automlbenchmark.git --branch v1.6 --depth 1
 fi

 # install dependencies
nni/algorithms/compression/pytorch/pruning/iterative_pruner.py
@@ -384,6 +384,7 @@ class ADMMPruner(IterativePruner):
         for i, wrapper in enumerate(self.get_modules_wrapper()):
             z = wrapper.module.weight.data + self.U[i]
             self.Z[i] = self._projection(z, wrapper.config['sparsity'], wrapper)
+            torch.cuda.empty_cache()
             self.U[i] = self.U[i] + wrapper.module.weight.data - self.Z[i]

         # apply prune
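For context, the loop shown above is the scaled-form ADMM update that this pruner iterates; in the notation below W_i is the weight of the i-th wrapped layer, Z_i the auxiliary variable projected onto the sparsity constraint S_i (the _projection call), and U_i the scaled dual variable. The added torch.cuda.empty_cache() only releases cached GPU memory between the two updates and does not change the math.

    Z_i^{k+1} = \Pi_{S_i}\bigl(W_i^{k+1} + U_i^{k}\bigr), \qquad
    U_i^{k+1} = U_i^{k} + W_i^{k+1} - Z_i^{k+1}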
nni/algorithms/compression/pytorch/quantization/quantizers.py
@@ -26,7 +26,6 @@ __all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer', 'BNNQuantizer',
 logger = logging.getLogger(__name__)

 class NaiveQuantizer(Quantizer):
     """quantize weight to 8 bits
     """
@@ -676,17 +675,20 @@ class QAT_Quantizer(Quantizer):
         for layer, _ in modules_to_compress:
             name, module = layer.name, layer.module
             if name not in calibration_config:
-                if hasattr(module, 'weight_bits') or hasattr(module, 'output_bits') or hasattr(module, 'input_bits'):
+                if module.layer_quant_setting.weight or module.layer_quant_setting.input or module.layer_quant_setting.output:
                     logger.warning(f"Can not find module {name}'s parameter in input config.")
                 continue
-            if hasattr(module, 'weight_bits'):
-                assert calibration_config[name]['weight_bits'] == module.weight_bits, f"weight bits of module {name} fail to match"
-            if hasattr(module, 'input_bits'):
-                assert calibration_config[name]['input_bits'] == module.input_bits, f"input bits of module {name} fail to match"
+            if module.layer_quant_setting.weight:
+                assert calibration_config[name]['weight_bits'] == module.layer_quant_setting.weight.bits, \
+                    f"weight bits of module {name} fail to match"
+            if module.layer_quant_setting.input:
+                assert calibration_config[name]['input_bits'] == module.layer_quant_setting.input.bits, \
+                    f"input bits of module {name} fail to match"
                 module.tracked_min_input.data = torch.tensor([calibration_config[name]['tracked_min_input']])
                 module.tracked_max_input.data = torch.tensor([calibration_config[name]['tracked_max_input']])
-            if hasattr(module, 'output_bits'):
-                assert calibration_config[name]['output_bits'] == module.output_bits, f"output bits of module {name} fail to match"
+            if module.layer_quant_setting.output:
+                assert calibration_config[name]['output_bits'] == module.layer_quant_setting.output.bits, \
+                    f"output bits of module {name} fail to match"
                 module.tracked_min_output.data = torch.tensor([calibration_config[name]['tracked_min_output']])
                 module.tracked_max_output.data = torch.tensor([calibration_config[name]['tracked_max_output']])
@@ -716,11 +718,13 @@ class QAT_Quantizer(Quantizer):
         self._unwrap_model()
         calibration_config = {}

-        for name, module in self.bound_model.named_modules():
-            if hasattr(module, 'weight_bits') or hasattr(module, 'output_bits'):
+        modules_to_compress = self.get_modules_to_compress()
+        for layer, _ in modules_to_compress:
+            name, module = layer.name, layer.module
+            if hasattr(module.layer_quant_setting, 'weight') or hasattr(module.layer_quant_setting, 'output'):
                 calibration_config[name] = {}
-            if hasattr(module, 'weight_bits'):
-                calibration_config[name]['weight_bits'] = int(module.weight_bits)
+            if module.layer_quant_setting.weight:
+                calibration_config[name]['weight_bits'] = int(module.layer_quant_setting.weight.bits)
                 calibration_config[name]['weight_scale'] = module.weight_scale
                 calibration_config[name]['weight_zero_point'] = module.weight_zero_point
@@ -738,13 +742,14 @@ class QAT_Quantizer(Quantizer):
                     module.register_parameter('bias', actual_bias)
                 else:
                     setattr(module, 'bias', None)
-            if hasattr(module, 'input_bits'):
-                calibration_config[name]['input_bits'] = int(module.input_bits)
+            if module.layer_quant_setting.input:
+                calibration_config[name]['input_bits'] = int(module.layer_quant_setting.input.bits)
                 calibration_config[name]['tracked_min_input'] = float(module.tracked_min_input)
                 calibration_config[name]['tracked_max_input'] = float(module.tracked_max_input)
-            if hasattr(module, 'output_bits'):
-                calibration_config[name]['output_bits'] = int(module.output_bits)
+            if module.layer_quant_setting.output:
+                calibration_config[name]['output_bits'] = int(module.layer_quant_setting.output.bits)
                 calibration_config[name]['tracked_min_output'] = float(module.tracked_min_output)
                 calibration_config[name]['tracked_max_output'] = float(module.tracked_max_output)
             self._del_simulated_attr(module)
@@ -1157,7 +1162,7 @@ class LsqQuantizer(Quantizer):
         calibration_config = {}
         for name, module in self.bound_model.named_modules():
-            if hasattr(module, 'input_bits') or hasattr(module, 'output_bits'):
+            if hasattr(module, 'input_bits') or hasattr(module, 'weight_bits') or hasattr(module, 'output_bits'):
                 calibration_config[name] = {}
                 if hasattr(module, 'weight_bits'):
                     calibration_config[name]['weight_bits'] = int(module.weight_bits)
@@ -1177,6 +1182,11 @@ class LsqQuantizer(Quantizer):
                     module.register_parameter('bias', actual_bias)
                 else:
                     setattr(module, 'bias', None)
+            if hasattr(module, 'input_bits'):
+                calibration_config[name]['input_bits'] = int(module.input_bits)
+                abs_max_input = float(module.input_scale * module.input_qmax)
+                calibration_config[name]['tracked_min_input'] = -abs_max_input
+                calibration_config[name]['tracked_max_input'] = abs_max_input
             if hasattr(module, 'output_bits'):
                 calibration_config[name]['output_bits'] = int(module.output_bits)
                 abs_max_output = float(module.output_scale * module.output_qmax)
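Taken together, the export_model changes above make QAT_Quantizer read bit-widths from module.layer_quant_setting instead of loose weight_bits/input_bits/output_bits attributes, while the exported dictionary keeps the same shape. A sketch of one entry of the returned calibration_config for a fully quantized conv layer (the numeric values are illustrative, not taken from the diff):

    import torch

    calibration_config = {
        'conv1': {
            'weight_bits': 8,
            'weight_scale': torch.tensor([0.0036]),    # illustrative tensors
            'weight_zero_point': torch.tensor([121.]),
            'input_bits': 8,
            'tracked_min_input': -0.42,
            'tracked_max_input': 2.82,
            'output_bits': 8,
            'tracked_min_output': -4.33,
            'tracked_max_output': 7.11,
        },
    }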
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py
@@ -123,21 +123,21 @@ class BasicPruner(Pruner):

 class LevelPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Operation types to prune.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict]):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Operation types to prune.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        """
         super().__init__(model, config_list)

     def _validate_config_before_canonical(self, model: Module, config_list: List[Dict]):
@@ -157,36 +157,36 @@ class LevelPruner(BasicPruner):

 class NormPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Conv2d and Linear are supported in NormPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    p : int
-        The order of norm.
-    mode : str
-        'normal' or 'dependency_aware'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the norm of weights and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict], p: int,
                  mode: str = 'normal', dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Conv2d and Linear are supported in NormPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        p
+            The order of norm.
+        mode
+            'normal' or 'dependency_aware'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the norm of weights and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         self.p = p
         self.mode = mode
         self.dummy_input = dummy_input
@@ -217,98 +217,98 @@ class NormPruner(BasicPruner):

 class L1NormPruner(NormPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Conv2d and Linear are supported in L1NormPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    mode : str
-        'normal' or 'dependency_aware'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the l1-norm of weights and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  mode: str = 'normal', dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Conv2d and Linear are supported in L1NormPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        mode
+            'normal' or 'dependency_aware'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the l1-norm of weights and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         super().__init__(model, config_list, 1, mode, dummy_input)

 class L2NormPruner(NormPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Conv2d and Linear are supported in L1NormPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    mode : str
-        'normal' or 'dependency_aware'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the l2-norm of weights and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  mode: str = 'normal', dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Conv2d and Linear are supported in L2NormPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        mode
+            'normal' or 'dependency_aware'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the l2-norm of weights and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         super().__init__(model, config_list, 2, mode, dummy_input)

 class FPGMPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Conv2d and Linear are supported in FPGMPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    mode : str
-        'normal' or 'dependency_aware'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the FPGM of weights and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  mode: str = 'normal', dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Conv2d and Linear are supported in FPGMPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        mode
+            'normal' or 'dependency_aware'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the FPGM of weights and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         self.mode = mode
         self.dummy_input = dummy_input
         super().__init__(model, config_list)
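As a usage illustration of the docstrings above, including the newly documented op_partial_names key, here is a minimal sketch of driving one of these pruners. The model is assumed to be an ordinary torch.nn.Module defined elsewhere, and the sparsity value is arbitrary.

    from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner

    config_list = [{
        'sparsity_per_layer': 0.5,
        'op_types': ['Conv2d'],
        # op_partial_names collects every op_name in the model matching these
        # partial names and is converted to op_names before pruning starts.
        'op_partial_names': ['layer1'],
    }]

    pruner = L1NormPruner(model, config_list)
    _, masks = pruner.compress()   # masks maps each op name to {'weight': 0/1 tensor}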
@@ -338,57 +338,57 @@ class FPGMPruner(BasicPruner):

 class SlimPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - total_sparsity : This is to specify the total sparsity for all layers in this config,
-              each layer may have different sparsity.
-            - max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
-            - op_types : Only BatchNorm2d is supported in SlimPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    trainer : Callable[[Module, Optimizer, Callable], None]
-        A callable function used to train model or just inference. Take model, optimizer, criterion as input.
-        The model will be trained or inferenced `training_epochs` epochs.
-        Example::
-            def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
-                training = model.training
-                model.train(mode=True)
-                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                for batch_idx, (data, target) in enumerate(train_loader):
-                    data, target = data.to(device), target.to(device)
-                    optimizer.zero_grad()
-                    output = model(data)
-                    loss = criterion(output, target)
-                    loss.backward()
-                    # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
-                    optimizer.step()
-                model.train(mode=training)
-    optimizer : torch.optim.Optimizer
-        The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
-        so do not use this optimizer in other places.
-    criterion : Callable[[Tensor, Tensor], Tensor]
-        The criterion function used in trainer. Take model output and target value as input, and return the loss.
-    training_epochs : int
-        The epoch number for training model to sparsify the BN weight.
-    mode : str
-        'normal' or 'global'.
-        If prune the model in a global way, all layer weights with same config will be considered uniformly.
-        That means a single layer may not reach or exceed the sparsity setting in config,
-        but the total pruned weights meet the sparsity setting.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  trainer: Callable[[Module, Optimizer, Callable], None],
                  optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor],
                  training_epochs: int, scale: float = 0.0001, mode='global'):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - total_sparsity : This is to specify the total sparsity for all layers in this config,
+                  each layer may have different sparsity.
+                - max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
+                - op_types : Only BatchNorm2d is supported in SlimPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        trainer
+            A callable function used to train model or just inference. Take model, optimizer, criterion as input.
+            The model will be trained or inferenced `training_epochs` epochs.
+            Example::
+                def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
+                    training = model.training
+                    model.train(mode=True)
+                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    for batch_idx, (data, target) in enumerate(train_loader):
+                        data, target = data.to(device), target.to(device)
+                        optimizer.zero_grad()
+                        output = model(data)
+                        loss = criterion(output, target)
+                        loss.backward()
+                        # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
+                        optimizer.step()
+                    model.train(mode=training)
+        optimizer
+            The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
+            so do not use this optimizer in other places.
+        criterion
+            The criterion function used in trainer. Take model output and target value as input, and return the loss.
+        training_epochs
+            The epoch number for training model to sparsify the BN weight.
+        mode
+            'normal' or 'global'.
+            If prune the model in a global way, all layer weights with same config will be considered uniformly.
+            That means a single layer may not reach or exceed the sparsity setting in config,
+            but the total pruned weights meet the sparsity setting.
+        """
         self.mode = mode
         self.trainer = trainer
         self.optimizer = optimizer
@@ -435,61 +435,61 @@ class SlimPruner(BasicPruner):

 class ActivationPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - op_types : Conv2d and Linear are supported in ActivationPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    trainer : Callable[[Module, Optimizer, Callable], None]
-        A callable function used to train model or just inference. Take model, optimizer, criterion as input.
-        The model will be trained or inferenced `training_epochs` epochs.
-        Example::
-            def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
-                training = model.training
-                model.train(mode=True)
-                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                for batch_idx, (data, target) in enumerate(train_loader):
-                    data, target = data.to(device), target.to(device)
-                    optimizer.zero_grad()
-                    output = model(data)
-                    loss = criterion(output, target)
-                    loss.backward()
-                    # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
-                    optimizer.step()
-                model.train(mode=training)
-    optimizer : torch.optim.Optimizer
-        The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
-        so do not use this optimizer in other places.
-    criterion : Callable[[Tensor, Tensor], Tensor]
-        The criterion function used in trainer. Take model output and target value as input, and return the loss.
-    training_batches
-        The batch number used to collect activations.
-    mode : str
-        'normal' or 'dependency_aware'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the activation-based metrics and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  trainer: Callable[[Module, Optimizer, Callable], None],
                  optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor],
                  training_batches: int, activation: str = 'relu',
                  mode: str = 'normal', dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - op_types : Conv2d and Linear are supported in ActivationPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        trainer
+            A callable function used to train model or just inference. Take model, optimizer, criterion as input.
+            The model will be trained or inferenced `training_epochs` epochs.
+            Example::
+                def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
+                    training = model.training
+                    model.train(mode=True)
+                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    for batch_idx, (data, target) in enumerate(train_loader):
+                        data, target = data.to(device), target.to(device)
+                        optimizer.zero_grad()
+                        output = model(data)
+                        loss = criterion(output, target)
+                        loss.backward()
+                        # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
+                        optimizer.step()
+                    model.train(mode=training)
+        optimizer
+            The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
+            so do not use this optimizer in other places.
+        criterion
+            The criterion function used in trainer. Take model output and target value as input, and return the loss.
+        training_batches
+            The batch number used to collect activations.
+        mode
+            'normal' or 'dependency_aware'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the activation-based metrics and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         self.mode = mode
         self.dummy_input = dummy_input
         self.trainer = trainer
@@ -553,69 +553,69 @@ class ActivationMeanRankPruner(ActivationPruner):

 class TaylorFOWeightPruner(BasicPruner):
-    """
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - total_sparsity : This is to specify the total sparsity for all layers in this config,
-              each layer may have different sparsity.
-            - max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
-            - op_types : Conv2d and Linear are supported in TaylorFOWeightPruner.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    trainer : Callable[[Module, Optimizer, Callable]
-        A callable function used to train model or just inference. Take model, optimizer, criterion as input.
-        The model will be trained or inferenced `training_epochs` epochs.
-        Example::
-            def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
-                training = model.training
-                model.train(mode=True)
-                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                for batch_idx, (data, target) in enumerate(train_loader):
-                    data, target = data.to(device), target.to(device)
-                    optimizer.zero_grad()
-                    output = model(data)
-                    loss = criterion(output, target)
-                    loss.backward()
-                    # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
-                    optimizer.step()
-                model.train(mode=training)
-    optimizer : torch.optim.Optimizer
-        The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
-        so do not use this optimizer in other places.
-    criterion : Callable[[Tensor, Tensor], Tensor]
-        The criterion function used in trainer. Take model output and target value as input, and return the loss.
-    training_batches : int
-        The batch number used to collect activations.
-    mode : str
-        'normal', 'dependency_aware' or 'global'.
-        If prune the model in a dependency-aware way, this pruner will
-        prune the model according to the taylorFO and the channel-dependency or
-        group-dependency of the model. In this way, the pruner will force the conv layers
-        that have dependencies to prune the same channels, so the speedup module can better
-        harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
-        , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
-        dependency between the conv layers.
-        If prune the model in a global way, all layer weights with same config will be considered uniformly.
-        That means a single layer may not reach or exceed the sparsity setting in config,
-        but the total pruned weights meet the sparsity setting.
-    dummy_input : Optional[torch.Tensor]
-        The dummy input to analyze the topology constraints. Note that, the dummy_input
-        should on the same device with the model.
-    """
-
     def __init__(self, model: Module, config_list: List[Dict],
                  trainer: Callable[[Module, Optimizer, Callable], None],
                  optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor],
                  training_batches: int, mode: str = 'normal',
                  dummy_input: Optional[Tensor] = None):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - total_sparsity : This is to specify the total sparsity for all layers in this config,
+                  each layer may have different sparsity.
+                - max_sparsity_per_layer : Always used with total_sparsity. Limit the max sparsity of each layer.
+                - op_types : Conv2d and Linear are supported in TaylorFOWeightPruner.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        trainer
+            A callable function used to train model or just inference. Take model, optimizer, criterion as input.
+            The model will be trained or inferenced `training_epochs` epochs.
+            Example::
+                def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
+                    training = model.training
+                    model.train(mode=True)
+                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    for batch_idx, (data, target) in enumerate(train_loader):
+                        data, target = data.to(device), target.to(device)
+                        optimizer.zero_grad()
+                        output = model(data)
+                        loss = criterion(output, target)
+                        loss.backward()
+                        # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
+                        optimizer.step()
+                    model.train(mode=training)
+        optimizer
+            The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
+            so do not use this optimizer in other places.
+        criterion
+            The criterion function used in trainer. Take model output and target value as input, and return the loss.
+        training_batches
+            The batch number used to collect activations.
+        mode
+            'normal', 'dependency_aware' or 'global'.
+            If prune the model in a dependency-aware way, this pruner will
+            prune the model according to the taylorFO and the channel-dependency or
+            group-dependency of the model. In this way, the pruner will force the conv layers
+            that have dependencies to prune the same channels, so the speedup module can better
+            harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
+            , the dummy_input cannot be None, because the pruner needs a dummy input to trace the
+            dependency between the conv layers.
+            If prune the model in a global way, all layer weights with same config will be considered uniformly.
+            That means a single layer may not reach or exceed the sparsity setting in config,
+            but the total pruned weights meet the sparsity setting.
+        dummy_input
+            The dummy input to analyze the topology constraints. Note that, the dummy_input
+            should on the same device with the model.
+        """
         self.mode = mode
         self.dummy_input = dummy_input
         self.trainer = trainer
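A compact sketch tying together the trainer, optimizer and criterion pieces documented above for TaylorFOWeightPruner. The model and train_loader are assumed to exist already, and the number of training batches is arbitrary.

    import torch
    import torch.nn.functional as F
    from nni.algorithms.compression.v2.pytorch.pruning import TaylorFOWeightPruner

    def trainer(model, optimizer, criterion):
        model.train()
        for data, target in train_loader:               # assumed DataLoader
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()                             # gradients feed the Taylor metric
            optimizer.step()

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = F.cross_entropy
    config_list = [{'total_sparsity': 0.5, 'op_types': ['Conv2d']}]

    pruner = TaylorFOWeightPruner(model, config_list, trainer, optimizer, criterion,
                                  training_batches=20)
    masked_model, masks = pruner.compress()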
@@ -674,53 +674,51 @@ class ADMMPruner(BasicPruner):
     Only in the final iteration, the mask will be generated and apply to model wrapper.
     The original paper refer to: https://arxiv.org/abs/1804.03294.
-
-    Parameters
-    ----------
-    model : torch.nn.Module
-        Model to be pruned.
-    config_list : List[Dict]
-        Supported keys:
-            - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
-            - sparsity_per_layer : Equals to sparsity.
-            - rho : Penalty parameters in ADMM algorithm.
-            - op_types : Operation types to prune.
-            - op_names : Operation names to prune.
-            - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
-    trainer : Callable[[Module, Optimizer, Callable]
-        A callable function used to train model or just inference. Take model, optimizer, criterion as input.
-        The model will be trained or inferenced `training_epochs` epochs.
-        Example::
-            def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
-                training = model.training
-                model.train(mode=True)
-                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                for batch_idx, (data, target) in enumerate(train_loader):
-                    data, target = data.to(device), target.to(device)
-                    optimizer.zero_grad()
-                    output = model(data)
-                    loss = criterion(output, target)
-                    loss.backward()
-                    # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
-                    optimizer.step()
-                model.train(mode=training)
-    optimizer : torch.optim.Optimizer
-        The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
-        so do not use this optimizer in other places.
-    criterion : Callable[[Tensor, Tensor], Tensor]
-        The criterion function used in trainer. Take model output and target value as input, and return the loss.
-    iterations : int
-        The total iteration number in admm pruning algorithm.
-    training_epochs : int
-        The epoch number for training model in each iteration.
     """

     def __init__(self, model: Module, config_list: List[Dict],
                  trainer: Callable[[Module, Optimizer, Callable], None],
                  optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor],
                  iterations: int, training_epochs: int):
+        """
+        Parameters
+        ----------
+        model
+            Model to be pruned.
+        config_list
+            Supported keys:
+                - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
+                - sparsity_per_layer : Equals to sparsity.
+                - rho : Penalty parameters in ADMM algorithm. Default: 1e-4.
+                - op_types : Operation types to prune.
+                - op_names : Operation names to prune.
+                - op_partial_names: An auxiliary field collecting matched op_names in model, then this will convert to op_names.
+                - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
+        trainer
+            A callable function used to train model or just inference. Take model, optimizer, criterion as input.
+            The model will be trained or inferenced `training_epochs` epochs.
+            Example::
+                def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
+                    training = model.training
+                    model.train(mode=True)
+                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    for batch_idx, (data, target) in enumerate(train_loader):
+                        data, target = data.to(device), target.to(device)
+                        optimizer.zero_grad()
+                        output = model(data)
+                        loss = criterion(output, target)
+                        loss.backward()
+                        # If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
+                        optimizer.step()
+                    model.train(mode=training)
+        optimizer
+            The optimizer instance used in trainer. Note that this optimizer might be patched during collect data,
+            so do not use this optimizer in other places.
+        criterion
+            The criterion function used in trainer. Take model output and target value as input, and return the loss.
+        iterations
+            The total iteration number in admm pruning algorithm.
+        training_epochs
+            The epoch number for training model in each iteration.
+        """
         self.trainer = trainer
         self.optimizer = optimizer
         self.criterion = criterion
nni/algorithms/compression/v2/pytorch/pruning/basic_scheduler.py
@@ -14,29 +14,29 @@ from .tools import TaskGenerator

 class PruningScheduler(BasePruningScheduler):
-    """
-    Parameters
-    ----------
-    pruner
-        The pruner used in pruner scheduler.
-        The scheduler will use `Pruner.reset(model, config_list)` to reset it in each iteration.
-    task_generator
-        Used to generate task for each iteration.
-    finetuner
-        The finetuner handled all finetune logic, use a pytorch module as input.
-    speed_up
-        If set True, speed up the model in each iteration.
-    dummy_input
-        If `speed_up` is True, `dummy_input` is required for trace the model in speed up.
-    evaluator
-        Evaluate the pruned model and give a score.
-        If evaluator is None, the best result refers to the latest result.
-    reset_weight
-        If set True, the model weight will reset to the origin model weight at the end of each iteration step.
-    """
-
     def __init__(self, pruner: Pruner, task_generator: TaskGenerator,
                  finetuner: Callable[[Module], None] = None, speed_up: bool = False,
                  dummy_input: Tensor = None,
                  evaluator: Optional[Callable[[Module], float]] = None,
                  reset_weight: bool = False):
+        """
+        Parameters
+        ----------
+        pruner
+            The pruner used in pruner scheduler.
+            The scheduler will use `Pruner.reset(model, config_list)` to reset it in each iteration.
+        task_generator
+            Used to generate task for each iteration.
+        finetuner
+            The finetuner handled all finetune logic, use a pytorch module as input.
+        speed_up
+            If set True, speed up the model in each iteration.
+        dummy_input
+            If `speed_up` is True, `dummy_input` is required for trace the model in speed up.
+        evaluator
+            Evaluate the pruned model and give a score.
+            If evaluator is None, the best result refers to the latest result.
+        reset_weight
+            If set True, the model weight will reset to the origin model weight at the end of each iteration step.
+        """
         self.pruner = pruner
         self.task_generator = task_generator
         self.finetuner = finetuner
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py
@@ -80,7 +80,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
                 stay_metric = torch.topk(metric.view(-1), stay_metric_num, largest=False)[0]
                 sub_thresholds[name] = stay_metric.max()
                 if expend_times > 1:
-                    stay_metric = stay_metric.expand(stay_metric_num, int(layer_weight_num / metric.numel())).view(-1)
+                    stay_metric = stay_metric.expand(int(layer_weight_num / metric.numel()), stay_metric_num).contiguous().view(-1)
                 metric_list.append(stay_metric)
             total_prune_num = int(total_sparsity * total_weight_num)
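The one-line change above swaps the argument order passed to Tensor.expand and adds the contiguous() call that view(-1) needs. A standalone sketch of why, using a small three-element stay_metric repeated four times (shapes are illustrative):

    import torch

    stay_metric = torch.tensor([0.1, 0.2, 0.3])    # shape (3,)
    repeat = 4

    # Old order fails: a (3,) tensor can only be broadcast along new *leading*
    # dimensions, so expand(3, repeat) raises a size-mismatch RuntimeError.
    # stay_metric.expand(3, repeat)

    # New order works, and contiguous() is required because the expanded view
    # is not contiguous, so view(-1) would otherwise fail.
    flat = stay_metric.expand(repeat, 3).contiguous().view(-1)
    print(flat.shape)                              # torch.Size([12])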
nni/compression/pytorch/quantization/utils.py
@@ -79,5 +79,5 @@ def get_quant_shape(shape, quant_type, quant_scheme):
     if is_per_channel(quant_scheme):
         quant_shape = [1 if idx != default_idx else s for idx, s in enumerate(shape)]
     else:
-        quant_shape = []
+        quant_shape = [1]

     return quant_shape
nni/compression/pytorch/speedup/compress_modules.py
@@ -110,6 +110,8 @@ def replace_prelu(prelu, masks):
     in_mask = in_masks[0]
     weight_mask = weight_mask['weight']
+    if weight_mask.size(0) == 1:
+        return prelu
     pruned_in, remained_in = convert_to_coarse_mask(in_mask, 1)
     pruned_out, remained_out = convert_to_coarse_mask(output_mask, 1)
     n_remained_in = weight_mask.size(0) - pruned_in.size(0)
@@ -221,8 +223,9 @@ def replace_batchnorm1d(norm, masks):
                            affine=norm.affine,
                            track_running_stats=norm.track_running_stats)
     # assign weights
-    new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
-    new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
+    if norm.affine:
+        new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
+        new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
     new_norm.running_mean.data = torch.index_select(norm.running_mean.data, 0, remained_in)
@@ -264,8 +267,9 @@ def replace_batchnorm2d(norm, masks):
                            affine=norm.affine,
                            track_running_stats=norm.track_running_stats)
     # assign weights
-    new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
-    new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
+    if norm.affine:
+        new_norm.weight.data = torch.index_select(norm.weight.data, 0, remained_in)
+        new_norm.bias.data = torch.index_select(norm.bias.data, 0, remained_in)
     new_norm.running_mean.data = torch.index_select(norm.running_mean.data, 0, remained_in)
nni/compression/pytorch/speedup/compressor.py
@@ -23,28 +23,27 @@ _logger.setLevel(logging.INFO)

 class ModelSpeedup:
     """
     This class is to speedup the model with provided weight mask.
-
-    Parameters
-    ----------
-    model : pytorch model
-        The model user wants to speed up
-    dummy_input : pytorch tensor, tuple of tensor, list of tensor
-        Note: The first dimension of the dummy_input should be the batchsize.
-        The dummy input for ```jit.trace```, users should put it on the right
-        device.
-    masks_file : str/dict
-        The path of user provided mask file, or the mask object
-    map_location : str
-        the device on which masks are placed, same to map_location in ```torch.load```
-    batch_dim : int
-        the index of batch dimension in the dummy_input
-    confidence: the confidence coefficient of the sparsity inference. This value is
-        actually used as the batchsize of the dummy_input.
     """

     def __init__(self, model, dummy_input, masks_file, map_location=None,
                  batch_dim=0, confidence=8):
+        """
+        Parameters
+        ----------
+        model : pytorch model
+            The model user wants to speed up
+        dummy_input : pytorch tensor, tuple of tensor, list of tensor
+            Note: The first dimension of the dummy_input should be the batchsize.
+            The dummy input for ```jit.trace```, users should put it on the right
+            device.
+        masks_file : str/dict
+            The path of user provided mask file, or the mask object
+        map_location : str
+            the device on which masks are placed, same to map_location in ```torch.load```
+        batch_dim : int
+            the index of batch dimension in the dummy_input
+        confidence: the confidence coefficient of the sparsity inference. This value is
+            actually used as the batchsize of the dummy_input.
+        """
         assert confidence > 1
         # The auto inference will change the values of the parameters in the model
         # so we need make a copy before the mask inference
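For reference, a minimal sketch of how this class is normally driven, with a mask file produced by one of the pruners; the path and input shape are illustrative.

    import torch
    from nni.compression.pytorch import ModelSpeedup

    dummy_input = torch.rand(8, 3, 224, 224)    # first dimension is the batch size
    ModelSpeedup(model, dummy_input, 'mask.pth', confidence=8).speedup_model()
    # the masked layers of `model` are now replaced by physically smaller modules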
nni/nas/benchmarks/constants.py
 import os

+ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR'
 ENV_NNI_HOME = 'NNI_HOME'
 ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
 DEFAULT_CACHE_DIR = '~/.cache'
@@ -10,7 +11,7 @@ def _get_nasbenchmark_dir():
     nni_home = os.path.expanduser(
         os.getenv(ENV_NNI_HOME,
                   os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
-    return os.path.join(nni_home, 'nasbenchmark')
+    return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark'))

 DATABASE_DIR = _get_nasbenchmark_dir()
nni/nas/benchmarks/download.py
new file mode 100644
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('NAS benchmark downloader')
+    parser.add_argument('benchmark_name', choices=['nasbench101', 'nasbench201', 'nds'])
+    args = parser.parse_args()
+
+    from .utils import download_benchmark
+    download_benchmark(args.benchmark_name)
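Since the new module only defines a __main__ entry point, it is presumably meant to be run directly, for example as `python -m nni.nas.benchmarks.download nasbench201`, which forwards the chosen benchmark name to download_benchmark from the sibling utils module.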
nni/retiarii/converter/graph_gen.py
@@ -381,17 +381,8 @@ class GraphConverter:
             # step #1: generate graph ir for this method
             method_ir_graph = Graph(model=ir_model, graph_id=-100, name='temp_graph', _internal=True)
-            method_node_index = self.handle_graph_nodes(script_module, script_method.graph, module,
-                                                        module_name, ir_model, method_ir_graph, shared_module_index)
-            for _output in script_method.graph.outputs():
-                method_ir_graph._add_output(_convert_name(_output.debugName()))
-                predecessor_node_outputs = [o for o in _output.node().outputs()]
-                if len(predecessor_node_outputs) == 1:
-                    src_node_idx = None
-                else:
-                    src_node_idx = predecessor_node_outputs.index(_output)
-                method_ir_graph.add_edge(head=(method_node_index[_output.node()], src_node_idx), tail=(method_ir_graph.output_node, None))
+            self.handle_graph_nodes(script_module, script_method.graph, module,
+                                    module_name, ir_model, method_ir_graph, shared_module_index)
             self.refine_graph(method_ir_graph)

             # step #2: merge this graph to its module graph
@@ -491,18 +482,24 @@ class GraphConverter:
         for node in sm_graph.nodes():
             handle_single_node(node)

-        if node_index == {}:
-            # here is an example that the ir_graph is empty
-            # add an edge from head to tail to handle this situation
-            ir_graph.add_edge(head=(ir_graph.input_node, 0), tail=(ir_graph.output_node, None))
+        if node_index != {}:
+            for _output in sm_graph.outputs():
+                ir_graph._add_output(_convert_name(_output.debugName()))
+                predecessor_node_outputs = [o for o in _output.node().outputs()]
+                if len(predecessor_node_outputs) == 1:
+                    src_node_idx = None
+                else:
+                    src_node_idx = predecessor_node_outputs.index(_output)
+                ir_graph.add_edge(head=(node_index[_output.node()], src_node_idx), tail=(ir_graph.output_node, None))
+        else:
+            # here is an example that the ir_graph and node_index is empty
+            # graph(%self : __torch__.torchmodels.googlenet.GoogLeNet,
+            #       %x.1 : Tensor):
+            #   return (%x.1)
+            # add a noop_identity node to handle this situation
+            self.global_seq += 1
+            ni_node = ir_graph.add_node(build_full_name(module_name, 'noop_identity', self.global_seq), 'noop_identity')
+            ir_graph.add_edge(head=(ir_graph.input_node, 0), tail=(ni_node, None))
+            ir_graph.add_edge(head=(ni_node, None), tail=(ir_graph.output_node, None))
+            for _output in sm_graph.outputs():
+                node_index[_output.node()] = ni_node

         return node_index

     def merge_aten_slices(self, ir_graph):
         """
@@ -625,20 +622,8 @@ class GraphConverter:
         ir_graph = Graph(model=ir_model, graph_id=self.global_graph_id, name=module_name, _internal=True)

         # handle graph nodes
-        node_index = self.handle_graph_nodes(script_module, sm_graph, module,
-                                             module_name, ir_model, ir_graph)
-        # handle graph outputs
-        for _output in sm_graph.outputs():
-            ir_graph._add_output(_convert_name(_output.debugName()))
-            predecessor_node_outputs = [o for o in _output.node().outputs()]
-            if len(predecessor_node_outputs) == 1:
-                src_node_idx = None
-            else:
-                src_node_idx = predecessor_node_outputs.index(_output)
-            ir_graph.add_edge(head=(node_index[_output.node()], src_node_idx), tail=(ir_graph.output_node, None))
+        self.handle_graph_nodes(script_module, sm_graph, module,
+                                module_name, ir_model, ir_graph)
         self.refine_graph(ir_graph)

         ir_graph._register()
@@ -690,7 +675,7 @@ class GraphConverterWithShape(GraphConverter):
     Known issues
     ------------
     1. `InputChoice` and `ValueChoice` not supported yet.
-    2. Currently random inputs are feded while tracing layerchoice.
+    2. Currently random inputs are fed while tracing layerchoice.
        If forward path of candidates depends on input data, then wrong path will be traced.
        This will result in incomplete shape info.
     """
nni/retiarii/evaluator/pytorch/cgo/accelerator.py
-from typing import Any, Union, Optional, List
-
-import torch
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Any, List, Optional, Union
+
+import torch
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.plugins.environments import ClusterEnvironment
 from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin
 from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.plugins import Plugin

 from ....serializer import serialize_cls
@@ -69,9 +70,8 @@ class BypassPlugin(TrainingTypePlugin):
         # bypass device placement from pytorch lightning
         pass

-    def setup(self, model: torch.nn.Module) -> torch.nn.Module:
-        self.model_to_device()
-        return self.model
+    def setup(self) -> None:
+        pass

     @property
     def is_global_zero(self) -> bool:
@@ -100,8 +100,9 @@ def get_accelerator_connector(
         deterministic: bool = False,
         precision: int = 32,
         amp_backend: str = 'native',
-        amp_level: str = 'O2',
-        plugins: Optional[Union[List[Union[Plugin, ClusterEnvironment, str]], Plugin, ClusterEnvironment, str]] = None,
+        amp_level: Optional[str] = None,
+        plugins: Optional[Union[List[Union[TrainingTypePlugin, ClusterEnvironment, str]],
+                                TrainingTypePlugin, ClusterEnvironment, str]] = None,
         **other_trainier_kwargs) -> AcceleratorConnector:
     gpu_ids = Trainer()._parse_devices(gpus, auto_select_gpus, tpu_cores)
     return AcceleratorConnector(
nni/retiarii/evaluator/pytorch/cgo/evaluator.py
@@ -7,7 +7,7 @@ from typing import Dict, List, Optional, Union
 import torch.nn as nn
 import torch.optim as optim
-import pytorch_lightning as pl
+import torchmetrics
 from torch.utils.data import DataLoader

 import nni
@@ -19,7 +19,7 @@ from ....serializer import serialize_cls
 @serialize_cls
 class _MultiModelSupervisedLearningModule(LightningModule):
-    def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric],
+    def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
                  n_models: int = 0,
                  learning_rate: float = 0.001,
                  weight_decay: float = 0.,
@@ -119,7 +119,7 @@ class MultiModelSupervisedLearningModule(_MultiModelSupervisedLearningModule):
         Class for optimizer (not an instance). default: ``Adam``
     """

-    def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric],
+    def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
                  learning_rate: float = 0.001,
                  weight_decay: float = 0.,
                  optimizer: optim.Optimizer = optim.Adam):
@@ -180,7 +180,7 @@ class _RegressionModule(MultiModelSupervisedLearningModule):
                  learning_rate: float = 0.001,
                  weight_decay: float = 0.,
                  optimizer: optim.Optimizer = optim.Adam):
-        super().__init__(criterion, {'mse': pl.metrics.MeanSquaredError},
+        super().__init__(criterion, {'mse': torchmetrics.MeanSquaredError},
                          learning_rate=learning_rate, weight_decay=weight_decay, optimizer=optimizer)
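The pl.metrics to torchmetrics replacements in this file (and in lightning.py below) track pytorch-lightning's move of its metrics API into the standalone torchmetrics package, where pl.metrics had been deprecated; Metric, Accuracy and MeanSquaredError keep the same class names under the new import.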
nni/retiarii/evaluator/pytorch/lightning.py
...
...
@@ -9,6 +9,7 @@ from typing import Dict, NoReturn, Union, Optional, List, Type
 import pytorch_lightning as pl
 import torch.nn as nn
 import torch.optim as optim
+import torchmetrics
 from torch.utils.data import DataLoader

 import nni
...
...
@@ -140,7 +141,7 @@ def _check_dataloader(dataloader):
 ### The following are some commonly used Lightning modules ###

 class _SupervisedLearningModule(LightningModule):

-    def __init__(self, criterion: nn.Module, metrics: Dict[str, pl.metrics.Metric],
+    def __init__(self, criterion: nn.Module, metrics: Dict[str, torchmetrics.Metric],
                  learning_rate: float = 0.001,
                  weight_decay: float = 0.,
                  optimizer: optim.Optimizer = optim.Adam,
...
...
@@ -213,7 +214,7 @@ class _SupervisedLearningModule(LightningModule):
         return {name: self.trainer.callback_metrics['val_' + name].item() for name in self.metrics}


-class _AccuracyWithLogits(pl.metrics.Accuracy):
+class _AccuracyWithLogits(torchmetrics.Accuracy):
     def update(self, pred, target):
         return super().update(nn.functional.softmax(pred), target)
...
...
@@ -278,7 +279,7 @@ class _RegressionModule(_SupervisedLearningModule):
                  weight_decay: float = 0.,
                  optimizer: optim.Optimizer = optim.Adam,
                  export_onnx: bool = True):
-        super().__init__(criterion, {'mse': pl.metrics.MeanSquaredError},
+        super().__init__(criterion, {'mse': torchmetrics.MeanSquaredError},
                          learning_rate=learning_rate, weight_decay=weight_decay,
                          optimizer=optimizer, export_onnx=export_onnx)
...
...
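The same substitution applies throughout this module; for example, a metric that previously subclassed pl.metrics.Accuracy now subclasses torchmetrics.Accuracy, mirroring the _AccuracyWithLogits change above (a minimal sketch, assuming a torchmetrics version contemporary with this commit):

# Minimal sketch of the _AccuracyWithLogits pattern after the migration.
import torch.nn as nn
import torchmetrics

class AccuracyWithLogits(torchmetrics.Accuracy):
    # apply softmax before delegating to torchmetrics.Accuracy.update
    def update(self, pred, target):
        return super().update(nn.functional.softmax(pred, dim=-1), target)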
nni/retiarii/experiment/pytorch.py
View file @
070df4a0
...
...
@@ -219,7 +219,8 @@ class RetiariiExperiment(Experiment):
         elif self.config.execution_engine == 'cgo':
             from ..execution.cgo_engine import CGOExecutionEngine
             # assert self.config.trial_gpu_number==1, "trial_gpu_number must be 1 to use CGOExecutionEngine"
             assert self.config.training_service.platform == 'remote', \
                 "CGO execution engine currently only supports remote training service"
+            assert self.config.batch_waiting_time is not None
             devices = self._construct_devices()
             engine = CGOExecutionEngine(devices,
...
...
@@ -273,11 +274,10 @@ class RetiariiExperiment(Experiment):
         devices = []
         if hasattr(self.config.training_service, 'machine_list'):
             for machine in self.config.training_service.machine_list:
                 assert machine.gpu_indices is not None, \
                     'gpu_indices must be set in RemoteMachineConfig for CGO execution engine'
                 for gpu_idx in machine.gpu_indices:
                     devices.append(GPUDevice(machine.host, gpu_idx))
         else:
             for gpu_idx in self.config.training_service.gpu_indices:
                 devices.append(GPUDevice('local', gpu_idx))
         return devices

     def _create_dispatcher(self):
...
...
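Taken together, the new checks require a remote training service, a non-None batch_waiting_time, and gpu_indices on every machine before the CGO engine is constructed. A hypothetical configuration sketch (field names are taken from the asserts and _construct_devices above; the config classes, import paths, and values are assumptions):

# Hypothetical sketch of a config that would pass the CGO checks above.
from nni.experiment import RemoteMachineConfig          # assumed import path
from nni.retiarii.experiment.pytorch import RetiariiExeConfig

config = RetiariiExeConfig('remote')
config.execution_engine = 'cgo'
config.batch_waiting_time = 10                          # must not be None
machine = RemoteMachineConfig()
machine.host = '10.0.0.1'                               # illustrative
machine.gpu_indices = [0, 1]                            # required for CGO device construction
config.training_service.machine_list = [machine]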
nni/retiarii/operation_def/torch_op_def.py
View file @
070df4a0
...
...
@@ -254,6 +254,13 @@ class AtenFloordiv(PyTorchOperation):
         return f'{output} = {inputs[0]} // {inputs[1]}'

+class AtenMul(PyTorchOperation):
+    _ori_type_name = ['aten::mul']
+    def to_forward_code(self, field: str, output: str, inputs: List[str], inputs_value: List[Any] = None) -> str:
+        return f'{output} = {inputs[0]} * {inputs[1]}'
+
 class AtenLen(PyTorchOperation):
     _ori_type_name = ['aten::len']
...
...
@@ -491,7 +498,8 @@ class AtenAvgpool2d(PyTorchOperation):
 class ToDevice(PyTorchOperation):
     _artificial_op_name = "ToDevice"

-    def __init__(self, type_name: str, parameters: Dict[str, Any], _internal: bool = False):
+    def __init__(self, type_name: str, parameters: Dict[str, Any], _internal: bool = False,
+                 attributes: Dict[str, Any] = None):
         self.type = "ToDevice"
         self.device = parameters['device']
         self.overridden_device_repr = None
...
...
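For reference, the new AtenMul operator simply formats a single multiplication line from its input and output names. A standalone re-statement of that code-generation pattern (not the class itself):

# Standalone sketch mirroring AtenMul.to_forward_code from the hunk above.
from typing import List

def aten_mul_forward_code(output: str, inputs: List[str]) -> str:
    return f'{output} = {inputs[0]} * {inputs[1]}'

print(aten_mul_forward_code('out_1', ['x_1', 'x_2']))   # prints: out_1 = x_1 * x_2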
nni/tools/nnictl/launcher_utils.py
View file @
070df4a0
...
...
@@ -57,6 +57,8 @@ def parse_path(experiment_config, config_path):
         expand_path(experiment_config['assessor'], 'codeDir')
     if experiment_config.get('advisor'):
         expand_path(experiment_config['advisor'], 'codeDir')
+        if experiment_config['advisor'].get('classArgs') and experiment_config['advisor']['classArgs'].get('config_space'):
+            expand_path(experiment_config['advisor']['classArgs'], 'config_space')
     if experiment_config.get('machineList'):
         for index in range(len(experiment_config['machineList'])):
             expand_path(experiment_config['machineList'][index], 'sshKeyPath')
...
...
@@ -95,8 +97,8 @@ def parse_path(experiment_config, config_path):
     if experiment_config.get('advisor'):
         parse_relative_path(root_path, experiment_config['advisor'], 'codeDir')
         # for BOHB when delivering a ConfigSpace file directly
-        if experiment_config.get('advisor').get('classArgs') and experiment_config.get('advisor').get('classArgs').get('config_space'):
-            parse_relative_path(root_path, experiment_config.get('advisor').get('classArgs'), 'config_space')
+        if experiment_config['advisor'].get('classArgs') and experiment_config['advisor']['classArgs'].get('config_space'):
+            parse_relative_path(root_path, experiment_config['advisor']['classArgs'], 'config_space')
     if experiment_config.get('machineList'):
         for index in range(len(experiment_config['machineList'])):
...
...
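The new branch expands a relative ConfigSpace path supplied through the advisor's classArgs (the BOHB case mentioned in the comment). A small self-contained illustration, with hypothetical advisor name and paths:

# Illustrative fragment; the advisor name and paths are hypothetical.
experiment_config = {
    'advisor': {
        'builtinAdvisorName': 'BOHB',
        'codeDir': '.',
        'classArgs': {'config_space': './search_space.pcs'},   # relative path to be resolved
    },
}

if experiment_config.get('advisor'):
    class_args = experiment_config['advisor'].get('classArgs')
    if class_args and class_args.get('config_space'):
        print('config_space to expand:', class_args['config_space'])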
test/nni_test/nnitest/test_quantize_model_speedup.py
View file @
070df4a0
...
...
@@ -97,10 +97,10 @@ class QuantizationSpeedupTestCase(TestCase):
         model = BackboneModel()
         configure_list = {
-            'conv1':{'weight_bit':8, 'activation_bit':8},
-            'conv2':{'weight_bit':32, 'activation_bit':32},
-            'fc1':{'weight_bit':16, 'activation_bit':16},
-            'fc2':{'weight_bit':8, 'activation_bit':8}
+            'conv1':{'weight_bits':8, 'output_bits':8},
+            'conv2':{'weight_bits':32, 'output_bits':32},
+            'fc1':{'weight_bits':16, 'output_bits':16},
+            'fc2':{'weight_bits':8, 'output_bits':8}
         }

         optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
...
...
@@ -126,16 +126,16 @@ class QuantizationSpeedupTestCase(TestCase):
         model = BackboneModel()
         configure_list = [{
-                'quant_types': ['weight', 'output'],
-                'quant_bits': {'weight': 8, 'output': 8},
+                'quant_types': ['input', 'weight'],
+                'quant_bits': {'input': 8, 'weight': 8},
                 'op_names': ['conv1']
             }, {
                 'quant_types': ['output'],
                 'quant_bits': {'output': 8},
                 'op_names': ['relu1']
             }, {
-                'quant_types': ['weight', 'output'],
-                'quant_bits': {'weight': 8, 'output': 8},
+                'quant_types': ['input', 'weight'],
+                'quant_bits': {'input': 8, 'weight': 8},
                 'op_names': ['conv2']
             }, {
                 'quant_types': ['output'],
...
...
@@ -145,8 +145,9 @@ class QuantizationSpeedupTestCase(TestCase):
         ]

         # finetune the model by using QAT
+        dummy_input = torch.randn(1, 1, 28, 28)
         optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
-        quantizer = QAT_Quantizer(model, configure_list, optimizer)
+        quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
         quantizer.compress()
         model.to(self.device)
...
...
@@ -178,13 +179,13 @@ class QuantizationSpeedupTestCase(TestCase):
         model = vgg16()
         configure_list = {
-            'features.0':{'weight_bit':8, 'activation_bit':8},
-            'features.1':{'weight_bit':32, 'activation_bit':32},
-            'features.2':{'weight_bit':16, 'activation_bit':16},
-            'features.4':{'weight_bit':8, 'activation_bit':8},
-            'features.7':{'weight_bit':8, 'activation_bit':8},
-            'features.8':{'weight_bit':8, 'activation_bit':8},
-            'features.11':{'weight_bit':8, 'activation_bit':8}
+            'features.0':{'weight_bits':8, 'output_bits':8},
+            'features.1':{'weight_bits':32, 'output_bits':32},
+            'features.2':{'weight_bits':16, 'output_bits':16},
+            'features.4':{'weight_bits':8, 'output_bits':8},
+            'features.7':{'weight_bits':8, 'output_bits':8},
+            'features.8':{'weight_bits':8, 'output_bits':8},
+            'features.11':{'weight_bits':8, 'output_bits':8}
         }

         model.to(self.device)
...
...
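Taken together, the updated tests reflect two quantization API changes in v2.5: per-layer keys are now weight_bits / output_bits, and QAT_Quantizer takes a dummy_input for shape tracing. A condensed sketch outside the test harness (the toy model, layer name, and input shape are illustrative; the config keys and quantizer arguments follow the diff):

# Condensed sketch of the updated QAT usage; keys and arguments follow the diff above.
import torch
import torch.nn as nn
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

model = nn.Sequential(nn.Conv2d(1, 8, kernel_size=3), nn.ReLU())
configure_list = [{
    'quant_types': ['input', 'weight'],
    'quant_bits': {'input': 8, 'weight': 8},
    'op_names': ['0'],                         # the Conv2d layer of this toy Sequential
}]
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
dummy_input = torch.randn(1, 1, 28, 28)
quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
quantizer.compress()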