OpenDAS / nni / Commits / cd3a912a

Unverified commit cd3a912a, authored Nov 27, 2019 by SparkSnail; committed by GitHub, Nov 27, 2019

Merge pull request #218 from microsoft/master

merge master

Parents: a0846f2a, e9cba778
Changes (375)

Showing 20 changed files with 2414 additions and 106 deletions (+2414 / -106)
src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py                      +4   -16
src/sdk/pynni/nni/curvefitting_assessor/model_factory.py                       +2   -16
src/sdk/pynni/nni/curvefitting_assessor/test.py                                +2   -17
src/sdk/pynni/nni/env_vars.py                                                  +2   -19
src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py                           +3   -19
src/sdk/pynni/nni/evolution_tuner/test_evolution_tuner.py                      +3   -19
src/sdk/pynni/nni/feature_engineering/__init__.py                              +0   -0
src/sdk/pynni/nni/feature_engineering/feature_selector.py                      +59  -0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/__init__.py                +1   -0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/gbdt_selector.py           +114 -0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/requirements.txt           +1   -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/__init__.py            +1   -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/constants.py           +100 -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/fginitialize.py        +623 -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/fgtrain.py             +228 -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/gradient_selector.py   +631 -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/learnability.py        +529 -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/requirements.txt       +4   -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/syssettings.py         +29  -0
src/sdk/pynni/nni/feature_engineering/gradient_selector/utils.py               +78  -0
src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
A family of functions used by CurvefittingAssessor
"""
import
numpy
as
np
all_models
=
{}
...
...
src/sdk/pynni/nni/curvefitting_assessor/model_factory.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging

import numpy as np

...
...
src/sdk/pynni/nni/curvefitting_assessor/test.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import numpy as np
import unittest

...
...
src/sdk/pynni/nni/env_vars.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
from collections import namedtuple

...
...
src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
evolution_tuner.py
"""
...
...
src/sdk/pynni/nni/evolution_tuner/test_evolution_tuner.py (view file @ cd3a912a)
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
test_evolution_tuner.py
"""
...
...
src/sdk/pynni/nni/feature_engineering/__init__.py (new file, 0 → 100644; view file @ cd3a912a; empty file)
src/sdk/pynni/nni/feature_engineering/feature_selector.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging

_logger = logging.getLogger(__name__)


class FeatureSelector():

    def __init__(self, **kwargs):
        self.selected_features_ = None
        self.X = None
        self.y = None

    def fit(self, X, y, **kwargs):
        """
        Fit the training data to FeatureSelector

        Parameters
        ----------
        X : array-like numpy matrix
            The training input samples, whose shape is [n_samples, n_features].
        y : array-like numpy matrix
            The target values (class labels in classification, real numbers in
            regression), whose shape is [n_samples].
        """
        self.X = X
        self.y = y

    def get_selected_features(self):
        """
        Get the indices of the selected features.

        Returns
        -------
        list :
            Returns the indices of the important features.
        """
        return self.selected_features_
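FeatureSelector above is only an interface: concrete selectors override fit() to populate selected_features_. A minimal hypothetical subclass to illustrate the contract (the variance-threshold logic and the class name are illustrative, not part of this diff):

import numpy as np

from nni.feature_engineering.feature_selector import FeatureSelector


class VarianceSelector(FeatureSelector):
    """Toy selector: keep features whose sample variance exceeds a threshold."""

    def __init__(self, threshold=0.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold  # illustrative hyperparameter

    def fit(self, X, y, **kwargs):
        self.X = X
        self.y = y
        # column indices whose variance is above the threshold
        self.selected_features_ = np.where(np.var(X, axis=0) > self.threshold)[0]

get_selected_features() is inherited unchanged and simply returns selected_features_.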
src/sdk/pynni/nni/feature_engineering/gbdt_selector/__init__.py (new file, 0 → 100644; view file @ cd3a912a)

from .gbdt_selector import GBDTSelector
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gbdt_selector/gbdt_selector.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
"""
gbdt_selector.py including:
class GBDTSelector
"""
import
random
from
sklearn.model_selection
import
train_test_split
from
nni.feature_engineering.feature_selector
import
FeatureSelector
# pylint: disable=E0401
import
lightgbm
as
lgb
class
GBDTSelector
(
FeatureSelector
):
def
__init__
(
self
,
**
kwargs
):
self
.
selected_features_
=
None
self
.
X
=
None
self
.
y
=
None
self
.
feature_importance
=
None
self
.
lgb_params
=
None
self
.
eval_ratio
=
None
self
.
early_stopping_rounds
=
None
self
.
importance_type
=
None
self
.
num_boost_round
=
None
self
.
model
=
None
def
fit
(
self
,
X
,
y
,
**
kwargs
):
"""
Fit the training data to FeatureSelector
Paramters
---------
X : array-like numpy matrix
The training input samples, which shape is [n_samples, n_features].
y : array-like numpy matrix
The target values (class labels in classification, real numbers in
regression). Which shape is [n_samples].
lgb_params : dict
Parameters of lightgbm
eval_ratio : float
The ratio of data size. It's used for split the eval data and train data from self.X.
early_stopping_rounds : int
The early stopping setting in lightgbm.
importance_type : str
Supporting type is 'gain' or 'split'.
num_boost_round : int
num_boost_round in lightgbm.
"""
assert
kwargs
[
'lgb_params'
]
assert
kwargs
[
'eval_ratio'
]
assert
kwargs
[
'early_stopping_rounds'
]
assert
kwargs
[
'importance_type'
]
assert
kwargs
[
'num_boost_round'
]
self
.
X
=
X
self
.
y
=
y
self
.
lgb_params
=
kwargs
[
'lgb_params'
]
self
.
eval_ratio
=
kwargs
[
'eval_ratio'
]
self
.
early_stopping_rounds
=
kwargs
[
'early_stopping_rounds'
]
self
.
importance_type
=
kwargs
[
'importance_type'
]
self
.
num_boost_round
=
kwargs
[
'num_boost_round'
]
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
self
.
X
,
self
.
y
,
test_size
=
self
.
eval_ratio
,
random_state
=
random
.
seed
(
41
))
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
self
.
model
=
lgb
.
train
(
self
.
lgb_params
,
lgb_train
,
num_boost_round
=
self
.
num_boost_round
,
valid_sets
=
lgb_eval
,
early_stopping_rounds
=
self
.
early_stopping_rounds
)
self
.
feature_importance
=
self
.
model
.
feature_importance
(
self
.
importance_type
)
def
get_selected_features
(
self
,
topk
):
"""
Fit the training data to FeatureSelector
Returns
-------
list :
Return the index of imprtant feature.
"""
assert
topk
>
0
self
.
selected_features_
=
self
.
feature_importance
.
argsort
()[
-
topk
:][::
-
1
]
return
self
.
selected_features_
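A minimal usage sketch of GBDTSelector as defined above (the data and parameter values are illustrative; assumes a lightgbm release contemporary with this commit is installed):

import numpy as np

from nni.feature_engineering.gbdt_selector import GBDTSelector

# toy binary-classification data
X = np.random.rand(1000, 20)
y = (X[:, 0] + X[:, 3] > 1.0).astype(int)

selector = GBDTSelector()
selector.fit(X, y,
             lgb_params={'objective': 'binary', 'verbose': -1},
             eval_ratio=0.2,
             early_stopping_rounds=10,
             importance_type='gain',
             num_boost_round=100)

# indices of the five most important features, ranked by gain
print(selector.get_selected_features(topk=5))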
src/sdk/pynni/nni/feature_engineering/gbdt_selector/requirements.txt (new file, 0 → 100644; view file @ cd3a912a)

lightgbm
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gradient_selector/__init__.py (new file, 0 → 100644; view file @ cd3a912a)

from .gradient_selector import FeatureGradientSelector
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gradient_selector/constants.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import numpy as np


class StorageLevel:
    DISK = 'disk'
    SPARSE = 'sparse'
    DENSE = 'dense'


class DataFormat:
    SVM = 'svm'
    NUMPY = 'numpy'
    ALL_FORMATS = [SVM, NUMPY]


class Preprocess:
    """
    ZSCORE: center the data to mean 0 and scale to unit variance.
    CENTER: center the data to mean 0.
    """
    ZSCORE = 'zscore'
    CENTER = 'center'


class Device:
    CUDA = 'cuda'
    CPU = 'cpu'


class Checkpoint:
    MODEL = 'model_state_dict'
    OPT = 'optimizer_state_dict'
    RNG = 'torch_rng_state'


class NanError(ValueError):
    pass


class Initialization:
    ZERO = 'zero'
    ON = 'on'
    OFF = 'off'
    ON_HIGH = 'onhigh'
    OFF_HIGH = 'offhigh'
    SKLEARN = 'sklearn'
    RANDOM = 'random'
    VALUE_DICT = {ZERO: 0,
                  ON: 1,
                  OFF: -1,
                  ON_HIGH: 5,
                  OFF_HIGH: -1,
                  SKLEARN: None,
                  RANDOM: None}


class Coefficients:
    """
    Coefficients for the sublinear estimator were computed by running the
    sublinear paper's authors' code.
    """
    SLE = {1: np.array([0.60355337]),
           2: np.array([1.52705001, -0.34841729]),
           3: np.array([2.90254224, -1.87216745, 0.]),
           4: np.array([4.63445685, -5.19936195, 0., 1.50391676]),
           5: np.array([6.92948049, -14.12216211, 9.4475009, 0., -1.21093546]),
           6: np.array([9.54431082, -28.09414643, 31.84703652, -11.18763791, -1.14175281, 0.]),
           7: np.array([12.54505041, -49.64891525, 79.78828031, -46.72250909, 0., 0., 5.02973646]),
           8: np.array([16.03550163, -84.286182, 196.86078756, -215.36747071, 92.63961263, 0., 0., -4.86280869]),
           9: np.array([19.86409184, -130.76801006, 390.95349861, -570.09210416, 354.77764899, 0., -73.84234865, 0., 10.09148767]),
           10: np.array([2.41117752e+01, -1.94946061e+02, 7.34214614e+02, -1.42851995e+03, 1.41567410e+03,
                         -5.81738134e+02, 0., 0., 3.11664751e+01, 1.05018365e+00]),
           11: np.array([28.75280839, -279.22576729, 1280.46325445, -3104.47148101, 3990.6092248, -2300.29413333,
                         0., 427.35289033, 0., 0., -42.17587475]),
           12: np.array([33.85141912, -391.4229382, 2184.97827882, -6716.28280208, 11879.75233977, -11739.97267239,
                         5384.94542245, 0., -674.23291712, 0., 0., 39.37456439])}


EPSILON = 1e-8
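For orientation, Initialization.VALUE_DICT maps an init-type name to the initial per-feature score used by get_init() in fgtrain.py further down this diff; 'sklearn' and 'random' map to None because they are handled specially there. A short illustrative snippet (the feature count D is made up):

import numpy as np

import nni.feature_engineering.gradient_selector.constants as constants

D = 10  # illustrative number of features
x0_on = constants.Initialization.VALUE_DICT['on'] * np.ones(D)    # every score starts at 1
x0_off = constants.Initialization.VALUE_DICT['off'] * np.ones(D)  # every score starts at -1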
src/sdk/pynni/nni/feature_engineering/gradient_selector/fginitialize.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import os
import pickle
import sys
import time

import numpy as np
import scipy.sparse
from sklearn.datasets import load_svmlight_file
import torch
from torch.utils.data import DataLoader, Dataset
# pylint: disable=E0611
from torch.utils.data.dataloader import _DataLoaderIter, _utils

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings

torch.set_default_tensor_type(syssettings.torch.tensortype)
sparsetensor = syssettings.torch.sparse.tensortype

BYTESPERREAL = 8.
BYTESPERGB = 1024. ** 3


class PrepareData(Dataset):

    def __init__(self,
                 path_data=None,
                 data_format=constants.DataFormat.NUMPY,
                 D=None, N=None,
                 classification=True,
                 ordinal=False,
                 balanced=True,
                 preprocess=None,
                 n_to_estimate=None,
                 MAXMEMGB=syssettings.MAXMEMGB,
                 set_params=True,
                 path_mappings=None,
                 X=None,
                 y=None,
                 verbose=0,
                 n_classes=None,
                 device=constants.Device.CPU):
        """
        Dataset class with helpful features and functions for being included in a dataloader
        and managing memory usage.

        Can read the following formats:
            svm: svm light format (sklearn.datasets.load_svmlight_file)
            numpy: pass X and y as numpy or sparse arrays

        Assumes:
            1. if classification, y is in {-1, 1} or continuous and 0-indexed
            2. y can fit into memory
            3. consecutive calls to __getitem__() have consecutive idx values

        Notes:
            1. this implementation is not careful w.r.t. precise memory requirements. For
               example, being able to store one dense row in memory is necessary,
               but not sufficient.
            2. for y with 4.2 billion elements, 31.3 GB of memory is necessary
               @ 8 bytes/scalar. Use partial fit to avoid loading the entire dataset
               at once.
            3. disk_size always refers to the size of the complete data file, even after
               a split().

        Parameters
        ----------
        path_data : str
            Path to load data from
        data_format : str
            File ending for path data.
            "numpy" is the default when passing in X and y
        D : int
            Number of features.
        N : int
            Number of rows.
        classification : bool
            If True, problem is classification, else regression.
        ordinal : bool
            If True, problem is ordinal classification. Requires classification to be True.
        balanced : bool
            If True, each class is weighted equally in optimization, otherwise
            weighting is done via the support of each class. Requires classification to be True.
        preprocess : str
            'zscore', which centers and normalizes the data to unit variance, or
            'center', which only centers the data to 0 mean.
        n_to_estimate : int
            Number of rows of data to estimate.
        MAXMEMGB : float
            Maximum allowable size for a minibatch.
        set_params : bool
            Whether or not to determine the statistics of the dataset.
        path_mappings : str
            Used when streaming from disk.
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        verbose : int
            Controls the verbosity when fitting. Set to 0 for no printing,
            1 or higher for printing every `verbose` number of gradient steps.
        device : str
            'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU.
        n_classes : int
            Number of classes.
        """

        self.path_data = path_data
        if self.path_data:
            self.disk_size = os.path.getsize(path_data)
        else:
            assert X is not None, 'X must be specified if no path data'
            self.disk_size = X.nbytes if not scipy.sparse.issparse(X) else X.data.nbytes
        assert data_format in constants.DataFormat.ALL_FORMATS, \
            'Format must be in {0}.'.format(", ".join(constants.DataFormat.ALL_FORMATS))
        self.format = data_format
        self.classification = classification
        self.ordinal = ordinal
        self.balanced = balanced
        self.MAXMEMGB = MAXMEMGB
        self.preprocess = preprocess
        self.set_params = set_params
        self.verbose = verbose
        self.n_classes = n_classes
        self.device = device
        self.path_data_stats = None

        if D is None:
            assert self.disk_size / BYTESPERGB <= self.MAXMEMGB, \
                'Cannot load data into memory. Supply D.'
            if self.format == constants.DataFormat.SVM:
                self.X, self.y = load_svmlight_file(path_data)
            elif self.format == constants.DataFormat.NUMPY:
                assert X is not None, 'X must be specified in numpy mode'
                assert y is not None, 'y must be specified in numpy mode'
                self.X = X
                self.y = y
                if self.n_classes is None:
                    self.n_classes = np.unique(y).shape[0]
                elif self.classification:
                    assert self.n_classes >= np.unique(y).shape[0], \
                        'n_classes given must be greater than or equal to the number of classes in y'
            else:
                raise NotImplementedError
            self.y = torch.as_tensor(self.y, dtype=torch.get_default_dtype())
            self.N, self.D = self.X.shape
            # assumes X was returned as a sparse array
            self.storage_level = (constants.StorageLevel.SPARSE
                                  if scipy.sparse.issparse(self.X)
                                  else constants.StorageLevel.DENSE)
        else:
            assert N is not None, 'Supply N.'
            self.N, self.D = N, D
            # assume sparse matrix cannot fit into memory
            self.storage_level = constants.StorageLevel.DISK

        self.dense_size_gb = self.get_dense_size()
        # check dense size
        self.set_dense_X()

        self.max_rows = int(self.MAXMEMGB * BYTESPERGB / BYTESPERREAL / self.D)
        assert self.max_rows, \
            'Cannot fit one dense row into %d GB memory.' % self.MAXMEMGB
        self.max_rows = self.max_batch_size()
        sys.stdout.flush()

        if n_to_estimate is None:
            self.n_to_estimate = self.max_batch_size()
        else:
            assert n_to_estimate <= self.N, 'n_to_estimate must be <= N.'
            self.n_to_estimate = n_to_estimate

        # initialize disk loader
        if self.storage_level == constants.StorageLevel.DISK and self.set_params:
            if self.format == constants.DataFormat.SVM:
                raise NotImplementedError(
                    'Please use partial fit to train on datasets that do not fit in memory')
            else:
                raise NotImplementedError

        # TODO: use a passed-in RNG here
        self.ix_statistics = np.random.permutation(self.N)[:self.n_to_estimate]
        self.n_features = self.D
        if self.set_params:
            if self.verbose:
                print('Finding data statistics...', end='')
                sys.stdout.flush()
            Xmn, sv1, Xsd, ymn, ysd = self.compute_data_stats()
            self.set_data_stats(Xmn, sv1, Xsd, ymn, ysd)
            if self.verbose:
                print()
            self.set_return_raw(False)
        else:
            self.set_return_raw(True)

        self.set_return_np(False)

        # this needs to occur after setting preprocessing params
        if (self.storage_level == constants.StorageLevel.DISK and
                self.format == constants.DataFormat.SVM and self.set_params):
            self.loader.batchsize = 1

    def get_dense_size(self):
        return self.N * self.D * BYTESPERREAL / BYTESPERGB

    def set_dense_X(self):
        if self.storage_level != constants.StorageLevel.DISK:
            if self.dense_size_gb <= self.MAXMEMGB:
                if self.storage_level == constants.StorageLevel.SPARSE:
                    self.X = self.X.toarray()
                self.X = torch.as_tensor(self.X, dtype=torch.get_default_dtype())
                self.storage_level = constants.StorageLevel.DENSE

    def set_return_np(self, boolean):
        self.return_np = boolean

    def set_return_raw(self, boolean):
        self.return_raw = boolean

    def save_data_stats(self, path_data_stats):
        """
        Dumps dataset statistics to a pickle file.
        """
        data_stats = {
            'Xmn': self.Xmn,
            'sv1': self.sv1,
            'Xsd': self.Xsd,
            'ymn': self.ymn,
            'ysd': self.ysd,
            'ix_statistics': self.ix_statistics,
        }
        pickle.dump(data_stats, open(path_data_stats, 'wb'))

    def load_data_stats(self, path_data_stats):
        stats = pickle.load(open(path_data_stats, 'rb'))
        self.path_data_stats = path_data_stats

        self.set_data_stats(np.asarray(stats['Xmn']), stats['sv1'],
                            stats['Xsd'], stats['ymn'], stats['ysd'])
        if self.storage_level == constants.StorageLevel.DISK and hasattr(self, 'path_mappings'):
            if 'ix_statistics' in stats:
                self.ix_statistics = stats['ix_statistics']
            else:
                self.ix_statistics = range(self.N)
        self.set_return_raw(False)

    def reset(self):
        """
        Resets the dataloader. Only implemented for disk StorageLevel.
        """
        if self.storage_level == constants.StorageLevel.DENSE:
            pass
        elif self.storage_level == constants.StorageLevel.SPARSE:
            pass
        elif self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                self.loader.reset()
            else:
                raise NotImplementedError

    def todense(self):
        assert hasattr(self, 'Xmn'), 'Set preprocess params first.'
        assert len(self) <= self.max_batch_size(), 'N must be <= max_batch_size().'

        with torch.no_grad():
            dense, _ = self.split(range(len(self)))
            Braw = self.return_raw
            Bnp = self.return_np
            self.set_return_raw(True)
            self.set_return_np(True)
            dense.X, dense.y = [], []

            def f_Xy(X, y):
                dense.X.append(X)
                dense.y.append(y)
            self.apply(f_Xy=f_Xy)
            dense.X = dense.X[-1]
            dense.y = dense.y[-1]
            self.set_return_raw(Braw)
            self.set_return_np(Bnp)
            dense.storage_level = constants.StorageLevel.DENSE

        return dense

    def split(self, ix):
        assert hasattr(self, 'Xmn'), 'Run set_preprocess_params() first.'

        first = type(self)(self.path_data, self.format, self.D,
                           N=len(ix),
                           classification=self.classification,
                           preprocess=self.preprocess,
                           n_to_estimate=None,
                           MAXMEMGB=self.MAXMEMGB,
                           set_params=False)
        second = type(self)(self.path_data, self.format, self.D,
                            N=self.N - len(ix),
                            classification=self.classification,
                            preprocess=self.preprocess,
                            n_to_estimate=None,
                            MAXMEMGB=self.MAXMEMGB,
                            set_params=False)

        first.storage_level = self.storage_level
        second.storage_level = self.storage_level

        # copy preprocess params
        if not self.classification:
            first.ymn = self.ymn
            second.ymn = self.ymn
            first.ysd = self.ysd
            second.ysd = self.ysd
        first.Xmn = self.Xmn
        second.Xmn = self.Xmn
        first.sv1 = self.sv1
        second.sv1 = self.sv1

        if self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                first.Xsd = self.Xsd
                second.Xsd = self.Xsd
            else:
                raise NotImplementedError

        # initialize data structures
        if self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                raise NotImplementedError
            raise NotImplementedError
        elif self.storage_level in [constants.StorageLevel.SPARSE,
                                    constants.StorageLevel.DENSE]:
            first.X, first.y = self.X[ix], self.y[ix]
            ixsec = list(set(range(self.N)).difference(set(ix)))
            second.X, second.y = self.X[ixsec], self.y[ixsec]

        return first, second

    @staticmethod
    def sparse_std(X, X_mean):
        """
        Calculate the column-wise standard deviations of a sparse matrix.
        """
        X_copy = X.copy()
        X_copy.data **= 2  # square non-zero elements
        E_x_squared = np.array(X_copy.mean(axis=0)).ravel()
        Xsd = np.sqrt(E_x_squared - X_mean ** 2)
        return Xsd

    def compute_data_stats(self):
        """
        1. computes/estimates feature means
        2. if preprocess == 'zscore', computes/estimates feature standard devs
        3. if not classification, computes/estimates target mean/standard dev
        4. estimates largest singular value of data matrix
        """
        t = time.time()
        X, y = self.X[self.ix_statistics], self.y[self.ix_statistics]
        preprocess = self.preprocess
        classification = self.classification

        Xmn = (X.mean(dim=0)
               if not scipy.sparse.issparse(X)
               else np.array(X.mean(axis=0)).ravel())
        if preprocess == constants.Preprocess.ZSCORE:
            Xsd = (X.std(dim=0)
                   if not scipy.sparse.issparse(X)
                   else PrepareData.sparse_std(X, Xmn))
            Xsd[Xsd == 0] = 1.
        else:
            Xsd = 1.

        if preprocess is not None and preprocess:
            if preprocess == constants.Preprocess.ZSCORE:
                Xc = (X - Xmn) / Xsd
            else:
                Xc = X - Xmn
        else:
            Xc = X - Xmn
        sv1 = scipy.sparse.linalg.svds(Xc / (
            torch.sqrt(torch.prod(torch.as_tensor(y.size(), dtype=torch.get_default_dtype())))
            if not scipy.sparse.issparse(X)
            else y.numpy().size),
                                       k=1,
                                       which='LM',
                                       return_singular_vectors=False)
        # avoid runaway sv1
        sv1 = np.array([min(np.finfo(np.float32).max, sv1[0])])

        if not classification:
            ymn = y.mean()
            ysd = y.std()
        else:
            # TODO: set these, for each class?
            ymn = 0.
            ysd = 1.
        if self.verbose:
            print(" computing data statistics took: ", time.time() - t)

        return Xmn, sv1, Xsd, ymn, ysd

    def set_data_stats(self, Xmn, sv1, Xsd=1., ymn=0., ysd=1.):
        """
        Saves dataset stats to self to be used for preprocessing.
        """
        self.Xmn = torch.as_tensor(Xmn, dtype=torch.get_default_dtype()).to(self.device)
        self.sv1 = torch.as_tensor(sv1, dtype=torch.get_default_dtype()).to(self.device)
        self.Xsd = torch.as_tensor(Xsd, dtype=torch.get_default_dtype()).to(self.device)
        self.ymn = torch.as_tensor(ymn, dtype=torch.get_default_dtype()).to(self.device)
        self.ysd = torch.as_tensor(ysd, dtype=torch.get_default_dtype()).to(self.device)

    def apply_preprocess(self, X, y):
        """
        Faster on gpu device, while dataloading takes up a large portion of the time.
        """
        with torch.no_grad():
            if not self.classification:
                y = (y.reshape((-1, 1)) - self.ymn) / self.ysd
            else:
                y = y.reshape((-1, 1))
            X = (X - self.Xmn) / self.sv1

            if self.preprocess == constants.Preprocess.ZSCORE:
                X /= self.Xsd

            return X, y

    def max_batch_size(self):
        """
        Return the maximum batchsize for the dataset.
        """
        return int(np.min([self.max_rows, self.N]))

    def apply(self, ix_rows=None, ix_cols=None, f_Xy=None):
        if f_Xy is None:
            return
        if ix_rows is None:
            ix_rows = range(self.N)
        if ix_cols is None:
            ix_cols = range(self.n_features)

        f_Xy((self.X[ix_rows, ix_cols]
              if not self.storage_level == constants.StorageLevel.SPARSE
              else self.X[ix_rows, ix_cols].toarray()), self.y[ix_rows])

    def get_dense_data(self, ix_cols=None, ix_rows=None):
        if ix_cols is None:
            ix_cols = range(self.n_features)
        X = [np.zeros((0, len(ix_cols)))]
        y = [np.zeros((0, 1))]
        Bnp = self.return_np

        def f_Xy(Xb, yb, n):
            X[-1] = np.concatenate((X[-1], Xb), axis=0)
            y[-1] = np.concatenate((y[-1], yb), axis=0)

        self.apply(f_Xy=f_Xy, ix_rows=ix_rows, ix_cols=ix_cols)
        self.set_return_np(Bnp)

        return X[-1], y[-1]

    def __len__(self):
        return self.N

    def getXy(self, idx):
        if self.storage_level == constants.StorageLevel.DENSE:
            X, y = self.X[idx], self.y[idx]
        elif self.storage_level == constants.StorageLevel.SPARSE:
            # assume subset can fit into memory even if the whole matrix can't
            X, y = self.X[idx].toarray(), self.y[idx]
        else:
            raise NotImplementedError
        return X, y

    def __getitem__(self, idx):
        with torch.no_grad():
            X, y = self.getXy(idx)
            X = X.toarray() if scipy.sparse.issparse(X) else X

            X = torch.as_tensor(X, dtype=torch.get_default_dtype()).to(self.device)
            y = torch.as_tensor(y, dtype=torch.get_default_dtype()).to(self.device)

            if not self.return_raw:
                X, y = self.apply_preprocess(X, y)

            if self.classification and (self.n_classes is None or self.n_classes == 2):
                y[y == 0] = -1

            if self.return_np:
                if constants.Device.CPU not in self.device:
                    X = X.cpu()
                    y = y.cpu()
                X = X.numpy()
                y = y.numpy()
                return X, y

            return X, y


class ChunkDataLoader(DataLoader):
    """
    DataLoader class used to more quickly load a batch of indices at once.
    """

    def __iter__(self):
        return _ChunkDataLoaderIter(self)


class _ChunkDataLoaderIter(_DataLoaderIter):
    """
    DataLoaderIter class used to more quickly load a batch of indices at once.
    """

    def __next__(self):
        # only chunk that is edited from base
        if self.num_workers == 0:
            # same-process loading
            indices = next(self.sample_iter)  # may raise StopIteration
            if len(indices) > 1:
                batch = self.dataset[np.array(indices)]
            else:
                batch = self.collate_fn([self.dataset[i] for i in indices])
            if self.pin_memory:
                batch = _utils.pin_memory.pin_memory_batch(batch)
            return batch

        # check if the next sample has already been generated
        if self.rcvd_idx in self.reorder_dict:
            batch = self.reorder_dict.pop(self.rcvd_idx)
            return self._process_next_batch(batch)

        if self.batches_outstanding == 0:
            self._shutdown_workers()
            raise StopIteration

        while True:
            assert (not self.shutdown and self.batches_outstanding > 0)
            idx, batch = self._get_batch()
            self.batches_outstanding -= 1
            if idx != self.rcvd_idx:
                # store out-of-order samples
                self.reorder_dict[idx] = batch
                continue
            return self._process_next_batch(batch)
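A minimal sketch of how PrepareData and ChunkDataLoader above fit together, mirroring how gradient_selector.py later in this diff builds its dataset (values are illustrative; note the private _DataLoaderIter import ties this file to the PyTorch release current at the time of the commit):

import numpy as np

import nni.feature_engineering.gradient_selector.constants as constants
from nni.feature_engineering.gradient_selector.fginitialize import (
    ChunkDataLoader, PrepareData)

X = np.random.rand(500, 20)
y = np.random.randint(0, 2, size=500)

# computes feature means/stds and the largest singular value up front,
# then serves preprocessed torch tensors from __getitem__
data = PrepareData(X=X, y=y,
                   data_format=constants.DataFormat.NUMPY,
                   classification=True,
                   preprocess=constants.Preprocess.ZSCORE,
                   n_classes=2)

# fetches a whole batch of indices in one dataset[np.array(indices)] call
loader = ChunkDataLoader(data, batch_size=100, shuffle=True)
Xb, yb = next(iter(loader))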
src/sdk/pynni/nni/feature_engineering/gradient_selector/fgtrain.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import time

import numpy as np
import torch
from sklearn.feature_selection import SelectKBest, \
    f_classif, mutual_info_classif, f_regression, mutual_info_regression

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings
from nni.feature_engineering.gradient_selector.learnability import Solver
from nni.feature_engineering.gradient_selector.utils import EMA

torch.set_default_tensor_type(syssettings.torch.tensortype)


def get_optim_f_stop(maxiter, maxtime, dftol_stop, freltol_stop,
                     minibatch=True):
    """
    Check stopping conditions.
    """

    discount_factor = 1. / 3

    total_t = [0.]
    df_store = [np.nan]
    it_store = [0]
    relchange_store = [np.nan]
    f_ma = EMA(discount_factor=discount_factor)
    df_ma = EMA(discount_factor=discount_factor)

    def f_stop(f0, v0, it, t):
        flag_stop = False

        total_t[-1] += t
        g = f0.x.grad.clone().cpu().detach()
        df = g.abs().max().numpy().squeeze()
        v = v0.clone().cpu().detach()
        f = v.numpy().squeeze()

        if it >= maxiter:
            flag_stop = True
        elif total_t[-1] >= maxtime:
            flag_stop = True

        f_ma.update(f)
        df_ma.update(df)
        rel_change = f_ma.relchange()

        if ((not minibatch) and (df < dftol_stop)) \
                or (minibatch and (df_ma() < dftol_stop)):
            flag_stop = True

        if rel_change < freltol_stop:
            flag_stop = True

        if not minibatch:
            df_store[-1] = df
        else:
            df_store[-1] = df_ma()
        relchange_store[-1] = rel_change
        it_store[-1] = it

        return flag_stop

    return f_stop, {'t': total_t, 'it': it_store, 'df': df_store,
                    'relchange': relchange_store}


def get_init(data_train, init_type='on', rng=np.random.RandomState(0), prev_score=None):
    """
    Initialize the 'x' variable with different settings.
    """

    D = data_train.n_features
    value_off = constants.Initialization.VALUE_DICT[
        constants.Initialization.OFF]
    value_on = constants.Initialization.VALUE_DICT[
        constants.Initialization.ON]

    if prev_score is not None:
        x0 = prev_score
    elif not isinstance(init_type, str):
        x0 = value_off * np.ones(D)
        x0[init_type] = value_on
    elif init_type.startswith(constants.Initialization.RANDOM):
        d = int(init_type.replace(constants.Initialization.RANDOM, ''))
        x0 = value_off * np.ones(D)
        x0[rng.permutation(D)[:d]] = value_on
    elif init_type == constants.Initialization.SKLEARN:
        B = data_train.return_raw
        X, y = data_train.get_dense_data()
        data_train.set_return_raw(B)
        ix = train_sk_dense(init_type, X, y, data_train.classification)
        x0 = value_off * np.ones(D)
        x0[ix] = value_on
    elif init_type in constants.Initialization.VALUE_DICT:
        x0 = constants.Initialization.VALUE_DICT[init_type] * np.ones(D)
    else:
        raise NotImplementedError(
            'init_type {0} not supported yet'.format(init_type))
    # pylint: disable=E1102
    return torch.tensor(x0.reshape((-1, 1)),
                        dtype=torch.get_default_dtype())


def get_checkpoint(S, stop_conds, rng=None, get_state=True):
    """
    Save the necessary information into a dictionary.
    """

    m = {}
    m['ninitfeats'] = S.ninitfeats
    m['x0'] = S.x0
    x = S.x.clone().cpu().detach()
    m['feats'] = np.where(x.numpy() >= 0)[0]
    m.update({k: v[0] for k, v in stop_conds.items()})
    if get_state:
        m.update({constants.Checkpoint.MODEL: S.state_dict(),
                  constants.Checkpoint.OPT: S.opt_train.state_dict(),
                  constants.Checkpoint.RNG: torch.get_rng_state(),
                  })
        if rng:
            m.update({'rng_state': rng.get_state()})

    return m


def _train(data_train, Nminibatch, order, C, rng, lr_train, debug, maxiter,
           maxtime, init, dftol_stop, freltol_stop, dn_log, accum_steps,
           path_save, shuffle, device=constants.Device.CPU,
           verbose=1,
           prev_checkpoint=None,
           groups=None,
           soft_groups=None):
    """
    Main training loop.
    """

    t_init = time.time()

    x0 = get_init(data_train, init, rng)
    if isinstance(init, str) and init == constants.Initialization.ZERO:
        ninitfeats = -1
    else:
        ninitfeats = np.where(x0.detach().numpy() > 0)[0].size

    S = Solver(data_train, order,
               Nminibatch=Nminibatch, x0=x0, C=C,
               ftransform=lambda x: torch.sigmoid(2 * x),
               get_train_opt=lambda p: torch.optim.Adam(p, lr_train),
               rng=rng,
               accum_steps=accum_steps,
               shuffle=shuffle,
               groups=groups,
               soft_groups=soft_groups,
               device=device,
               verbose=verbose)
    S = S.to(device)

    S.ninitfeats = ninitfeats
    S.x0 = x0

    if prev_checkpoint:
        S.load_state_dict(prev_checkpoint[constants.Checkpoint.MODEL])
        S.opt_train.load_state_dict(prev_checkpoint[constants.Checkpoint.OPT])
        torch.set_rng_state(prev_checkpoint[constants.Checkpoint.RNG])

    minibatch = S.Ntrain != S.Nminibatch
    f_stop, stop_conds = get_optim_f_stop(maxiter, maxtime, dftol_stop,
                                          freltol_stop, minibatch=minibatch)
    if debug:
        pass
    else:
        f_callback = None
    stop_conds['t'][-1] = time.time() - t_init

    S.train(f_stop=f_stop, f_callback=f_callback)

    return get_checkpoint(S, stop_conds, rng), S


def train_sk_dense(ty, X, y, classification):
    if classification:
        if ty.startswith('skf'):
            d = int(ty.replace('skf', ''))
            f_sk = f_classif
        elif ty.startswith('skmi'):
            d = int(ty.replace('skmi', ''))
            f_sk = mutual_info_classif
    else:
        if ty.startswith('skf'):
            d = int(ty.replace('skf', ''))
            f_sk = f_regression
        elif ty.startswith('skmi'):
            d = int(ty.replace('skmi', ''))
            f_sk = mutual_info_regression
    t = time.time()
    clf = SelectKBest(f_sk, k=d)
    clf.fit_transform(X, y.squeeze())
    ix = np.argsort(-clf.scores_)
    ix = ix[np.where(np.invert(np.isnan(clf.scores_[ix])))[0]][:d]
    t = time.time() - t
    return {'feats': ix, 't': t}
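A note on the init_type strings parsed by get_init() above: a key of Initialization.VALUE_DICT broadcasts that score to every feature, 'randomK' turns K randomly chosen features on, and train_sk_dense() picks its SelectKBest scoring function from the 'skf'/'skmi' prefix of its ty argument (F-score or mutual information). A small sketch (assumes `data` is a PrepareData instance like the one in the earlier sketch):

import numpy as np

from nni.feature_engineering.gradient_selector.fgtrain import get_init

# five randomly chosen feature scores start at VALUE_DICT['on'] (=1),
# the rest at VALUE_DICT['off'] (=-1); returned as a (D, 1) torch tensor
x0 = get_init(data, init_type='random5', rng=np.random.RandomState(0))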
src/sdk/pynni/nni/feature_engineering/gradient_selector/gradient_selector.py (new file, 0 → 100644; view file @ cd3a912a)
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import
time
import
numpy
as
np
import
pandas
as
pd
from
sklearn.base
import
BaseEstimator
from
sklearn.feature_selection.base
import
SelectorMixin
from
sklearn.utils.validation
import
check_is_fitted
import
torch
from
nni.feature_engineering.feature_selector
import
FeatureSelector
import
nni.feature_engineering.gradient_selector.constants
as
constants
from
nni.feature_engineering.gradient_selector.fginitialize
import
PrepareData
from
nni.feature_engineering.gradient_selector.fgtrain
import
_train
class
FeatureGradientSelector
(
FeatureSelector
,
BaseEstimator
,
SelectorMixin
):
def
__init__
(
self
,
order
=
4
,
penalty
=
1
,
n_features
=
None
,
max_features
=
None
,
learning_rate
=
1e-1
,
init
=
'zero'
,
n_epochs
=
1
,
shuffle
=
True
,
batch_size
=
1000
,
target_batch_size
=
1000
,
max_time
=
np
.
inf
,
classification
=
True
,
ordinal
=
False
,
balanced
=
True
,
preprocess
=
'zscore'
,
soft_grouping
=
False
,
verbose
=
0
,
device
=
'cpu'
):
"""
FeatureGradientSelector is a class that selects features for a machine
learning model using a gradient based search.
Parameters
----------
order : int
What order of interactions to include. Higher orders
may be more accurate but increase the run time. 12 is the maximum allowed order.
penatly : int
Constant that multiplies the regularization term.
n_features: int
If None, will automatically choose number of features based on search.
Otherwise, number of top features to select.
max_features : int
If not None, will use the 'elbow method' to determine the number of features
with max_features as the upper limit.
learning_rate : float
init : str
How to initialize the vector of scores. 'zero' is the default.
Options: {'zero', 'on', 'off', 'onhigh', 'offhigh', 'sklearn'}
n_epochs : int
number of epochs to run
shuffle : bool
Shuffle "rows" prior to an epoch.
batch_size : int
Nnumber of "rows" to process at a time
target_batch_size : int
Number of "rows" to accumulate gradients over.
Useful when many rows will not fit into memory but are needed for accurate estimation.
classification : bool
If True, problem is classification, else regression.
ordinal : bool
If True, problem is ordinal classification. Requires classification to be True.
balanced : bool
If true, each class is weighted equally in optimization, otherwise
weighted is done via support of each class. Requires classification to be True.
prerocess : str
'zscore' which refers to centering and normalizing data to unit variance or
'center' which only centers the data to 0 mean
soft_grouping : bool
if True, groups represent features that come from the same source.
Used to encourage sparsity of groups and features within groups.
verbose : int
Controls the verbosity when fitting. Set to 0 for no printing
1 or higher for printing every verbose number of gradient steps.
device : str
'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU
"""
assert
order
<=
12
and
order
>=
1
,
'order must be an integer between 1 and 12, inclusive'
assert
n_features
is
None
or
max_features
is
None
,
\
'only specify one of n_features and max_features at a time'
self
.
order
=
order
self
.
penalty
=
penalty
self
.
n_features
=
n_features
self
.
max_features
=
max_features
self
.
learning_rate
=
learning_rate
self
.
init
=
init
self
.
n_epochs
=
n_epochs
self
.
shuffle
=
shuffle
self
.
batch_size
=
batch_size
self
.
target_batch_size
=
target_batch_size
self
.
max_time
=
max_time
self
.
dftol_stop
=
-
1
self
.
freltol_stop
=
-
1
self
.
classification
=
classification
self
.
ordinal
=
ordinal
self
.
balanced
=
balanced
self
.
preprocess
=
preprocess
self
.
soft_grouping
=
soft_grouping
self
.
verbose
=
verbose
self
.
device
=
device
self
.
model_
=
None
self
.
scores_
=
None
self
.
_prev_checkpoint
=
None
self
.
_data_train
=
None
def
partial_fit
(
self
,
X
,
y
,
n_classes
=
None
,
groups
=
None
):
"""
Select Features via a gradient based search on (X, y) on the given samples.
Can be called repeatedly with different X and y to handle streaming datasets.
Parameters
----------
X : array-like
Shape = [n_samples, n_features]
The training input samples.
y : array-like
Shape = [n_samples]
The target values (class labels in classification, real numbers in
regression).
n_classes : int
Number of classes
Classes across all calls to partial_fit.
Can be obtained by via `np.unique(y_all).shape[0]`, where y_all is the
target vector of the entire dataset.
This argument is expected for the first call to partial_fit,
otherwise will assume all classes are present in the batch of y given.
It will be ignored in the subsequent calls.
Note that y doesn't need to contain all labels in `classes`.
groups : array-like
Optional, shape = [n_features]
Groups of columns that must be selected as a unit
e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
This argument is expected for the first call to partial_fit,
otherwise will assume all classes are present in the batch of y given.
It will be ignored in the subsequent calls.
"""
try
:
self
.
_partial_fit
(
X
,
y
,
n_classes
=
n_classes
,
groups
=
groups
)
except
constants
.
NanError
:
if
hasattr
(
self
,
'_prev_checkpoint'
):
# if it's already done some batches successfully just ignore it
print
(
'failed fitting this batch, loss was nan'
)
else
:
# if this is the first batch, reset and try with doubles
if
self
.
verbose
:
print
(
'Loss was nan, trying with Doubles'
)
self
.
_reset
()
torch
.
set_default_tensor_type
(
torch
.
DoubleTensor
)
self
.
_partial_fit
(
X
,
y
,
n_classes
=
n_classes
,
groups
=
groups
)
return
self
def
_partial_fit
(
self
,
X
,
y
,
n_classes
=
None
,
groups
=
None
):
"""
Private function for partial_fit to enable trying floats before doubles.
"""
# pass in X and y in chunks
if
hasattr
(
self
,
'_data_train'
):
# just overwrite the X and y from the new chunk but make them tensors
# keep dataset stats from previous
self
.
_data_train
.
X
=
X
.
values
if
isinstance
(
X
,
pd
.
DataFrame
)
else
X
self
.
_data_train
.
N
,
self
.
_data_train
.
D
=
self
.
_data_train
.
X
.
shape
self
.
_data_train
.
dense_size_gb
=
self
.
_data_train
.
get_dense_size
()
self
.
_data_train
.
set_dense_X
()
self
.
_data_train
.
y
=
y
.
values
if
isinstance
(
y
,
pd
.
Series
)
else
y
self
.
_data_train
.
y
=
torch
.
as_tensor
(
y
,
dtype
=
torch
.
get_default_dtype
())
else
:
data_train
=
self
.
_prepare_data
(
X
,
y
,
n_classes
=
n_classes
)
self
.
_data_train
=
data_train
batch_size
,
_
,
accum_steps
,
max_iter
=
self
.
_set_batch_size
(
self
.
_data_train
)
rng
=
None
# not used
debug
=
0
# {0,1} print messages and do other stuff?
dn_logs
=
None
# tensorboard logs; only specify if debug=1
path_save
=
None
# intermediate models saves; only specify if debug=1
m
,
solver
=
_train
(
self
.
_data_train
,
batch_size
,
self
.
order
,
self
.
penalty
,
rng
,
self
.
learning_rate
,
debug
,
max_iter
,
self
.
max_time
,
self
.
init
,
self
.
dftol_stop
,
self
.
freltol_stop
,
dn_logs
,
accum_steps
,
path_save
,
self
.
shuffle
,
device
=
self
.
device
,
verbose
=
self
.
verbose
,
prev_checkpoint
=
self
.
_prev_checkpoint
if
hasattr
(
self
,
'_prev_checkpoint'
)
else
None
,
groups
=
groups
if
not
self
.
soft_grouping
else
None
,
soft_groups
=
groups
if
self
.
soft_grouping
else
None
)
self
.
_prev_checkpoint
=
m
self
.
_process_results
(
m
,
solver
,
X
,
groups
=
groups
)
return
self
def
fit
(
self
,
X
,
y
,
groups
=
None
):
"""
Select Features via a gradient based search on (X, y).
Parameters
----------
X : array-like
Shape = [n_samples, n_features]
The training input samples.
y : array-like
Shape = [n_samples]
The target values (class labels in classification, real numbers in
regression).
groups : array-like
Optional, shape = [n_features]
Groups of columns that must be selected as a unit
e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
"""
try
:
self
.
_fit
(
X
,
y
,
groups
=
groups
)
except
constants
.
NanError
:
if
self
.
verbose
:
print
(
'Loss was nan, trying with Doubles'
)
torch
.
set_default_tensor_type
(
torch
.
DoubleTensor
)
self
.
_fit
(
X
,
y
,
groups
=
groups
)
return
self
def
get_selected_features
(
self
):
return
self
.
selected_features_
def
_prepare_data
(
self
,
X
,
y
,
n_classes
=
None
):
"""
Returns a PrepareData object.
"""
return
PrepareData
(
X
=
X
.
values
if
isinstance
(
X
,
pd
.
DataFrame
)
else
X
,
y
=
y
.
values
if
isinstance
(
y
,
pd
.
Series
)
else
y
,
data_format
=
constants
.
DataFormat
.
NUMPY
,
classification
=
int
(
self
.
classification
),
ordinal
=
self
.
ordinal
,
balanced
=
self
.
balanced
,
preprocess
=
self
.
preprocess
,
verbose
=
self
.
verbose
,
device
=
self
.
device
,
n_classes
=
n_classes
)
    def _fit(self, X, y, groups=None):
        """
        Private function for fit to enable trying floats before doubles.
        """
        data_train = self._prepare_data(X, y)

        batch_size, _, accum_steps, max_iter = self._set_batch_size(data_train)

        rng = None  # not used
        debug = 0  # {0,1} print messages and log to tensorboard
        dn_logs = None  # tensorboard logs; only specify if debug=1
        path_save = None  # intermediate models saves; only specify if debug=1
        m, solver = _train(data_train,
                           batch_size,
                           self.order,
                           self.penalty,
                           rng,
                           self.learning_rate,
                           debug,
                           max_iter,
                           self.max_time,
                           self.init,
                           self.dftol_stop,
                           self.freltol_stop,
                           dn_logs,
                           accum_steps,
                           path_save,
                           self.shuffle,
                           device=self.device,
                           verbose=self.verbose,
                           groups=groups if not self.soft_grouping else None,
                           soft_groups=groups if self.soft_grouping else None)
        self._process_results(m, solver, X, groups=groups)
        return self
    def _process_torch_scores(self, scores):
        """
        Convert scores into flat numpy arrays.
        """
        if constants.Device.CUDA in scores.device.type:
            scores = scores.cpu()
        return scores.numpy().ravel()
    def _set_batch_size(self, data_train):
        """
        Ensures that batch_size is less than the number of rows.
        """
        batch_size = min(self.batch_size, data_train.N)
        target_batch_size = min(max(self.batch_size, self.target_batch_size),
                                data_train.N)
        accum_steps = max(int(np.ceil(target_batch_size / self.batch_size)), 1)
        max_iter = self.n_epochs * (data_train.N // batch_size)
        return batch_size, target_batch_size, accum_steps, max_iter
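
To make the batch-size arithmetic above concrete, here is a quick trace with made-up settings (batch_size=128, target_batch_size=1000, n_epochs=2, N=10000; all values are illustrative, not from the commit):

import numpy as np

batch_size = min(128, 10000)                        # 128
target_batch_size = min(max(128, 1000), 10000)      # 1000
accum_steps = max(int(np.ceil(1000 / 128)), 1)      # 8 gradient-accumulation steps
max_iter = 2 * (10000 // 128)                       # 156 minibatch iterations
print(batch_size, target_batch_size, accum_steps, max_iter)  # 128 1000 8 156

So gradients from 8 minibatches of 128 rows are accumulated to approximate a 1000-row batch.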
    def _process_results(self, m, solver, X, groups=None):
        """
        Process the results of a run into something suitable for transform().
        """
        self.scores_ = self._process_torch_scores(
            torch.sigmoid(m[constants.Checkpoint.MODEL]['x'] * 2))
        if self.max_features:
            self.max_features = min([self.max_features, self.scores_.shape[0]])
            n_features = self._recommend_number_features(solver)
            self.set_n_features(n_features, groups=groups)
        elif self.n_features:
            self.set_n_features(self.n_features, groups=groups)
        else:
            self.selected_features_ = m['feats']
        # subtract elapsed time from max_time
        self.max_time -= m['t']
        self.model_ = m
        return self
    def transform(self, X):
        """
        Returns selected features from X.

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        """
        self._get_support_mask()
        if self.selected_features_.shape[0] == 0:
            raise ValueError(
                'No Features selected, consider lowering the penalty or specifying n_features')
        return (X.iloc[:, self.selected_features_]
                if isinstance(X, pd.DataFrame)
                else X[:, self.selected_features_])
    def get_support(self, indices=False):
        """
        Get a mask, or integer index, of the features selected.

        Parameters
        ----------
        indices : bool
            Default False.
            If True, the return value will be an array of integers, rather than a boolean mask.

        Returns
        -------
        list :
            returns support: An index that selects the retained features from a feature vector.
            If indices is False, this is a boolean array of shape [# input features],
            in which an element is True iff its corresponding feature is selected for retention.
            If indices is True, this is an integer array of shape [# output features] whose values
            are indices into the input feature vector.
        """
        self._get_support_mask()
        if indices:
            return self.selected_features_

        mask = np.zeros_like(self.scores_, dtype=bool)
        # pylint: disable=E1137
        mask[self.selected_features_] = True
        return mask
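
For illustration, the two return forms of get_support(), assuming a hypothetical fitted selector with 5 scored columns of which columns 1 and 3 were kept:

import numpy as np

scores_ = np.zeros(5)                  # placeholder scores for 5 columns
selected_features_ = np.array([1, 3])  # pretend columns 1 and 3 were kept

mask = np.zeros_like(scores_, dtype=bool)
mask[selected_features_] = True
print(mask)                # [False  True False  True False]  (indices=False)
print(selected_features_)  # [1 3]                             (indices=True)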
    def inverse_transform(self, X):
        """
        Returns transformed X to the original number of columns.
        This operation is lossy and all columns not in the transformed data
        will be returned as columns of 0s.
        """
        self._get_support_mask()
        X_new = np.zeros((X.shape[0], self.scores_.shape[0]))
        # assign along columns, not rows: the selected features are column
        # indices into the original feature space
        X_new[:, self.selected_features_] = X
        return X_new
    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        """
        params = self.__dict__
        params = {key: val for (key, val) in params.items()
                  if not key.endswith('_')}
        return params
    def set_params(self, **params):
        """
        Set the parameters of this estimator.
        """
        for param in params:
            if hasattr(self, param):
                setattr(self, param, params[param])
        return self
    def fit_transform(self, X, y):
        """
        Select features and then return X with the selected features.

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        """
        self.fit(X, y)
        return self.transform(X)
    def _get_support_mask(self):
        """
        Check if it is fitted.
        """
        check_is_fitted(self, 'scores_')
    def _generate_scores(self, solver, xsub, ysub, step_size, feature_order):
        """
        Generate forward passes to determine the number of features when max_features is set.
        """
        scores = []
        for i in np.arange(1, self.max_features + 1, step_size):
            # optimization possible since xsub is growing?
            i = int(np.ceil(i))
            # pylint: disable=E1102
            score = solver.f_train(
                torch.tensor(np.ones(i),
                             dtype=torch.get_default_dtype()).unsqueeze(1).to(self.device),
                xsub[:, feature_order[:i]],
                ysub)
            if constants.Device.CUDA in score.device.type:
                score = score.cpu()
            # score.numpy()[0][0]
            scores.append(score)
        return scores
    def set_n_features(self, n, groups=None):
        """
        Set the number of features to return after fitting.
        """
        self._get_support_mask()
        self.n_features = n
        return self._set_top_features(groups=groups)
    def _set_top_features(self, groups=None):
        """
        Set the selected features after a run.

        With groups, ensures that if any member of a group is selected, all members are selected.
        """
        self._get_support_mask()
        assert self.n_features <= self.scores_.shape[0], \
            'n_features must be less than or equal to the number of columns in X'
        # pylint: disable=E1130
        self.selected_features_ = np.argpartition(
            self.scores_, -self.n_features)[-self.n_features:]
        if groups is not None and not self.soft_grouping:
            selected_feature_set = set(self.selected_features_.tolist())
            # compare against each group label in turn, so that a partially
            # selected group pulls in all of its members
            for group in np.unique(groups):
                group_members = np.where(groups == group)[0].tolist()
                if selected_feature_set.intersection(group_members):
                    selected_feature_set.update(group_members)
            self.selected_features_ = np.array(list(selected_feature_set))
        self.selected_features_ = np.sort(self.selected_features_)
        return self
    def set_top_percentile(self, percentile, groups=None):
        """
        Set the percentile of features to return after fitting.
        """
        self._get_support_mask()
        assert percentile <= 1 and percentile >= 0, \
            'percentile must be between 0 and 1 inclusive'
        self.n_features = int(self.scores_.shape[0] * percentile)
        return self._set_top_features(groups=groups)
    def _recommend_number_features(self, solver, max_time=None):
        """
        Get the recommended number of features by doing forward passes when max_features is set.
        """
        max_time = max_time if max_time else self.max_time
        if max_time < 0:
            max_time = 60  # allow 1 minute extra if we already spent max_time
        MAX_FORWARD_PASS = 200
        # the forward passes can take longer than the fitting if we allow a
        # full epoch of data to be included. By only doing 3 full batches at
        # most we get enough accuracy without increasing the time too much.
        # This constant may not be optimal.
        MAX_FULL_BATCHES = 3
        accum_steps = solver.accum_steps
        step_size = max(self.max_features / MAX_FORWARD_PASS, 1)
        # pylint: disable=E1130
        feature_order = np.argsort(-self.scores_)  # note the negative

        t = time.time()
        dataloader_iterator = iter(solver.ds_train)
        full_scores = []
        # keep_going = True
        with torch.no_grad():
            # might want to only consider a batch valid if there are at least
            # two classes
            for _ in range(accum_steps * MAX_FULL_BATCHES):
                scores = []
                try:
                    xsub, ysub = next(dataloader_iterator)
                except StopIteration:
                    # done with epoch, don't do more than one epoch
                    break
                except Exception as e:
                    print(e)
                    break
                if max_time and time.time() - t > max_time:
                    if self.verbose:
                        print("Stopping forward passes because they reached max_time: ",
                              max_time)
                    if not full_scores:
                        # no forward passes worked, return half of max_features
                        return self.max_features // 2
                    break
                if solver.multiclass:
                    for target_class in range(solver.n_classes):
                        ysub_binary = solver.transform_y_into_binary(ysub, target_class)
                        scaling_value = solver._get_scaling_value(ysub, target_class)
                        if not solver._skip_y_forward(ysub_binary):
                            scores = self._generate_scores(
                                solver, xsub, ysub_binary, step_size, feature_order)
                            # one row will represent one class that is present in the data
                            # all classes are weighted equally
                            full_scores.append([score * scaling_value for score in scores])
                else:
                    if not solver._skip_y_forward(ysub):
                        scores = self._generate_scores(
                            solver, xsub, ysub, step_size, feature_order)
                        full_scores.append(scores)
        best_index = FeatureGradientSelector._find_best_index_elbow(full_scores)
        if self.verbose:
            print("Forward passes took: ", time.time() - t)
        # account for step size and off by one (n_features is 1 indexed, not 0)
        return int(np.ceil(np.arange(1, self.max_features + 1, step_size))[best_index])
    @staticmethod
    def _find_best_index_elbow(full_scores):
        """
        Finds the point on the curve that maximizes distance from the line determined by the endpoints.
        """
        scores = pd.DataFrame(full_scores).mean(0).values.tolist()
        first_point = np.array([0, scores[0]])
        last_point = np.array([len(scores) - 1, scores[-1]])
        elbow_metric = []
        for i in range(len(scores)):
            elbow_metric.append(
                FeatureGradientSelector._distance_to_line(
                    first_point, last_point, np.array([i, scores[i]])))
        return np.argmax(elbow_metric)
    @staticmethod
    def _distance_to_line(start_point, end_point, new_point):
        """
        Calculates the shortest distance from new_point to the line determined by start_point and end_point.
        """
        # for calculating elbow method
        return np.cross(new_point - start_point,
                        end_point - start_point) / np.linalg.norm(end_point - start_point)
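
To see the elbow heuristic in action, here is a small self-contained sketch on synthetic scores (the score values are made up; the helper mirrors _distance_to_line above):

import numpy as np

def distance_to_line(start_point, end_point, new_point):
    # 2-D cross product gives the signed parallelogram area; dividing by the
    # base length leaves the perpendicular height, i.e. the distance
    return np.cross(new_point - start_point, end_point - start_point) \
        / np.linalg.norm(end_point - start_point)

scores = [1.0, 0.5, 0.3, 0.25, 0.24, 0.23]  # loss vs. number of features
first = np.array([0, scores[0]])
last = np.array([len(scores) - 1, scores[-1]])
elbow = [distance_to_line(first, last, np.array([i, s]))
         for i, s in enumerate(scores)]
print(np.argmax(elbow))  # 2 -> the knee of the curve sits at ~3 features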
    def _reset(self):
        """
        Reset the estimator by deleting all private and fit parameters.
        """
        params = self.__dict__
        # iterate over a copy of the keys, since delattr mutates __dict__
        for key in list(params.keys()):
            if key.endswith('_') or key.startswith('_'):
                delattr(self, key)
        return self
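
For orientation, a minimal end-to-end sketch of the selector defined in this file. This is not part of the commit: the synthetic data, the n_features value, and the assumption that the package __init__ re-exports FeatureGradientSelector are all illustrative.

import numpy as np
from nni.feature_engineering.gradient_selector import FeatureGradientSelector

X = np.random.randn(1000, 20)             # 1000 samples, 20 features
y = (X[:, 0] + X[:, 3] > 0).astype(int)   # labels driven by columns 0 and 3

fgs = FeatureGradientSelector(n_features=5)  # ask for exactly 5 features
fgs.fit(X, y)
print(fgs.get_selected_features())           # indices of the 5 kept columns
X_reduced = fgs.transform(X)                 # shape (1000, 5)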
src/sdk/pynni/nni/feature_engineering/gradient_selector/learnability.py
0 → 100644
View file @ cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import time

import numpy as np
import scipy.special
import torch
import torch.nn as nn

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings
from nni.feature_engineering.gradient_selector.fginitialize import ChunkDataLoader

torch.set_default_tensor_type(syssettings.torch.tensortype)
sparsetensor = syssettings.torch.sparse.tensortype
def def_train_opt(p):
    """
    Return the default optimizer.
    """
    return torch.optim.Adam(p, 1e-1, amsgrad=False)
def revcumsum(U):
    """
    Reverse cumulative sum for faster performance.
    """
    return U.flip(dims=[0]).cumsum(dim=0).flip(dims=[0])
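
A quick check of what revcumsum computes: each output element is the suffix sum of the input from that position onward (the tensor values here are made up):

import torch

U = torch.tensor([1., 2., 3., 4.])
out = U.flip(dims=[0]).cumsum(dim=0).flip(dims=[0])
print(out)  # tensor([10., 9., 7., 4.]) -- out[i] == U[i:].sum()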
def triudr(X, r):
    Zr = torch.zeros_like(X, requires_grad=False)
    U = X * r
    Zr[:-1] = X[:-1] * revcumsum(U)[1:]
    return Zr


def triudl(X, l):
    Zl = torch.zeros_like(X, requires_grad=False)
    U = X * l
    Zl[1:] = X[1:] * (U.cumsum(dim=0)[:-1])
    return Zl
class ramp(torch.autograd.Function):
    """
    Ensures input is between 0 and 1
    """

    @staticmethod
    def forward(ctx, input_data):
        ctx.save_for_backward(input_data)
        return input_data.clamp(min=0, max=1)

    @staticmethod
    def backward(ctx, grad_output):
        input_data, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input_data < 0] = 1e-2
        grad_input[input_data > 1] = -1e-2
        return grad_input
class safesqrt(torch.autograd.Function):
    """
    Square root without dividing by 0.
    """

    @staticmethod
    def forward(ctx, input_data):
        o = input_data.sqrt()
        ctx.save_for_backward(input_data, o)
        return o

    @staticmethod
    def backward(ctx, grad_output):
        _, o = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input *= 0.5 / (o + constants.EPSILON)
        return grad_input
class LearnabilityMB(nn.Module):
    """
    Calculates the learnability of a set of features.
    mini-batch version w/ "left" and "right" multiplies
    """

    def __init__(self, Nminibatch, D, coeff, groups=None, binary=False,
                 device=constants.Device.CPU):
        super(LearnabilityMB, self).__init__()

        a = coeff / scipy.special.binom(Nminibatch, np.arange(coeff.size) + 2)
        self.order = a.size
        # pylint: disable=E1102
        self.a = torch.tensor(a, dtype=torch.get_default_dtype(), requires_grad=False)
        self.binary = binary
        self.a = self.a.to(device)

    def ret_val(self, z):
        """
        Get the return value based on z.
        """
        if not self.binary:
            return 1 - z
        else:
            return 0.5 * (1 - safesqrt.apply(ramp.apply(z)))

    def forward(self, s, X, y):
        l = y.clone()
        r = y.clone()
        z = 0

        for i in range(self.order):
            if i % 2 == 0:
                Z = triudr(X, r)
                r = torch.mm(Z, s)
            else:
                Z = triudl(X, l)
                l = torch.mm(Z, s)
            if self.a[i] != 0:
                # save the computation if a[i] is 0
                p = torch.mm(l.t(), r)
                z += self.a[i] * p
        return self.ret_val(z)
class Solver(nn.Module):
    """
    Class that performs the main optimization.
    Keeps track of the current x and iterates through data to learn x given the penalty and order.
    """

    def __init__(self,
                 PreparedData,
                 order,
                 Nminibatch=None,
                 groups=None,
                 soft_groups=None,
                 x0=None,
                 C=1,
                 ftransform=torch.sigmoid,
                 get_train_opt=def_train_opt,
                 accum_steps=1,
                 rng=np.random.RandomState(0),
                 max_norm_clip=1.,
                 shuffle=True,
                 device=constants.Device.CPU,
                 verbose=1):
        """
        Parameters
        ----------
        PreparedData : Dataset of PrepareData class
        order : int
            What order of interactions to include. Higher orders
            may be more accurate but increase the run time. 12 is the maximum allowed order.
        Nminibatch : int
            Number of rows in a mini batch
        groups : array-like
            Optional, shape = [n_features]
            Groups of columns that must be selected as a unit
            e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
        soft_groups : array-like
            Optional, shape = [n_features]
            Groups of columns that come from the same source.
            Used to encourage sparsity in the number of sources selected
            e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
        x0 : torch.tensor
            Optional, initialization of x.
        C : float
            Penalty parameter.
        ftransform : function
            Function to transform the x. sigmoid is the default.
        get_train_opt : function
            Function that returns a pytorch optimizer, Adam is the default
        accum_steps : int
            Number of gradient-accumulation steps
        rng : random state
        max_norm_clip : float
            Maximum allowable size of the gradient
        shuffle : bool
            Whether or not to shuffle data within the dataloader
        device : str
            'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU
        verbose : int
            Controls the verbosity when fitting. Set to 0 for no printing
            1 or higher for printing every verbose number of gradient steps.
        """
        super(Solver, self).__init__()

        self.Ntrain, self.D = PreparedData.N, PreparedData.n_features
        if groups is not None:
            # pylint: disable=E1102
            groups = torch.tensor(groups, dtype=torch.long)
            self.groups = groups
        else:
            self.groups = None
        if soft_groups is not None:
            # pylint: disable=E1102
            soft_groups = torch.tensor(soft_groups, dtype=torch.long)
            self.soft_D = torch.unique(soft_groups).size()[0]
        else:
            self.soft_D = None
        self.soft_groups = soft_groups

        if Nminibatch is None:
            Nminibatch = self.Ntrain
        else:
            if Nminibatch > self.Ntrain:
                print('Minibatch larger than sample size.'
                      + (' Reducing from %d to %d.' % (Nminibatch, self.Ntrain)))
                Nminibatch = self.Ntrain
        if Nminibatch > PreparedData.max_rows:
            print('Minibatch larger than mem-allowed.'
                  + (' Reducing from %d to %d.' % (Nminibatch, PreparedData.max_rows)))
            Nminibatch = int(np.min([Nminibatch, PreparedData.max_rows]))
        self.Nminibatch = Nminibatch
        self.accum_steps = accum_steps

        if x0 is None:
            x0 = torch.zeros(self.D, 1, dtype=torch.get_default_dtype())
        self.ftransform = ftransform
        self.x = nn.Parameter(x0)
        self.max_norm = max_norm_clip

        self.device = device
        self.verbose = verbose

        self.multiclass = (PreparedData.classification
                           and PreparedData.n_classes
                           and PreparedData.n_classes > 2)
        if self.multiclass:
            self.n_classes = PreparedData.n_classes
        else:
            self.n_classes = None
        # whether to treat all classes equally
        self.balanced = PreparedData.balanced
        self.ordinal = PreparedData.ordinal

        if (hasattr(PreparedData, 'mappings')
                or PreparedData.storage_level == 'disk'):
            num_workers = PreparedData.num_workers
        elif PreparedData.storage_level == constants.StorageLevel.DENSE:
            num_workers = 0
        else:
            num_workers = 0

        if constants.Device.CUDA in device:
            pin_memory = False
        else:
            pin_memory = False

        self.ds_train = ChunkDataLoader(
            PreparedData,
            batch_size=self.Nminibatch,
            shuffle=shuffle,
            drop_last=True,
            num_workers=num_workers,
            pin_memory=pin_memory,
            timeout=60)
        self.f_train = LearnabilityMB(self.Nminibatch, self.D,
                                      constants.Coefficients.SLE[order],
                                      self.groups,
                                      binary=PreparedData.classification,
                                      device=self.device)
        self.opt_train = get_train_opt(torch.nn.ParameterList([self.x]))
        self.it = 0
        self.iters_per_epoch = int(np.ceil(
            len(self.ds_train.dataset) / self.ds_train.batch_size))
        self.f_train = self.f_train.to(device)
        # pylint: disable=E1102
        self.w = torch.tensor(C / (C + 1),
                              dtype=torch.get_default_dtype(),
                              requires_grad=False)
        self.w = self.w.to(device)
    def penalty(self, s):
        """
        Calculate L1 Penalty.
        """
        to_return = torch.sum(s) / self.D
        if self.soft_groups is not None:
            # if soft_groups, there is an additional penalty for using more
            # groups
            s_grouped = torch.zeros(self.soft_D, 1,
                                    dtype=torch.get_default_dtype(),
                                    device=self.device)
            for group in torch.unique(self.soft_groups):
                # groups should be indexed 0 to n_group - 1
                # TODO: consider other functions here
                s_grouped[group] = s[self.soft_groups == group].max()
            # each component of the penalty contributes .5
            # TODO: could make this a user given parameter
            to_return = (to_return + torch.sum(s_grouped) / self.soft_D) * .5
        return to_return
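
Tracing the soft-group penalty on made-up tensors (4 features drawn from 2 sources; the per-group max rewards turning a whole source off, not just some of its columns):

import torch

s = torch.tensor([[0.9], [0.1], [0.8], [0.2]])
soft_groups = torch.tensor([0, 0, 1, 1])
D, soft_D = 4, 2

base = torch.sum(s) / D                     # plain L1 term: 2.0 / 4 = 0.5
s_grouped = torch.stack([s[soft_groups == g].max()
                         for g in torch.unique(soft_groups)])
group_term = torch.sum(s_grouped) / soft_D  # (0.9 + 0.8) / 2 = 0.85
print((base + group_term) * 0.5)            # tensor(0.6750)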
    def forward_and_backward(self, s, xsub, ysub, retain_graph=False):
        """
        Completes the forward operation and computes gradients for learnability and penalty.
        """
        f_train = self.f_train(s, xsub, ysub)
        pen = self.penalty(s)
        # pylint: disable=E1102
        grad_outputs = torch.tensor([[1]], dtype=torch.get_default_dtype(),
                                    device=self.device)
        g1, = torch.autograd.grad([f_train], [self.x], grad_outputs,
                                  retain_graph=True)
        # pylint: disable=E1102
        grad_outputs = torch.tensor([[1]], dtype=torch.get_default_dtype(),
                                    device=self.device)
        g2, = torch.autograd.grad([pen], [self.x], grad_outputs,
                                  retain_graph=retain_graph)
        return f_train, pen, g1, g2
    def combine_gradient(self, g1, g2):
        """
        Combine gradients from learnability and penalty

        Parameters
        ----------
        g1 : array-like
            gradient from learnability
        g2 : array-like
            gradient from penalty
        """
        to_return = ((1 - self.w) * g1 + self.w * g2) / self.accum_steps
        if self.groups is not None:
            # each column will get a gradient, but we can only move whole
            # groups up or down, so the gradient for the group
            # should be the average of the gradients of the columns
            to_return_grouped = torch.zeros_like(self.x)
            for group in torch.unique(self.groups):
                to_return_grouped[self.groups == group] = \
                    to_return[self.groups == group].mean()
            to_return = to_return_grouped
        return to_return
    def combine_loss(self, f_train, pen):
        """
        Combine the learnability and L1 penalty.
        """
        return ((1 - self.w) * f_train.detach() + self.w * pen.detach()) \
            / self.accum_steps
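
Since w = C/(C+1), the convex combination above is algebraically the familiar regularized objective (f_train + C * pen) / (C + 1), so C plays the usual role of a regularization constant. A quick numeric check with made-up values:

C, f, p = 2.0, 0.6, 0.3
w = C / (C + 1)
print((1 - w) * f + w * p)    # 0.4
print((f + C * p) / (C + 1))  # 0.4, same thing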
    def transform_y_into_binary(self, ysub, target_class):
        """
        Transforms multiclass classification problems into a binary classification problem.
        """
        with torch.no_grad():
            ysub_binary = torch.zeros_like(ysub)
            if self.ordinal:
                # turn ordinal problems into n-1 classifications of is this
                # example less than rank k
                if target_class == 0:
                    return None
                ysub_binary[ysub >= target_class] = 1
                ysub_binary[ysub < target_class] = -1
            else:
                # turn multiclass problems into n binary classifications
                ysub_binary[ysub == target_class] = 1
                ysub_binary[ysub != target_class] = -1
        return ysub_binary
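
An illustration of the two encodings on a made-up label vector (mirroring the assignments above):

import torch

ysub = torch.tensor([0, 1, 2, 1])
target_class = 1

b = torch.zeros_like(ysub)            # multiclass: one-vs-rest
b[ysub == target_class] = 1
b[ysub != target_class] = -1
print(b)  # tensor([-1,  1, -1,  1])

o = torch.zeros_like(ysub)            # ordinal: "is the rank >= target_class?"
o[ysub >= target_class] = 1
o[ysub < target_class] = -1
print(o)  # tensor([-1,  1,  1,  1])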
    def _get_scaling_value(self, ysub, target_class):
        """
        Returns the weight given to a class for multiclass classification.
        """
        if self.balanced:
            if self.ordinal:
                return 1 / (torch.unique(ysub).size()[0] - 1)

            return 1 / torch.unique(ysub).size()[0]
        else:
            if self.ordinal:
                this_class_proportion = torch.mean(ysub >= target_class)
                normalizing_constant = 0
                for i in range(1, self.n_classes):
                    normalizing_constant += torch.mean(ysub >= i)
                return this_class_proportion / normalizing_constant
            else:
                return torch.mean(ysub == target_class)
    def _skip_y_forward(self, y):
        """
        Returns boolean of whether to skip the current y if there is nothing to be learned from it.
        """
        if y is None:
            return True
        elif torch.unique(y).size()[0] < 2:
            return True
        else:
            return False
    def train(self, f_callback=None, f_stop=None):
        """
        Trains the estimator to determine which features to include.

        Parameters
        ----------
        f_callback : function
            Function that performs a callback
        f_stop : function
            Function that tells you when to stop
        """
        t = time.time()
        h = torch.zeros([1, 1], dtype=torch.get_default_dtype())
        h = h.to(self.device)
        # h_complete is so when we divide by the number of classes
        # we only do that for that minibatch if accumulating
        h_complete = h.clone()
        flag_stop = False
        dataloader_iterator = iter(self.ds_train)
        self.x.grad = torch.zeros_like(self.x)
        while not flag_stop:
            try:
                xsub, ysub = next(dataloader_iterator)
            except StopIteration:
                dataloader_iterator = iter(self.ds_train)
                xsub, ysub = next(dataloader_iterator)
            try:
                s = self.ftransform(self.x)
                s = s.to(self.device)
                if self.multiclass:
                    # accumulate gradients over each class, classes range from
                    # 0 to n_classes - 1
                    #num_classes_batch = torch.unique(ysub).size()[0]
                    for target_class in range(self.n_classes):
                        ysub_binary = self.transform_y_into_binary(ysub, target_class)
                        if self._skip_y_forward(ysub_binary):
                            continue
                        # should skip if target class is not included
                        # but that changes what we divide by
                        scaling_value = self._get_scaling_value(ysub, target_class)
                        f_train, pen, g1, g2 = self.forward_and_backward(
                            s, xsub, ysub_binary, retain_graph=True)
                        self.x.grad += self.combine_gradient(g1, g2) * scaling_value
                        h += self.combine_loss(f_train, pen) * scaling_value
                else:
                    if not self._skip_y_forward(ysub):
                        f_train, pen, g1, g2 = self.forward_and_backward(s, xsub, ysub)
                        self.x.grad += self.combine_gradient(g1, g2)
                        h += self.combine_loss(f_train, pen)
                    else:
                        continue
                h_complete += h
                self.it += 1
                if torch.isnan(h):
                    raise constants.NanError(
                        'Loss is nan, something may be misconfigured')
                if self.it % self.accum_steps == 0:
                    torch.nn.utils.clip_grad_norm_(
                        torch.nn.ParameterList([self.x]), max_norm=self.max_norm)
                    self.opt_train.step()

                    t = time.time() - t
                    if f_stop is not None:
                        flag_stop = f_stop(self, h, self.it, t)

                    if f_callback is not None:
                        f_callback(self, h, self.it, t)
                    elif self.verbose and (self.it // self.accum_steps) % self.verbose == 0:
                        epoch = int(self.it / self.iters_per_epoch)
                        print('[Minibatch: %6d/ Epoch: %3d/ t: %3.3f s] Loss: %0.3f' %
                              (self.it, epoch, t, h_complete / self.accum_steps))

                    if flag_stop:
                        break

                    self.opt_train.zero_grad()
                    h = 0
                    h_complete = 0
                    t = time.time()
            except KeyboardInterrupt:
                flag_stop = True
                break
src/sdk/pynni/nni/feature_engineering/gradient_selector/requirements.txt
0 → 100644
View file @ cd3a912a
numpy==1.14.3
scikit-learn==0.20.0
scipy==1.1.0
torch==1.1.0
src/sdk/pynni/nni/feature_engineering/gradient_selector/syssettings.py
0 → 100644
View file @ cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import torch

# pytorch
torch.tensortype = torch.FloatTensor
torch.sparse.tensortype = torch.sparse.FloatTensor

# mem
MAXMEMGB = 10
src/sdk/pynni/nni/feature_engineering/gradient_selector/utils.py
0 → 100644
View file @ cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import numpy as np


class EMA():
    """
    maintains an exponential moving average
    """

    def __init__(self, f=np.nan, discount_factor=0.1, valid_after=None,
                 n_iters_relchange=3):
        self.f_ma = [f]
        self.fs = [f]
        self.gamma = discount_factor
        self.rel_change = [np.nan]
        if valid_after is None:
            self.valid_after = int(1 / discount_factor)
        else:
            self.valid_after = valid_after
        self.n_iters_relchange = n_iters_relchange
        self.initialized = False

    def reset(self, f):
        self.f_ma = [f]
        self.fs = [f]
        self.rel_change = [np.nan]
        self.initialized = True

    def relchange(self):
        if self.num_updates() > np.max([self.valid_after,
                                        self.n_iters_relchange]):
            return np.max(self.rel_change[-self.n_iters_relchange:])
        else:
            return np.nan

    def update(self, f_new):
        if not self.initialized:
            self.reset(f_new)
        else:
            self.fs.append(f_new)
            self.f_ma.append(self.f_ma[-1] * (1 - self.gamma)
                             + self.gamma * f_new)
            if self.num_updates() > self.valid_after:
                self.rel_change.append(np.abs((self.f_ma[-1] - self.f_ma[-2])
                                              / self.f_ma[-2]))

    def num_updates(self):
        return len(self.f_ma)

    def __call__(self):
        if self.num_updates() > self.valid_after:
            return self.f_ma[-1]
        else:
            return np.nan
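
A hypothetical usage sketch of this helper: smooth a noisy loss curve and query the recent relative change once enough updates have accrued (the loss values are made up):

import numpy as np
from nni.feature_engineering.gradient_selector.utils import EMA

ema = EMA(discount_factor=0.1)  # valid_after defaults to int(1/0.1) = 10
for loss in 0.5 + 0.1 * np.random.randn(50):  # synthetic noisy losses
    ema.update(loss)
print(ema())            # smoothed loss, valid once > 10 updates have accrued
print(ema.relchange())  # max relative change over the last 3 iterations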