OpenDAS / nni — Commit cd3a912a (Unverified)

Authored Nov 27, 2019 by SparkSnail; committed by GitHub, Nov 27, 2019.

    Merge pull request #218 from microsoft/master

    merge master

Parents: a0846f2a, e9cba778
375 changes. Showing 20 changed files with 2414 additions and 106 deletions (+2414 −106).
src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py                      +4   −16
src/sdk/pynni/nni/curvefitting_assessor/model_factory.py                       +2   −16
src/sdk/pynni/nni/curvefitting_assessor/test.py                                +2   −17
src/sdk/pynni/nni/env_vars.py                                                  +2   −19
src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py                           +3   −19
src/sdk/pynni/nni/evolution_tuner/test_evolution_tuner.py                      +3   −19
src/sdk/pynni/nni/feature_engineering/__init__.py                              +0   −0
src/sdk/pynni/nni/feature_engineering/feature_selector.py                      +59  −0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/__init__.py                +1   −0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/gbdt_selector.py           +114 −0
src/sdk/pynni/nni/feature_engineering/gbdt_selector/requirements.txt           +1   −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/__init__.py            +1   −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/constants.py           +100 −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/fginitialize.py        +623 −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/fgtrain.py             +228 −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/gradient_selector.py   +631 −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/learnability.py        +529 −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/requirements.txt       +4   −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/syssettings.py         +29  −0
src/sdk/pynni/nni/feature_engineering/gradient_selector/utils.py               +78  −0
src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py

-# Copyright (c) Microsoft Corporation
-# All rights reserved.
-#
-# MIT License
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
-# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
-# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 """
 A family of functions used by CurvefittingAssessor
 """
 import numpy as np

 all_models = {}
 ...
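For context, the all_models dict in the diff context above is a registry mapping curve-model names to their fitting functions. A minimal sketch of the registry pattern (the pow3 name and formula are illustrative assumptions, not read from this diff):

import numpy as np

all_models = {}

def pow3(x, c, a, alpha):
    # illustrative learning-curve model: y = c - a * x^(-alpha)
    return c - a * np.power(x, -alpha)

all_models['pow3'] = pow3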
src/sdk/pynni/nni/curvefitting_assessor/model_factory.py

-# Copyright (c) Microsoft Corporation
-# All rights reserved.
-#
-# MIT License
-# ... (full MIT license boilerplate removed, as in curvefunctions.py above)
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 import logging
 import numpy as np
 ...
src/sdk/pynni/nni/curvefitting_assessor/test.py

-# Copyright (c) Microsoft Corporation
-# All rights reserved.
-#
-# MIT License
-# ... (full MIT license boilerplate removed, as in curvefunctions.py above)
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 import numpy as np
 import unittest
 ...
src/sdk/pynni/nni/env_vars.py

-# Copyright (c) Microsoft Corporation. All rights reserved.
-#
-# MIT License
-# ... (full MIT license boilerplate removed, as in curvefunctions.py above)
-# ==================================================================================================
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 import os
 from collections import namedtuple
 ...
src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py

-# Copyright (c) Microsoft Corporation
-# All rights reserved.
-#
-# MIT License
-# ... (full MIT license boilerplate removed, as in curvefunctions.py above)
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 """
 evolution_tuner.py
 """
 ...
src/sdk/pynni/nni/evolution_tuner/test_evolution_tuner.py

-# Copyright (c) Microsoft Corporation
-# All rights reserved.
-#
-# MIT License
-# ... (full MIT license boilerplate removed, as in curvefunctions.py above)
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.

 """
 test_evolution_tuner.py
 """
 ...
src/sdk/pynni/nni/feature_engineering/__init__.py (new file, 0 → 100644; empty)
src/sdk/pynni/nni/feature_engineering/feature_selector.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================

import logging

_logger = logging.getLogger(__name__)


class FeatureSelector():

    def __init__(self, **kwargs):
        self.selected_features_ = None
        self.X = None
        self.y = None

    def fit(self, X, y, **kwargs):
        """
        Fit the training data to FeatureSelector

        Parameters
        ----------
        X : array-like numpy matrix
            The training input samples, which shape is [n_samples, n_features].
        y : array-like numpy matrix
            The target values (class labels in classification, real numbers in
            regression), which shape is [n_samples].
        """
        self.X = X
        self.y = y

    def get_selected_features(self):
        """
        Get the indices of the selected features.

        Returns
        -------
        list :
            Return the indices of the important features.
        """
        return self.selected_features_
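FeatureSelector is the minimal base interface that the selectors added below implement. A sketch of a custom selector against this interface (the variance-based ranking is an illustrative assumption, not part of this commit):

import numpy as np

class VarianceSelector(FeatureSelector):
    def fit(self, X, y, **kwargs):
        # rank features by column variance, highest first (illustrative criterion)
        self.X = X
        self.y = y
        self.selected_features_ = np.argsort(-np.var(X, axis=0))

get_selected_features() then returns the ranked indices, exactly as in the base class.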
src/sdk/pynni/nni/feature_engineering/gbdt_selector/__init__.py (new file, 0 → 100644)

from .gbdt_selector import GBDTSelector
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gbdt_selector/gbdt_selector.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
# ... (full MIT license text as in feature_selector.py above)
# ==================================================================================================

"""
gbdt_selector.py including:
    class GBDTSelector
"""

import random
from sklearn.model_selection import train_test_split

from nni.feature_engineering.feature_selector import FeatureSelector

# pylint: disable=E0401
import lightgbm as lgb


class GBDTSelector(FeatureSelector):

    def __init__(self, **kwargs):
        self.selected_features_ = None
        self.X = None
        self.y = None
        self.feature_importance = None
        self.lgb_params = None
        self.eval_ratio = None
        self.early_stopping_rounds = None
        self.importance_type = None
        self.num_boost_round = None
        self.model = None

    def fit(self, X, y, **kwargs):
        """
        Fit the training data to FeatureSelector

        Parameters
        ----------
        X : array-like numpy matrix
            The training input samples, which shape is [n_samples, n_features].
        y : array-like numpy matrix
            The target values (class labels in classification, real numbers in
            regression), which shape is [n_samples].
        lgb_params : dict
            Parameters of lightgbm
        eval_ratio : float
            The ratio of data size. It's used to split the eval data and train data from self.X.
        early_stopping_rounds : int
            The early stopping setting in lightgbm.
        importance_type : str
            Supported types are 'gain' and 'split'.
        num_boost_round : int
            num_boost_round in lightgbm.
        """
        assert kwargs['lgb_params']
        assert kwargs['eval_ratio']
        assert kwargs['early_stopping_rounds']
        assert kwargs['importance_type']
        assert kwargs['num_boost_round']

        self.X = X
        self.y = y
        self.lgb_params = kwargs['lgb_params']
        self.eval_ratio = kwargs['eval_ratio']
        self.early_stopping_rounds = kwargs['early_stopping_rounds']
        self.importance_type = kwargs['importance_type']
        self.num_boost_round = kwargs['num_boost_round']

        X_train, X_test, y_train, y_test = train_test_split(self.X,
                                                            self.y,
                                                            test_size=self.eval_ratio,
                                                            random_state=random.seed(41))
        lgb_train = lgb.Dataset(X_train, y_train)
        lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

        self.model = lgb.train(self.lgb_params,
                               lgb_train,
                               num_boost_round=self.num_boost_round,
                               valid_sets=lgb_eval,
                               early_stopping_rounds=self.early_stopping_rounds)
        self.feature_importance = self.model.feature_importance(self.importance_type)

    def get_selected_features(self, topk):
        """
        Get the top-k features ranked by importance.

        Returns
        -------
        list :
            Return the indices of the most important features.
        """
        assert topk > 0
        self.selected_features_ = self.feature_importance.argsort()[-topk:][::-1]
        return self.selected_features_
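A usage sketch for the class above; the lgb_params values are illustrative, not defaults prescribed by this file. Note two quirks visible in fit(): each keyword is checked with a truthiness assert, so falsy values such as eval_ratio=0 are rejected, and train_test_split receives random_state=random.seed(41), which evaluates to None, so the split is not actually fixed.

import numpy as np
from nni.feature_engineering.gbdt_selector import GBDTSelector

X = np.random.randn(500, 20)
y = np.random.randint(0, 2, 500)

selector = GBDTSelector()
selector.fit(X, y,
             lgb_params={'objective': 'binary', 'verbose': -1},  # illustrative params
             eval_ratio=0.2,
             early_stopping_rounds=10,
             importance_type='gain',
             num_boost_round=100)
print(selector.get_selected_features(topk=5))  # indices of the 5 most important features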
src/sdk/pynni/nni/feature_engineering/gbdt_selector/requirements.txt (new file, 0 → 100644)

lightgbm
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gradient_selector/__init__.py (new file, 0 → 100644)

from .gradient_selector import FeatureGradientSelector
\ No newline at end of file
src/sdk/pynni/nni/feature_engineering/gradient_selector/constants.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
# ... (full MIT license text as in feature_selector.py above)
# ==================================================================================================

import numpy as np


class StorageLevel:
    DISK = 'disk'
    SPARSE = 'sparse'
    DENSE = 'dense'


class DataFormat:
    SVM = 'svm'
    NUMPY = 'numpy'
    ALL_FORMATS = [SVM, NUMPY]


class Preprocess:
    """
    ZSCORE centers the data to mean 0 and scales it to unit variance;
    CENTER only centers the data to mean 0.
    """
    ZSCORE = 'zscore'
    CENTER = 'center'


class Device:
    CUDA = 'cuda'
    CPU = 'cpu'


class Checkpoint:
    MODEL = 'model_state_dict'
    OPT = 'optimizer_state_dict'
    RNG = 'torch_rng_state'


class NanError(ValueError):
    pass


class Initialization:
    ZERO = 'zero'
    ON = 'on'
    OFF = 'off'
    ON_HIGH = 'onhigh'
    OFF_HIGH = 'offhigh'
    SKLEARN = 'sklearn'
    RANDOM = 'random'
    VALUE_DICT = {ZERO: 0,
                  ON: 1,
                  OFF: -1,
                  ON_HIGH: 5,
                  OFF_HIGH: -1,
                  SKLEARN: None,
                  RANDOM: None}


class Coefficients:
    """
    Coefficients for the sublinear estimator were computed by running the
    sublinear paper's authors' code.
    """
    SLE = {1: np.array([0.60355337]),
           2: np.array([1.52705001, -0.34841729]),
           3: np.array([2.90254224, -1.87216745, 0.]),
           4: np.array([4.63445685, -5.19936195, 0., 1.50391676]),
           5: np.array([6.92948049, -14.12216211, 9.4475009, 0., -1.21093546]),
           6: np.array([9.54431082, -28.09414643, 31.84703652, -11.18763791, -1.14175281, 0.]),
           7: np.array([12.54505041, -49.64891525, 79.78828031, -46.72250909, 0., 0., 5.02973646]),
           8: np.array([16.03550163, -84.286182, 196.86078756, -215.36747071, 92.63961263, 0., 0., -4.86280869]),
           9: np.array([19.86409184, -130.76801006, 390.95349861, -570.09210416, 354.77764899, 0., -73.84234865, 0., 10.09148767]),
           10: np.array([2.41117752e+01, -1.94946061e+02, 7.34214614e+02, -1.42851995e+03, 1.41567410e+03, \
                         -5.81738134e+02, 0., 0., 3.11664751e+01, 1.05018365e+00]),
           11: np.array([28.75280839, -279.22576729, 1280.46325445, -3104.47148101, 3990.6092248, -2300.29413333, \
                         0., 427.35289033, 0., 0., -42.17587475]),
           12: np.array([33.85141912, -391.4229382, 2184.97827882, -6716.28280208, 11879.75233977, -11739.97267239, \
                         5384.94542245, 0., -674.23291712, 0., 0., 39.37456439])}


EPSILON = 1e-8
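These classes serve as namespaced string constants rather than real enums. A small sketch of how they are consumed (mirroring the get_init logic in fgtrain.py below; D is illustrative):

import numpy as np

D = 8                                                    # number of features (illustrative)
init_type = Initialization.ON
assert init_type in Initialization.VALUE_DICT
x0 = Initialization.VALUE_DICT[init_type] * np.ones(D)   # every feature score starts at 1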
src/sdk/pynni/nni/feature_engineering/gradient_selector/fginitialize.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
# ... (full MIT license text as in feature_selector.py above)
# ==================================================================================================

import os
import pickle
import sys
import time

import numpy as np
import scipy.sparse
from sklearn.datasets import load_svmlight_file
import torch
from torch.utils.data import DataLoader, Dataset
# pylint: disable=E0611
from torch.utils.data.dataloader import _DataLoaderIter, _utils

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings

torch.set_default_tensor_type(syssettings.torch.tensortype)
sparsetensor = syssettings.torch.sparse.tensortype

BYTESPERREAL = 8.
BYTESPERGB = 1024. ** 3

class PrepareData(Dataset):

    def __init__(self,
                 path_data=None,
                 data_format=constants.DataFormat.NUMPY,
                 D=None,
                 N=None,
                 classification=True,
                 ordinal=False,
                 balanced=True,
                 preprocess=None,
                 n_to_estimate=None,
                 MAXMEMGB=syssettings.MAXMEMGB,
                 set_params=True,
                 path_mappings=None,
                 X=None,
                 y=None,
                 verbose=0,
                 n_classes=None,
                 device=constants.Device.CPU):
        """
        Dataset class with helpful features and functions for being included in a dataloader
        and managing memory usage.

        Can read the following formats:
            svm: svmlight format (sklearn.datasets.load_svmlight_file)
            numpy: pass X and y as numpy or sparse arrays

        Assumes:
            1. if classification, y is in {-1, 1} or continuous and 0-indexed
            2. y can fit into memory
            3. consecutive calls to __getitem__() have consecutive idx values

        Notes:
            1. this implementation is not careful wrt/ precise memory reqts. for
               example, being able to store one dense row in memory is necessary,
               but not sufficient.
            2. for y with 4.2 billion elements, 31.3 GB of memory is necessary
               @ 8 bytes/scalar. Use partial fit to avoid loading the entire dataset
               at once.
            3. disk_size always refers to the size of the complete data file, even after
               a split().

        Parameters
        ----------
        path_data : str
            Path to load data from
        data_format : str
            File ending for path data.
            "numpy" is the default when passing in X and y
        D : int
            Number of features.
        N : int
            Number of rows.
        classification : bool
            If True, problem is classification, else regression.
        ordinal : bool
            If True, problem is ordinal classification. Requires classification to be True.
        balanced : bool
            If True, each class is weighted equally in optimization, otherwise
            weighting is done via support of each class. Requires classification to be True.
        preprocess : str
            'zscore' which refers to centering and normalizing data to unit variance or
            'center' which only centers the data to 0 mean
        n_to_estimate : int
            Number of rows of data to use when estimating dataset statistics
        MAXMEMGB : float
            Maximum allowable size for a minibatch
        set_params : bool
            Whether or not to determine the statistics of the dataset
        path_mappings : str
            Used when streaming from disk
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        verbose : int
            Controls the verbosity when fitting. Set to 0 for no printing,
            1 or higher for printing every verbose number of gradient steps.
        n_classes : int
            Number of classes
        device : str
            'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU
        """

        self.path_data = path_data
        if self.path_data:
            self.disk_size = os.path.getsize(path_data)
        else:
            assert X is not None, 'X must be specified if no path data'
            self.disk_size = X.nbytes if not scipy.sparse.issparse(X) else X.data.nbytes
        assert data_format in constants.DataFormat.ALL_FORMATS, \
            'Format must in {0}.'.format(", ".join(constants.DataFormat.ALL_FORMATS))
        self.format = data_format
        self.classification = classification
        self.ordinal = ordinal
        self.balanced = balanced
        self.MAXMEMGB = MAXMEMGB
        self.preprocess = preprocess
        self.set_params = set_params
        self.verbose = verbose
        self.n_classes = n_classes
        self.device = device
        self.path_data_stats = None

        if D is None:
            assert self.disk_size / BYTESPERGB <= self.MAXMEMGB, \
                'Cannot load data into memory. Supply D.'
            if self.format == constants.DataFormat.SVM:
                self.X, self.y = load_svmlight_file(path_data)
            elif self.format == constants.DataFormat.NUMPY:
                assert X is not None, 'X must be specified in numpy mode'
                assert y is not None, 'y must be specified in numpy mode'
                self.X = X
                self.y = y
                if self.n_classes is None:
                    self.n_classes = np.unique(y).shape[0]
                elif self.classification:
                    assert self.n_classes >= np.unique(y).shape[0], \
                        'n_classes given must be greater than or equal to the number of classes in y'
            else:
                raise NotImplementedError
            self.y = torch.as_tensor(self.y, dtype=torch.get_default_dtype())
            self.N, self.D = self.X.shape
            # assumes X was returned as a sparse array
            self.storage_level = (constants.StorageLevel.SPARSE
                                  if scipy.sparse.issparse(self.X)
                                  else constants.StorageLevel.DENSE)
        else:
            assert N is not None, 'Supply N.'
            self.N, self.D = N, D
            # assume sparse matrix cannot fit into memory
            self.storage_level = constants.StorageLevel.DISK

        self.dense_size_gb = self.get_dense_size()

        # check dense size
        self.set_dense_X()

        self.max_rows = int(self.MAXMEMGB * BYTESPERGB / BYTESPERREAL / self.D)
        assert self.max_rows, \
            'Cannot fit one dense row into %d GB memory.' % self.MAXMEMGB
        self.max_rows = self.max_batch_size()
        sys.stdout.flush()

        if n_to_estimate is None:
            self.n_to_estimate = self.max_batch_size()
        else:
            assert n_to_estimate <= self.N, 'n_to_estimate must be <= N.'
            self.n_to_estimate = n_to_estimate

        # initialize disk loader
        if self.storage_level == constants.StorageLevel.DISK and self.set_params:
            if self.format == constants.DataFormat.SVM:
                raise NotImplementedError(
                    'Please use partial fit to train on datasets that do not fit in memory')
            else:
                raise NotImplementedError

        # TODO: use a passed-in RNG here
        self.ix_statistics = np.random.permutation(self.N)[:self.n_to_estimate]
        self.n_features = self.D

        if self.set_params:
            if self.verbose:
                print('Finding data statistics...', end='')
                sys.stdout.flush()
            Xmn, sv1, Xsd, ymn, ysd = self.compute_data_stats()
            self.set_data_stats(Xmn, sv1, Xsd, ymn, ysd)
            if self.verbose:
                print()
            self.set_return_raw(False)
        else:
            self.set_return_raw(True)

        self.set_return_np(False)

        # this needs to occur after setting preprocessing params
        if (self.storage_level == constants.StorageLevel.DISK and
                self.format == constants.DataFormat.SVM and self.set_params):
            self.loader.batchsize = 1

    def get_dense_size(self):
        return self.N * self.D * BYTESPERREAL / BYTESPERGB

    def set_dense_X(self):
        if self.storage_level != constants.StorageLevel.DISK:
            if self.dense_size_gb <= self.MAXMEMGB:
                if self.storage_level == constants.StorageLevel.SPARSE:
                    self.X = self.X.toarray()
                self.X = torch.as_tensor(self.X, dtype=torch.get_default_dtype())
                self.storage_level = constants.StorageLevel.DENSE

    def set_return_np(self, boolean):
        self.return_np = boolean

    def set_return_raw(self, boolean):
        self.return_raw = boolean

    def save_data_stats(self, path_data_stats):
        """
        Dumps dataset statistics to a pickle file.
        """
        data_stats = {
            'Xmn': self.Xmn,
            'sv1': self.sv1,
            'Xsd': self.Xsd,
            'ymn': self.ymn,
            'ysd': self.ysd,
            'ix_statistics': self.ix_statistics,
        }
        pickle.dump(data_stats, open(path_data_stats, 'wb'))

    def load_data_stats(self, path_data_stats):
        stats = pickle.load(open(path_data_stats, 'rb'))
        self.path_data_stats = path_data_stats

        self.set_data_stats(np.asarray(stats['Xmn']), stats['sv1'],
                            stats['Xsd'], stats['ymn'], stats['ysd'])

        if self.storage_level == constants.StorageLevel.DISK and hasattr(self, 'path_mappings'):
            if 'ix_statistics' in stats:
                self.ix_statistics = stats['ix_statistics']
            else:
                self.ix_statistics = range(self.N)

        self.set_return_raw(False)

    def reset(self):
        """
        Resets the dataloader. Only implemented for disk StorageLevel.
        """
        if self.storage_level == constants.StorageLevel.DENSE:
            pass
        elif self.storage_level == constants.StorageLevel.SPARSE:
            pass
        elif self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                self.loader.reset()
            else:
                raise NotImplementedError

    def todense(self):
        assert hasattr(self, 'Xmn'), 'Set preprocess params first.'
        assert len(self) <= self.max_batch_size(), 'N must be <= max_batch_size().'

        with torch.no_grad():
            dense, _ = self.split(range(len(self)))
            Braw = self.return_raw
            Bnp = self.return_np
            self.set_return_raw(True)
            self.set_return_np(True)
            dense.X, dense.y = [], []

            def f_Xy(X, y):
                dense.X.append(X)
                dense.y.append(y)
            self.apply(f_Xy=f_Xy)
            dense.X = dense.X[-1]
            dense.y = dense.y[-1]
            self.set_return_raw(Braw)
            self.set_return_np(Bnp)
            dense.storage_level = constants.StorageLevel.DENSE

        return dense

    def split(self, ix):
        assert hasattr(self, 'Xmn'), 'Run set_preprocess_params() first.'

        first = type(self)(self.path_data, self.format, self.D,
                           N=len(ix),
                           classification=self.classification,
                           preprocess=self.preprocess,
                           n_to_estimate=None,
                           MAXMEMGB=self.MAXMEMGB,
                           set_params=False)
        second = type(self)(self.path_data, self.format, self.D,
                            N=self.N - len(ix),
                            classification=self.classification,
                            preprocess=self.preprocess,
                            n_to_estimate=None,
                            MAXMEMGB=self.MAXMEMGB,
                            set_params=False)

        first.storage_level = self.storage_level
        second.storage_level = self.storage_level

        # copy preprocess params
        if not self.classification:
            first.ymn = self.ymn
            second.ymn = self.ymn
            first.ysd = self.ysd
            second.ysd = self.ysd
        first.Xmn = self.Xmn
        second.Xmn = self.Xmn
        first.sv1 = self.sv1
        second.sv1 = self.sv1

        if self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                first.Xsd = self.Xsd
                second.Xsd = self.Xsd
            else:
                raise NotImplementedError

        # initialize data structures
        if self.storage_level == constants.StorageLevel.DISK:
            if self.format == constants.DataFormat.SVM:
                raise NotImplementedError
            raise NotImplementedError
        elif self.storage_level in [constants.StorageLevel.SPARSE, constants.StorageLevel.DENSE]:
            first.X, first.y = self.X[ix], self.y[ix]
            ixsec = list(set(range(self.N)).difference(set(ix)))
            second.X, second.y = self.X[ixsec], self.y[ixsec]

        return first, second

    @staticmethod
    def sparse_std(X, X_mean):
        """
        Calculate the column-wise standard deviations of a sparse matrix.
        """
        X_copy = X.copy()
        X_copy.data **= 2  # square non-zero elements
        E_x_squared = np.array(X_copy.mean(axis=0)).ravel()
        Xsd = np.sqrt(E_x_squared - X_mean ** 2)
        return Xsd

    def compute_data_stats(self):
        """
        1. computes/estimates feature means
        2. if preprocess == 'zscore', computes/estimates feature standard devs
        3. if not classification, computes/estimates target mean/standard dev
        4. estimates largest singular value of data matrix
        """
        t = time.time()
        X, y = self.X[self.ix_statistics], self.y[self.ix_statistics]
        preprocess = self.preprocess
        classification = self.classification

        Xmn = (X.mean(dim=0)
               if not scipy.sparse.issparse(X)
               else np.array(X.mean(axis=0)).ravel())

        if preprocess == constants.Preprocess.ZSCORE:
            Xsd = (X.std(dim=0)
                   if not scipy.sparse.issparse(X)
                   else PrepareData.sparse_std(X, Xmn))
            Xsd[Xsd == 0] = 1.
        else:
            Xsd = 1.

        if preprocess is not None and preprocess:
            if preprocess == constants.Preprocess.ZSCORE:
                Xc = (X - Xmn) / Xsd
            else:
                Xc = X - Xmn
        else:
            Xc = X - Xmn
        sv1 = scipy.sparse.linalg.svds(Xc / (
            torch.sqrt(torch.prod(torch.as_tensor(y.size(), dtype=torch.get_default_dtype())))
            if not scipy.sparse.issparse(X)
            else y.numpy().size),
                                       k=1,
                                       which='LM',
                                       return_singular_vectors=False)
        # avoid runaway sv1
        sv1 = np.array([min(np.finfo(np.float32).max, sv1[0])])

        if not classification:
            ymn = y.mean()
            ysd = y.std()
        else:
            # TODO: set these, for each class?
            ymn = 0.
            ysd = 1.
        if self.verbose:
            print(" computing data statistics took: ", time.time() - t)

        return Xmn, sv1, Xsd, ymn, ysd

    def set_data_stats(self, Xmn, sv1, Xsd=1., ymn=0., ysd=1.):
        """
        Saves dataset stats to self to be used for preprocessing.
        """
        self.Xmn = torch.as_tensor(Xmn, dtype=torch.get_default_dtype()).to(self.device)
        self.sv1 = torch.as_tensor(sv1, dtype=torch.get_default_dtype()).to(self.device)
        self.Xsd = torch.as_tensor(Xsd, dtype=torch.get_default_dtype()).to(self.device)
        self.ymn = torch.as_tensor(ymn, dtype=torch.get_default_dtype()).to(self.device)
        self.ysd = torch.as_tensor(ysd, dtype=torch.get_default_dtype()).to(self.device)

    def apply_preprocess(self, X, y):
        """
        Faster on gpu device, while dataloading takes up a large portion of the time.
        """
        with torch.no_grad():
            if not self.classification:
                y = (y.reshape((-1, 1)) - self.ymn) / self.ysd
            else:
                y = y.reshape((-1, 1))
            X = (X - self.Xmn) / self.sv1

            if self.preprocess == constants.Preprocess.ZSCORE:
                X /= self.Xsd

            return X, y

    def max_batch_size(self):
        """
        Return the maximum batchsize for the dataset.
        """
        return int(np.min([self.max_rows, self.N]))

    def apply(self, ix_rows=None, ix_cols=None, f_Xy=None):
        if f_Xy is None:
            return
        if ix_rows is None:
            ix_rows = range(self.N)
        if ix_cols is None:
            ix_cols = range(self.n_features)

        f_Xy((self.X[ix_rows, ix_cols]
              if not self.storage_level == constants.StorageLevel.SPARSE
              else self.X[ix_rows, ix_cols].toarray()), self.y[ix_rows])

    def get_dense_data(self, ix_cols=None, ix_rows=None):
        if ix_cols is None:
            ix_cols = range(self.n_features)
        X = [np.zeros((0, len(ix_cols)))]
        y = [np.zeros((0, 1))]
        Bnp = self.return_np

        def f_Xy(Xb, yb, n):
            X[-1] = np.concatenate((X[-1], Xb), axis=0)
            y[-1] = np.concatenate((y[-1], yb), axis=0)

        self.apply(f_Xy=f_Xy, ix_rows=ix_rows, ix_cols=ix_cols)
        self.set_return_np(Bnp)

        return X[-1], y[-1]

    def __len__(self):
        return self.N

    def getXy(self, idx):
        if self.storage_level == constants.StorageLevel.DENSE:
            X, y = self.X[idx], self.y[idx]
        elif self.storage_level == constants.StorageLevel.SPARSE:
            # assume subset can fit into memory even if whole matrix can't
            X, y = self.X[idx].toarray(), self.y[idx]
        else:
            raise NotImplementedError
        return X, y

    def __getitem__(self, idx):
        with torch.no_grad():
            X, y = self.getXy(idx)
            X = X.toarray() if scipy.sparse.issparse(X) else X

            X = torch.as_tensor(X, dtype=torch.get_default_dtype()).to(self.device)
            y = torch.as_tensor(y, dtype=torch.get_default_dtype()).to(self.device)

            if not self.return_raw:
                X, y = self.apply_preprocess(X, y)

            if self.classification and (self.n_classes is None or self.n_classes == 2):
                y[y == 0] = -1

            if self.return_np:
                if constants.Device.CPU not in self.device:
                    X = X.cpu()
                    y = y.cpu()
                X = X.numpy()
                y = y.numpy()
                return X, y

            return X, y

class ChunkDataLoader(DataLoader):
    """
    DataLoader class used to more quickly load a batch of indices at once.
    """

    def __iter__(self):
        return _ChunkDataLoaderIter(self)


class _ChunkDataLoaderIter(_DataLoaderIter):
    """
    DataLoaderIter class used to more quickly load a batch of indices at once.
    """

    def __next__(self):
        # only chunk that is edited from base
        if self.num_workers == 0:
            # same-process loading
            indices = next(self.sample_iter)  # may raise StopIteration
            if len(indices) > 1:
                batch = self.dataset[np.array(indices)]
            else:
                batch = self.collate_fn([self.dataset[i] for i in indices])
            if self.pin_memory:
                batch = _utils.pin_memory.pin_memory_batch(batch)
            return batch

        # check if the next sample has already been generated
        if self.rcvd_idx in self.reorder_dict:
            batch = self.reorder_dict.pop(self.rcvd_idx)
            return self._process_next_batch(batch)

        if self.batches_outstanding == 0:
            self._shutdown_workers()
            raise StopIteration

        while True:
            assert (not self.shutdown and self.batches_outstanding > 0)
            idx, batch = self._get_batch()
            self.batches_outstanding -= 1
            if idx != self.rcvd_idx:
                # store out-of-order samples
                self.reorder_dict[idx] = batch
                continue
            return self._process_next_batch(batch)
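A usage sketch tying fginitialize.py together (the shapes and stats path are illustrative assumptions). PrepareData computes dataset statistics at construction time when set_params=True, and ChunkDataLoader fetches each batch with a single fancy index into the dataset rather than item by item. Note that _DataLoaderIter and _utils are private PyTorch APIs, so this module is tied to the specific torch version the package pins.

import numpy as np

X = np.random.randn(256, 10)
y = np.random.randint(0, 2, 256)

data = PrepareData(X=X, y=y,
                   data_format=constants.DataFormat.NUMPY,
                   classification=True)
data.save_data_stats('/tmp/stats.pkl')   # illustrative path; persists Xmn/sv1/Xsd/ymn/ysd

loader = ChunkDataLoader(data, batch_size=64, shuffle=True)
for Xb, yb in loader:                    # each batch arrives preprocessed, as tensors
    pass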
src/sdk/pynni/nni/feature_engineering/gradient_selector/fgtrain.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
# ... (full MIT license text as in feature_selector.py above)
# ==================================================================================================

import time

import numpy as np
import torch
from sklearn.feature_selection import SelectKBest, \
    f_classif, mutual_info_classif, f_regression, mutual_info_regression

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings
from nni.feature_engineering.gradient_selector.learnability import Solver
from nni.feature_engineering.gradient_selector.utils import EMA

torch.set_default_tensor_type(syssettings.torch.tensortype)

def get_optim_f_stop(maxiter, maxtime, dftol_stop, freltol_stop,
                     minibatch=True):
    """
    Check stopping conditions.
    """

    discount_factor = 1. / 3

    total_t = [0.]
    df_store = [np.nan]
    it_store = [0]
    relchange_store = [np.nan]
    f_ma = EMA(discount_factor=discount_factor)
    df_ma = EMA(discount_factor=discount_factor)

    def f_stop(f0, v0, it, t):

        flag_stop = False

        total_t[-1] += t
        g = f0.x.grad.clone().cpu().detach()
        df = g.abs().max().numpy().squeeze()
        v = v0.clone().cpu().detach()
        f = v.numpy().squeeze()

        if it >= maxiter:
            flag_stop = True
        elif total_t[-1] >= maxtime:
            flag_stop = True

        f_ma.update(f)
        df_ma.update(df)
        rel_change = f_ma.relchange()

        if ((not minibatch) and (df < dftol_stop)) \
                or (minibatch and (df_ma() < dftol_stop)):
            flag_stop = True

        if rel_change < freltol_stop:
            flag_stop = True

        if not minibatch:
            df_store[-1] = df
        else:
            df_store[-1] = df_ma()
        relchange_store[-1] = rel_change
        it_store[-1] = it

        return flag_stop

    return f_stop, {'t': total_t, 'it': it_store, 'df': df_store,
                    'relchange': relchange_store}
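
# A usage sketch (values illustrative): the returned f_stop closure is handed
# to Solver.train by _train below, and the accompanying dict holds
# single-element lists that f_stop mutates in place, so the caller can read
# the final iteration count and smoothed gradient magnitude afterwards:
#
#   f_stop, stop_conds = get_optim_f_stop(maxiter=100, maxtime=60.,
#                                         dftol_stop=1e-5, freltol_stop=1e-5)
#   # ... Solver.train calls f_stop(f0, v0, it, t) once per step ...
#   print(stop_conds['it'][-1], stop_conds['df'][-1])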

def get_init(data_train, init_type='on', rng=np.random.RandomState(0), prev_score=None):
    """
    Initialize the 'x' variable with different settings
    """

    D = data_train.n_features
    value_off = constants.Initialization.VALUE_DICT[
        constants.Initialization.OFF]
    value_on = constants.Initialization.VALUE_DICT[
        constants.Initialization.ON]

    if prev_score is not None:
        x0 = prev_score
    elif not isinstance(init_type, str):
        x0 = value_off * np.ones(D)
        x0[init_type] = value_on
    elif init_type.startswith(constants.Initialization.RANDOM):
        d = int(init_type.replace(constants.Initialization.RANDOM, ''))
        x0 = value_off * np.ones(D)
        x0[rng.permutation(D)[:d]] = value_on
    elif init_type == constants.Initialization.SKLEARN:
        B = data_train.return_raw
        X, y = data_train.get_dense_data()
        data_train.set_return_raw(B)
        ix = train_sk_dense(init_type, X, y, data_train.classification)
        x0 = value_off * np.ones(D)
        x0[ix] = value_on
    elif init_type in constants.Initialization.VALUE_DICT:
        x0 = constants.Initialization.VALUE_DICT[init_type] * np.ones(D)
    else:
        raise NotImplementedError(
            'init_type {0} not supported yet'.format(init_type))
    # pylint: disable=E1102
    return torch.tensor(x0.reshape((-1, 1)),
                        dtype=torch.get_default_dtype())

def get_checkpoint(S, stop_conds, rng=None, get_state=True):
    """
    Save the necessary information into a dictionary
    """

    m = {}
    m['ninitfeats'] = S.ninitfeats
    m['x0'] = S.x0
    x = S.x.clone().cpu().detach()
    m['feats'] = np.where(x.numpy() >= 0)[0]
    m.update({k: v[0] for k, v in stop_conds.items()})
    if get_state:
        m.update({constants.Checkpoint.MODEL: S.state_dict(),
                  constants.Checkpoint.OPT: S.opt_train.state_dict(),
                  constants.Checkpoint.RNG: torch.get_rng_state(),
                  })
    if rng:
        m.update({'rng_state': rng.get_state()})

    return m

def _train(data_train, Nminibatch, order, C, rng, lr_train, debug, maxiter,
           maxtime, init, dftol_stop, freltol_stop, dn_log, accum_steps,
           path_save, shuffle, device=constants.Device.CPU,
           verbose=1,
           prev_checkpoint=None,
           groups=None,
           soft_groups=None):
    """
    Main training loop.
    """

    t_init = time.time()

    x0 = get_init(data_train, init, rng)
    if isinstance(init, str) and init == constants.Initialization.ZERO:
        ninitfeats = -1
    else:
        ninitfeats = np.where(x0.detach().numpy() > 0)[0].size

    S = Solver(data_train, order,
               Nminibatch=Nminibatch, x0=x0, C=C,
               ftransform=lambda x: torch.sigmoid(2 * x),
               get_train_opt=lambda p: torch.optim.Adam(p, lr_train),
               rng=rng,
               accum_steps=accum_steps,
               shuffle=shuffle,
               groups=groups,
               soft_groups=soft_groups,
               device=device,
               verbose=verbose)
    S = S.to(device)

    S.ninitfeats = ninitfeats
    S.x0 = x0

    if prev_checkpoint:
        S.load_state_dict(prev_checkpoint[constants.Checkpoint.MODEL])
        S.opt_train.load_state_dict(prev_checkpoint[constants.Checkpoint.OPT])
        torch.set_rng_state(prev_checkpoint[constants.Checkpoint.RNG])

    minibatch = S.Ntrain != S.Nminibatch
    f_stop, stop_conds = get_optim_f_stop(maxiter, maxtime, dftol_stop,
                                          freltol_stop, minibatch=minibatch)
    if debug:
        pass
    else:
        f_callback = None
    stop_conds['t'][-1] = time.time() - t_init

    S.train(f_stop=f_stop, f_callback=f_callback)

    return get_checkpoint(S, stop_conds, rng), S

def train_sk_dense(ty, X, y, classification):
    if classification:
        if ty.startswith('skf'):
            d = int(ty.replace('skf', ''))
            f_sk = f_classif
        elif ty.startswith('skmi'):
            d = int(ty.replace('skmi', ''))
            f_sk = mutual_info_classif
    else:
        if ty.startswith('skf'):
            d = int(ty.replace('skf', ''))
            f_sk = f_regression
        elif ty.startswith('skmi'):
            d = int(ty.replace('skmi', ''))
            f_sk = mutual_info_regression
    t = time.time()
    clf = SelectKBest(f_sk, k=d)
    clf.fit_transform(X, y.squeeze())
    ix = np.argsort(-clf.scores_)
    ix = ix[np.where(np.invert(np.isnan(clf.scores_[ix])))[0]][:d]
    t = time.time() - t
    return {'feats': ix, 't': t}
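train_sk_dense interprets strings of the form 'skf<d>' (ANOVA F-score) or 'skmi<d>' (mutual information) and keeps the d best-scoring features via sklearn's SelectKBest. Note that get_init above passes the literal string 'sklearn' into it, which matches neither prefix, so the 'skf'/'skmi' forms appear to be the intended entry points. An illustrative call (X and y as in the earlier sketches):

result = train_sk_dense('skf10', X, y, classification=True)
print(result['feats'])   # indices of the 10 best features by f_classif score
print(result['t'])       # wall-clock seconds the selection took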
src/sdk/pynni/nni/feature_engineering/gradient_selector/gradient_selector.py (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
# ... (full MIT license text as in feature_selector.py above)
# ==================================================================================================

import time

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.feature_selection.base import SelectorMixin
from sklearn.utils.validation import check_is_fitted
import torch

from nni.feature_engineering.feature_selector import FeatureSelector
import nni.feature_engineering.gradient_selector.constants as constants
from nni.feature_engineering.gradient_selector.fginitialize import PrepareData
from nni.feature_engineering.gradient_selector.fgtrain import _train

class FeatureGradientSelector(FeatureSelector, BaseEstimator, SelectorMixin):
    def __init__(self,
                 order=4,
                 penalty=1,
                 n_features=None,
                 max_features=None,
                 learning_rate=1e-1,
                 init='zero',
                 n_epochs=1,
                 shuffle=True,
                 batch_size=1000,
                 target_batch_size=1000,
                 max_time=np.inf,
                 classification=True,
                 ordinal=False,
                 balanced=True,
                 preprocess='zscore',
                 soft_grouping=False,
                 verbose=0,
                 device='cpu'):
        """
        FeatureGradientSelector is a class that selects features for a machine
        learning model using a gradient-based search.

        Parameters
        ----------
        order : int
            What order of interactions to include. Higher orders
            may be more accurate but increase the run time. 12 is the maximum allowed order.
        penalty : int
            Constant that multiplies the regularization term.
        n_features : int
            If None, will automatically choose the number of features based on search.
            Otherwise, the number of top features to select.
        max_features : int
            If not None, will use the 'elbow method' to determine the number of features
            with max_features as the upper limit.
        learning_rate : float
            Learning rate of the gradient-based optimizer.
        init : str
            How to initialize the vector of scores. 'zero' is the default.
            Options: {'zero', 'on', 'off', 'onhigh', 'offhigh', 'sklearn'}
        n_epochs : int
            Number of epochs to run.
        shuffle : bool
            Shuffle "rows" prior to an epoch.
        batch_size : int
            Number of "rows" to process at a time.
        target_batch_size : int
            Number of "rows" to accumulate gradients over.
            Useful when many rows will not fit into memory but are needed for accurate estimation.
        classification : bool
            If True, problem is classification, else regression.
        ordinal : bool
            If True, problem is ordinal classification. Requires classification to be True.
        balanced : bool
            If True, each class is weighted equally in optimization, otherwise
            weighting is done via support of each class. Requires classification to be True.
        preprocess : str
            'zscore' which refers to centering and normalizing data to unit variance or
            'center' which only centers the data to 0 mean.
        soft_grouping : bool
            If True, groups represent features that come from the same source.
            Used to encourage sparsity of groups and features within groups.
        verbose : int
            Controls the verbosity when fitting. Set to 0 for no printing,
            1 or higher for printing every verbose number of gradient steps.
        device : str
            'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU.
        """
        assert order <= 12 and order >= 1, 'order must be an integer between 1 and 12, inclusive'
        assert n_features is None or max_features is None, \
            'only specify one of n_features and max_features at a time'

        self.order = order
        self.penalty = penalty
        self.n_features = n_features
        self.max_features = max_features
        self.learning_rate = learning_rate
        self.init = init
        self.n_epochs = n_epochs
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.target_batch_size = target_batch_size
        self.max_time = max_time
        self.dftol_stop = -1
        self.freltol_stop = -1
        self.classification = classification
        self.ordinal = ordinal
        self.balanced = balanced
        self.preprocess = preprocess
        self.soft_grouping = soft_grouping
        self.verbose = verbose
        self.device = device

        self.model_ = None
        self.scores_ = None
        self._prev_checkpoint = None
        self._data_train = None

    def partial_fit(self, X, y,
                    n_classes=None,
                    groups=None):
        """
        Select features via a gradient-based search on (X, y) on the given samples.
        Can be called repeatedly with different X and y to handle streaming datasets.

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        n_classes : int
            Number of classes across all calls to partial_fit.
            Can be obtained via `np.unique(y_all).shape[0]`, where y_all is the
            target vector of the entire dataset.
            This argument is expected for the first call to partial_fit;
            otherwise it will assume all classes are present in the batch of y given.
            It will be ignored in subsequent calls.
            Note that y doesn't need to contain all labels in `classes`.
        groups : array-like
            Optional, shape = [n_features]
            Groups of columns that must be selected as a unit,
            e.g. [0, 0, 1, 2] specifies that the first two columns are part of a group.
            This argument is expected for the first call to partial_fit;
            it will be ignored in subsequent calls.
        """
        try:
            self._partial_fit(X, y, n_classes=n_classes, groups=groups)
        except constants.NanError:
            if hasattr(self, '_prev_checkpoint'):
                # if it's already done some batches successfully just ignore it
                print('failed fitting this batch, loss was nan')
            else:
                # if this is the first batch, reset and try with doubles
                if self.verbose:
                    print('Loss was nan, trying with Doubles')
                self._reset()
                torch.set_default_tensor_type(torch.DoubleTensor)
                self._partial_fit(X, y, n_classes=n_classes, groups=groups)
        return self

    def _partial_fit(self, X, y, n_classes=None, groups=None):
        """
        Private function for partial_fit to enable trying floats before doubles.
        """
        # pass in X and y in chunks
        if hasattr(self, '_data_train'):
            # just overwrite the X and y from the new chunk but make them tensors
            # keep dataset stats from previous
            self._data_train.X = X.values if isinstance(X, pd.DataFrame) else X
            self._data_train.N, self._data_train.D = self._data_train.X.shape
            self._data_train.dense_size_gb = self._data_train.get_dense_size()
            self._data_train.set_dense_X()

            self._data_train.y = y.values if isinstance(y, pd.Series) else y
            self._data_train.y = torch.as_tensor(
                y, dtype=torch.get_default_dtype())
        else:
            data_train = self._prepare_data(X, y, n_classes=n_classes)
            self._data_train = data_train

        batch_size, _, accum_steps, max_iter = self._set_batch_size(
            self._data_train)

        rng = None  # not used
        debug = 0  # {0,1} print messages and do other stuff?
        dn_logs = None  # tensorboard logs; only specify if debug=1
        path_save = None  # intermediate models saves; only specify if debug=1
        m, solver = _train(self._data_train,
                           batch_size,
                           self.order,
                           self.penalty,
                           rng,
                           self.learning_rate,
                           debug,
                           max_iter,
                           self.max_time,
                           self.init,
                           self.dftol_stop,
                           self.freltol_stop,
                           dn_logs,
                           accum_steps,
                           path_save,
                           self.shuffle,
                           device=self.device,
                           verbose=self.verbose,
                           prev_checkpoint=self._prev_checkpoint if hasattr(
                               self, '_prev_checkpoint') else None,
                           groups=groups if not self.soft_grouping else None,
                           soft_groups=groups if self.soft_grouping else None)

        self._prev_checkpoint = m
        self._process_results(m, solver, X, groups=groups)
        return self

    def fit(self, X, y,
            groups=None):
        """
        Select features via a gradient-based search on (X, y).

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        groups : array-like
            Optional, shape = [n_features]
            Groups of columns that must be selected as a unit,
            e.g. [0, 0, 1, 2] specifies that the first two columns are part of a group.
        """
        try:
            self._fit(X, y, groups=groups)
        except constants.NanError:
            if self.verbose:
                print('Loss was nan, trying with Doubles')
            torch.set_default_tensor_type(torch.DoubleTensor)
            self._fit(X, y, groups=groups)
        return self
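
    # Usage sketch (illustrative; follows the sklearn-style API above):
    #
    #   fgs = FeatureGradientSelector(n_features=10)
    #   fgs.fit(X_train, y_train)
    #   print(fgs.get_selected_features())   # indices of the selected columns
    #   X_reduced = fgs.transform(X_train)   # keep only the selected columns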

    def get_selected_features(self):
        return self.selected_features_

    def _prepare_data(self, X, y, n_classes=None):
        """
        Returns a PrepareData object.
        """
        return PrepareData(X=X.values if isinstance(X, pd.DataFrame) else X,
                           y=y.values if isinstance(y, pd.Series) else y,
                           data_format=constants.DataFormat.NUMPY,
                           classification=int(self.classification),
                           ordinal=self.ordinal,
                           balanced=self.balanced,
                           preprocess=self.preprocess,
                           verbose=self.verbose,
                           device=self.device,
                           n_classes=n_classes)
def
_fit
(
self
,
X
,
y
,
groups
=
None
):
"""
Private function for fit to enable trying floats before doubles.
"""
data_train
=
self
.
_prepare_data
(
X
,
y
)
batch_size
,
_
,
accum_steps
,
max_iter
=
self
.
_set_batch_size
(
data_train
)
rng
=
None
# not used
debug
=
0
# {0,1} print messages and log to tensorboard
dn_logs
=
None
# tensorboard logs; only specify if debug=1
path_save
=
None
# intermediate models saves; only specify if debug=1
m
,
solver
=
_train
(
data_train
,
batch_size
,
self
.
order
,
self
.
penalty
,
rng
,
self
.
learning_rate
,
debug
,
max_iter
,
self
.
max_time
,
self
.
init
,
self
.
dftol_stop
,
self
.
freltol_stop
,
dn_logs
,
accum_steps
,
path_save
,
self
.
shuffle
,
device
=
self
.
device
,
verbose
=
self
.
verbose
,
groups
=
groups
if
not
self
.
soft_grouping
else
None
,
soft_groups
=
groups
if
self
.
soft_grouping
else
None
)
self
.
_process_results
(
m
,
solver
,
X
,
groups
=
groups
)
return
self
def
_process_torch_scores
(
self
,
scores
):
"""
Convert scores into flat numpy arrays.
"""
if
constants
.
Device
.
CUDA
in
scores
.
device
.
type
:
scores
=
scores
.
cpu
()
return
scores
.
numpy
().
ravel
()
def
_set_batch_size
(
self
,
data_train
):
"""
Ensures that batch_size is less than the number of rows.
"""
batch_size
=
min
(
self
.
batch_size
,
data_train
.
N
)
target_batch_size
=
min
(
max
(
self
.
batch_size
,
self
.
target_batch_size
),
data_train
.
N
)
accum_steps
=
max
(
int
(
np
.
ceil
(
target_batch_size
/
self
.
batch_size
)),
1
)
max_iter
=
self
.
n_epochs
*
(
data_train
.
N
//
batch_size
)
return
batch_size
,
target_batch_size
,
accum_steps
,
max_iter
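For intuition, a small worked example of the sizing logic above (the numbers are made up):

# with data_train.N = 1000, self.batch_size = 128,
# self.target_batch_size = 1000 and self.n_epochs = 2:
#   batch_size        = min(128, 1000)            -> 128
#   target_batch_size = min(max(128, 1000), 1000) -> 1000
#   accum_steps       = max(ceil(1000 / 128), 1)  -> 8 gradient-accumulation steps
#   max_iter          = 2 * (1000 // 128)         -> 14 minibatch iterations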
    def _process_results(self, m, solver, X, groups=None):
        """
        Process the results of a run into something suitable for transform().
        """
        self.scores_ = self._process_torch_scores(
            torch.sigmoid(m[constants.Checkpoint.MODEL]['x'] * 2))
        if self.max_features:
            self.max_features = min([self.max_features, self.scores_.shape[0]])
            n_features = self._recommend_number_features(solver)
            self.set_n_features(n_features, groups=groups)
        elif self.n_features:
            self.set_n_features(self.n_features, groups=groups)
        else:
            self.selected_features_ = m['feats']

        # subtract elapsed time from max_time
        self.max_time -= m['t']

        self.model_ = m

        return self

    def transform(self, X):
        """
        Returns selected features from X.

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        """
        self._get_support_mask()
        if self.selected_features_.shape[0] == 0:
            raise ValueError(
                'No features selected; consider lowering the penalty or specifying n_features')
        return (X.iloc[:, self.selected_features_]
                if isinstance(X, pd.DataFrame)
                else X[:, self.selected_features_])

    def get_support(self, indices=False):
        """
        Get a mask, or integer index, of the features selected.

        Parameters
        ----------
        indices : bool
            Default False.
            If True, the return value will be an array of integers, rather than a boolean mask.

        Returns
        -------
        list :
            returns support: an index that selects the retained features from a feature vector.
            If indices is False, this is a boolean array of shape [# input features],
            in which an element is True iff its corresponding feature is selected for retention.
            If indices is True, this is an integer array of shape [# output features] whose values
            are indices into the input feature vector.
        """
        self._get_support_mask()
        if indices:
            return self.selected_features_

        mask = np.zeros_like(self.scores_, dtype=bool)
        # pylint: disable=E1137
        mask[self.selected_features_] = True
        return mask
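A hedged illustration of the two return modes (assuming a fitted selector over five input columns where columns 1 and 3 were kept):

selector.get_support(indices=True)   # -> array([1, 3])
selector.get_support(indices=False)  # -> array([False, True, False, True, False])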
    def inverse_transform(self, X):
        """
        Returns transformed X to the original number of columns.
        This operation is lossy and all columns not in the transformed data
        will be returned as columns of 0s.
        """
        self._get_support_mask()
        X_new = np.zeros((X.shape[0], self.scores_.shape[0]))
        # scatter the kept features back into their original column positions
        X_new[:, self.selected_features_] = X
        return X_new

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        """
        params = self.__dict__
        params = {key: val for (key, val) in params.items()
                  if not key.endswith('_')}
        return params

    def set_params(self, **params):
        """
        Set the parameters of this estimator.
        """
        for param in params:
            if hasattr(self, param):
                setattr(self, param, params[param])
        return self

    def fit_transform(self, X, y):
        """
        Select features and then return X with the selected features.

        Parameters
        ----------
        X : array-like
            Shape = [n_samples, n_features]
            The training input samples.
        y : array-like
            Shape = [n_samples]
            The target values (class labels in classification, real numbers in
            regression).
        """
        self.fit(X, y)
        return self.transform(X)

    def _get_support_mask(self):
        """
        Check if it is fitted.
        """
        check_is_fitted(self, 'scores_')
    def _generate_scores(self, solver, xsub, ysub, step_size, feature_order):
        """
        Generate forward passes to determine the number of features when max_features is set.
        """
        scores = []
        for i in np.arange(1, self.max_features + 1, step_size):
            # optimization possible since xsub is growing?
            i = int(np.ceil(i))
            # pylint: disable=E1102
            score = solver.f_train(torch.tensor(np.ones(i),
                                                dtype=torch.get_default_dtype()
                                                ).unsqueeze(1).to(self.device),
                                   xsub[:, feature_order[:i]],
                                   ysub)
            if constants.Device.CUDA in score.device.type:
                score = score.cpu()
            # score.numpy()[0][0]
            scores.append(score)
        return scores
    def set_n_features(self, n, groups=None):
        """
        Set the number of features to return after fitting.
        """
        self._get_support_mask()
        self.n_features = n
        return self._set_top_features(groups=groups)

    def _set_top_features(self, groups=None):
        """
        Set the selected features after a run.

        With groups, ensures that if any member of a group is selected, all members are selected.
        """
        self._get_support_mask()
        assert self.n_features <= self.scores_.shape[0], \
            'n_features must be less than or equal to the number of columns in X'
        # pylint: disable=E1130
        self.selected_features_ = np.argpartition(
            self.scores_, -self.n_features)[-self.n_features:]
        if groups is not None and not self.soft_grouping:
            selected_feature_set = set(self.selected_features_.tolist())
            for group in np.unique(groups):
                group_members = np.where(groups == group)[0].tolist()
                if selected_feature_set.intersection(group_members):
                    selected_feature_set.update(group_members)
            self.selected_features_ = np.array(list(selected_feature_set))
        self.selected_features_ = np.sort(self.selected_features_)
        return self
    def set_top_percentile(self, percentile, groups=None):
        """
        Set the percentile of features to return after fitting.
        """
        self._get_support_mask()
        assert percentile <= 1 and percentile >= 0, \
            'percentile must be between 0 and 1 inclusive'
        self.n_features = int(self.scores_.shape[0] * percentile)
        return self._set_top_features(groups=groups)
    def _recommend_number_features(self, solver, max_time=None):
        """
        Get the recommended number of features by doing forward passes when max_features is set.
        """
        max_time = max_time if max_time else self.max_time
        if max_time < 0:
            max_time = 60  # allow 1 minute extra if we already spent max_time
        MAX_FORWARD_PASS = 200
        MAX_FULL_BATCHES = 3  # the forward passes can take longer than the fitting
        # if we allow a full epoch of data to be included. By only doing 3 full batches at most
        # we get enough accuracy without increasing the time too much. This
        # constant may not be optimal
        accum_steps = solver.accum_steps
        step_size = max(self.max_features / MAX_FORWARD_PASS, 1)
        # pylint: disable=E1130
        feature_order = np.argsort(-self.scores_)  # note the negative

        t = time.time()
        dataloader_iterator = iter(solver.ds_train)
        full_scores = []
        # keep_going = True
        with torch.no_grad():
            # might want to only consider a batch valid if there are at least
            # two classes
            for _ in range(accum_steps * MAX_FULL_BATCHES):
                scores = []
                try:
                    xsub, ysub = next(dataloader_iterator)
                except StopIteration:
                    # done with epoch, don't do more than one epoch
                    break
                except Exception as e:
                    print(e)
                    break
                if max_time and time.time() - t > max_time:
                    if self.verbose:
                        print("Stopping forward passes because they reached max_time: ",
                              max_time)
                    if not full_scores:
                        # no forward passes worked, return half of max_features
                        return self.max_features // 2
                    break
                if solver.multiclass:
                    for target_class in range(solver.n_classes):
                        ysub_binary = solver.transform_y_into_binary(
                            ysub, target_class)
                        scaling_value = solver._get_scaling_value(
                            ysub, target_class)
                        if not solver._skip_y_forward(ysub_binary):
                            scores = self._generate_scores(
                                solver, xsub, ysub_binary, step_size, feature_order)
                            # one row will represent one class that is present in the data
                            # all classes are weighted equally
                            full_scores.append(
                                [score * scaling_value for score in scores])
                else:
                    if not solver._skip_y_forward(ysub):
                        scores = self._generate_scores(
                            solver, xsub, ysub, step_size, feature_order)
                        full_scores.append(scores)
        best_index = FeatureGradientSelector._find_best_index_elbow(
            full_scores)
        if self.verbose:
            print("Forward passes took: ", time.time() - t)
        # account for step size and off by one (n_features is 1 indexed, not 0)
        return int(
            np.ceil(np.arange(1, self.max_features + 1, step_size))[best_index])
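A quick worked example of the step-size arithmetic above (illustrative numbers):

# max_features = 400 -> step_size = max(400 / 200, 1) = 2,
# so feature counts 1, 3, 5, ..., 399 are probed;
# best_index = 10 then maps back to int(np.ceil(21)) = 21 features.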
    @staticmethod
    def _find_best_index_elbow(full_scores):
        """
        Finds the point on the curve that maximizes distance from the line determined by the endpoints.
        """
        scores = pd.DataFrame(full_scores).mean(0).values.tolist()
        first_point = np.array([0, scores[0]])
        last_point = np.array([len(scores) - 1, scores[-1]])
        elbow_metric = []
        for i in range(len(scores)):
            elbow_metric.append(
                FeatureGradientSelector._distance_to_line(
                    first_point, last_point, np.array([i, scores[i]])))
        return np.argmax(elbow_metric)

    @staticmethod
    def _distance_to_line(start_point, end_point, new_point):
        """
        Calculates the shortest distance from new_point to the line determined by start_point and end_point.
        """
        # for calculating elbow method
        return np.cross(new_point - start_point,
                        end_point - start_point) / np.linalg.norm(
                            end_point - start_point)
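A small worked example of the elbow search (a hedged illustration; the toy scores are made up):

scores = [1.0, 0.4, 0.2, 0.15, 0.14]   # mean loss vs. number of features
first = np.array([0, scores[0]])
last = np.array([len(scores) - 1, scores[-1]])
dists = [np.cross(np.array([i, s]) - first, last - first)
         / np.linalg.norm(last - first) for i, s in enumerate(scores)]
int(np.argmax(dists))                   # -> 1, the sharpest bend in the curve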
    def _reset(self):
        """
        Reset the estimator by deleting all private and fit parameters.
        """
        # iterate over a copy of the keys, since delattr mutates __dict__
        for key in list(self.__dict__.keys()):
            if key.endswith('_') or key.startswith('_'):
                delattr(self, key)
        return self
src/sdk/pynni/nni/feature_engineering/gradient_selector/learnability.py
0 → 100644
View file @
cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import time

import numpy as np
import scipy.special
import torch
import torch.nn as nn

import nni.feature_engineering.gradient_selector.constants as constants
import nni.feature_engineering.gradient_selector.syssettings as syssettings
from nni.feature_engineering.gradient_selector.fginitialize import ChunkDataLoader

torch.set_default_tensor_type(syssettings.torch.tensortype)
sparsetensor = syssettings.torch.sparse.tensortype


def def_train_opt(p):
    """
    Return the default optimizer.
    """
    return torch.optim.Adam(p, 1e-1, amsgrad=False)


def revcumsum(U):
    """
    Reverse cumulative sum for faster performance.
    """
    return U.flip(dims=[0]).cumsum(dim=0).flip(dims=[0])
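For example:

# revcumsum(torch.tensor([1., 2., 3.]))  ->  tensor([6., 5., 3.])
# i.e. element i holds the sum of all elements from i to the end.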
def triudr(X, r):

    Zr = torch.zeros_like(X, requires_grad=False)
    U = X * r
    Zr[:-1] = X[:-1] * revcumsum(U)[1:]

    return Zr


def triudl(X, l):

    Zl = torch.zeros_like(X, requires_grad=False)
    U = X * l
    Zl[1:] = X[1:] * (U.cumsum(dim=0)[:-1])

    return Zl
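For orientation, my reading of these two helpers (a hedged sanity-check sketch, not part of the file): combined with the torch.mm(Z, s) calls in LearnabilityMB.forward below, they apply the strictly upper/lower triangular part of the kernel X·diag(s)·Xᵀ to a vector using cumulative sums, so the N x N kernel is never materialized.

# illustrative check of the identity exploited by triudr:
N, D = 5, 3
X, r, s = torch.randn(N, D), torch.randn(N, 1), torch.randn(D, 1)
K = X @ torch.diag(s.squeeze()) @ X.t()    # full N x N kernel
direct = torch.triu(K, diagonal=1) @ r     # strictly upper part times r
fast = torch.mm(triudr(X, r), s)           # cumulative-sum shortcut
assert torch.allclose(direct, fast, atol=1e-4)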
class ramp(torch.autograd.Function):
    """
    Ensures input is between 0 and 1
    """

    @staticmethod
    def forward(ctx, input_data):
        ctx.save_for_backward(input_data)
        return input_data.clamp(min=0, max=1)

    @staticmethod
    def backward(ctx, grad_output):
        input_data, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input_data < 0] = 1e-2
        grad_input[input_data > 1] = -1e-2
        return grad_input


class safesqrt(torch.autograd.Function):
    """
    Square root without dividing by 0.
    """

    @staticmethod
    def forward(ctx, input_data):
        o = input_data.sqrt()
        ctx.save_for_backward(input_data, o)
        return o

    @staticmethod
    def backward(ctx, grad_output):
        _, o = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input *= 0.5 / (o + constants.EPSILON)
        return grad_input
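These custom autograd Functions are invoked through .apply(), as in ret_val below; a hedged mini-example with made-up values:

x = torch.tensor([0.0, 0.5, 2.0], requires_grad=True)
out = safesqrt.apply(ramp.apply(x))   # clamp to [0, 1], then a 0-safe sqrt
out.sum().backward()                  # the gradient at x == 0 stays finite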
class LearnabilityMB(nn.Module):
    """
    Calculates the learnability of a set of features.
    mini-batch version w/ "left" and "right" multiplies
    """

    def __init__(self, Nminibatch, D, coeff, groups=None, binary=False,
                 device=constants.Device.CPU):
        super(LearnabilityMB, self).__init__()

        a = coeff / scipy.special.binom(Nminibatch, np.arange(coeff.size) + 2)
        self.order = a.size
        # pylint: disable=E1102
        self.a = torch.tensor(a, dtype=torch.get_default_dtype(),
                              requires_grad=False)
        self.binary = binary

        self.a = self.a.to(device)

    def ret_val(self, z):
        """
        Get the return value based on z.
        """
        if not self.binary:
            return 1 - z
        else:
            return 0.5 * (1 - safesqrt.apply(ramp.apply(z)))

    def forward(self, s, X, y):

        l = y.clone()
        r = y.clone()
        z = 0

        for i in range(self.order):
            if i % 2 == 0:
                Z = triudr(X, r)
                r = torch.mm(Z, s)
            else:
                Z = triudl(X, l)
                l = torch.mm(Z, s)
            if self.a[i] != 0:
                # save the computation if a[i] is 0
                p = torch.mm(l.t(), r)
                z += self.a[i] * p
        return self.ret_val(z)
class Solver(nn.Module):
    """
    Class that performs the main optimization.
    Keeps track of the current x and iterates through data to learn x given the penalty and order.
    """

    def __init__(self,
                 PreparedData,
                 order,
                 Nminibatch=None,
                 groups=None,
                 soft_groups=None,
                 x0=None,
                 C=1,
                 ftransform=torch.sigmoid,
                 get_train_opt=def_train_opt,
                 accum_steps=1,
                 rng=np.random.RandomState(0),
                 max_norm_clip=1.,
                 shuffle=True,
                 device=constants.Device.CPU,
                 verbose=1):
        """
        Parameters
        ----------
        PreparedData : Dataset of PrepareData class
        order : int
            What order of interactions to include. Higher orders
            may be more accurate but increase the run time. 12 is the maximum allowed order.
        Nminibatch : int
            Number of rows in a mini batch
        groups : array-like
            Optional, shape = [n_features]
            Groups of columns that must be selected as a unit
            e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
        soft_groups : array-like
            Optional, shape = [n_features]
            Groups of columns that come from the same source.
            Used to encourage sparsity in the number of sources selected
            e.g. [0, 0, 1, 2] specifies the first two columns are part of a group.
        x0 : torch.tensor
            Optional, initialization of x.
        C : float
            Penalty parameter; constant that multiplies the regularization term.
        ftransform : function
            Function to transform the x. sigmoid is the default.
        get_train_opt : function
            Function that returns a pytorch optimizer, Adam is the default
        accum_steps : int
            Number of gradient accumulation steps per optimizer step
        rng : random state
        max_norm_clip : float
            Maximum allowable size of the gradient
        shuffle : bool
            Whether or not to shuffle data within the dataloader
        device : str
            'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU
        verbose : int
            Controls the verbosity when fitting. Set to 0 for no printing
            1 or higher for printing every verbose number of gradient steps.
        """
        super(Solver, self).__init__()

        self.Ntrain, self.D = PreparedData.N, PreparedData.n_features
        if groups is not None:
            # pylint: disable=E1102
            groups = torch.tensor(groups, dtype=torch.long)
            self.groups = groups
        else:
            self.groups = None
        if soft_groups is not None:
            # pylint: disable=E1102
            soft_groups = torch.tensor(soft_groups, dtype=torch.long)
            self.soft_D = torch.unique(soft_groups).size()[0]
        else:
            self.soft_D = None
        self.soft_groups = soft_groups

        if Nminibatch is None:
            Nminibatch = self.Ntrain
        else:
            if Nminibatch > self.Ntrain:
                print('Minibatch larger than sample size.'
                      + (' Reducing from %d to %d.'
                         % (Nminibatch, self.Ntrain)))
                Nminibatch = self.Ntrain
            if Nminibatch > PreparedData.max_rows:
                print('Minibatch larger than mem-allowed.'
                      + (' Reducing from %d to %d.'
                         % (Nminibatch, PreparedData.max_rows)))
                Nminibatch = int(np.min([Nminibatch, PreparedData.max_rows]))
        self.Nminibatch = Nminibatch
        self.accum_steps = accum_steps

        if x0 is None:
            x0 = torch.zeros(self.D, 1, dtype=torch.get_default_dtype())
        self.ftransform = ftransform
        self.x = nn.Parameter(x0)
        self.max_norm = max_norm_clip

        self.device = device
        self.verbose = verbose

        self.multiclass = PreparedData.classification \
            and PreparedData.n_classes and PreparedData.n_classes > 2
        if self.multiclass:
            self.n_classes = PreparedData.n_classes
        else:
            self.n_classes = None
        # whether to treat all classes equally
        self.balanced = PreparedData.balanced
        self.ordinal = PreparedData.ordinal

        if (hasattr(PreparedData, 'mappings')
                or PreparedData.storage_level == 'disk'):
            num_workers = PreparedData.num_workers
        else:
            # data is already dense and in memory; no loader workers needed
            num_workers = 0
        # pin_memory is disabled for both CPU and CUDA devices
        pin_memory = False

        self.ds_train = ChunkDataLoader(
            PreparedData,
            batch_size=self.Nminibatch,
            shuffle=shuffle,
            drop_last=True,
            num_workers=num_workers,
            pin_memory=pin_memory,
            timeout=60)
        self.f_train = LearnabilityMB(self.Nminibatch, self.D,
                                      constants.Coefficients.SLE[order],
                                      self.groups,
                                      binary=PreparedData.classification,
                                      device=self.device)
        self.opt_train = get_train_opt(torch.nn.ParameterList([self.x]))
        self.it = 0
        self.iters_per_epoch = int(np.ceil(len(self.ds_train.dataset)
                                           / self.ds_train.batch_size))
        self.f_train = self.f_train.to(device)
        # pylint: disable=E1102
        self.w = torch.tensor(C / (C + 1),
                              dtype=torch.get_default_dtype(),
                              requires_grad=False)
        self.w = self.w.to(device)
    def penalty(self, s):
        """
        Calculate L1 Penalty.
        """
        to_return = torch.sum(s) / self.D
        if self.soft_groups is not None:
            # if soft_groups, there is an additional penalty for using more
            # groups
            s_grouped = torch.zeros(self.soft_D, 1,
                                    dtype=torch.get_default_dtype(),
                                    device=self.device)
            for group in torch.unique(self.soft_groups):
                # groups should be indexed 0 to n_group - 1
                # TODO: consider other functions here
                s_grouped[group] = s[self.soft_groups == group].max()
            # each component of the penalty contributes .5
            # TODO: could make this a user given parameter
            to_return = (to_return + torch.sum(s_grouped) / self.soft_D) * .5
        return to_return
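A worked example of the soft-group penalty (hedged; the values are made up):

# D = 4, s = [0.9, 0.1, 0.8, 0.2], soft_groups = [0, 0, 1, 1] (soft_D = 2):
#   base term    : sum(s) / D                  = 2.0 / 4 = 0.5
#   grouped term : sum(max per group) / soft_D = 1.7 / 2 = 0.85
#   penalty      : (0.5 + 0.85) * 0.5          = 0.675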
    def forward_and_backward(self, s, xsub, ysub, retain_graph=False):
        """
        Completes the forward operation and computes gradients for learnability and penalty.
        """
        f_train = self.f_train(s, xsub, ysub)
        pen = self.penalty(s)
        # pylint: disable=E1102
        grad_outputs = torch.tensor([[1]], dtype=torch.get_default_dtype(),
                                    device=self.device)
        g1, = torch.autograd.grad([f_train], [self.x], grad_outputs,
                                  retain_graph=True)
        # pylint: disable=E1102
        grad_outputs = torch.tensor([[1]], dtype=torch.get_default_dtype(),
                                    device=self.device)
        g2, = torch.autograd.grad([pen], [self.x], grad_outputs,
                                  retain_graph=retain_graph)
        return f_train, pen, g1, g2

    def combine_gradient(self, g1, g2):
        """
        Combine gradients from learnability and penalty.

        Parameters
        ----------
        g1 : array-like
            gradient from learnability
        g2 : array-like
            gradient from penalty
        """
        to_return = ((1 - self.w) * g1 + self.w * g2) / self.accum_steps
        if self.groups is not None:
            # each column gets its own gradient, but a group can only move
            # up or down as a unit, so the gradient for the group
            # should be the average of the gradients of its columns
            to_return_grouped = torch.zeros_like(self.x)
            for group in torch.unique(self.groups):
                to_return_grouped[self.groups == group] = \
                    to_return[self.groups == group].mean()
            to_return = to_return_grouped
        return to_return

    def combine_loss(self, f_train, pen):
        """
        Combine the learnability and L1 penalty.
        """
        return ((1 - self.w) * f_train.detach() + self.w * pen.detach()) \
            / self.accum_steps
    def transform_y_into_binary(self, ysub, target_class):
        """
        Transforms multiclass classification problems into a binary classification problem.
        """
        with torch.no_grad():
            ysub_binary = torch.zeros_like(ysub)
            if self.ordinal:
                # turn ordinal problems into n-1 classifications of whether
                # an example has rank at least k
                if target_class == 0:
                    return None

                ysub_binary[ysub >= target_class] = 1
                ysub_binary[ysub < target_class] = -1
            else:
                # turn multiclass problems into n binary classifications
                ysub_binary[ysub == target_class] = 1
                ysub_binary[ysub != target_class] = -1
        return ysub_binary
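A small worked illustration of the two binarization modes (values made up):

# with ysub = [0, 1, 2, 1]:
#   multiclass, target_class=1 -> [-1,  1, -1,  1]
#   ordinal,    target_class=1 -> [-1,  1,  1,  1]   (rank >= 1?)
#   ordinal,    target_class=0 -> None               (nothing to separate)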
    def _get_scaling_value(self, ysub, target_class):
        """
        Returns the weight given to a class for multiclass classification.
        """
        if self.balanced:
            if self.ordinal:
                return 1 / (torch.unique(ysub).size()[0] - 1)

            return 1 / torch.unique(ysub).size()[0]
        else:
            if self.ordinal:
                this_class_proportion = torch.mean(ysub >= target_class)
                normalizing_constant = 0
                for i in range(1, self.n_classes):
                    normalizing_constant += torch.mean(ysub >= i)
                return this_class_proportion / normalizing_constant
            else:
                return torch.mean(ysub == target_class)

    def _skip_y_forward(self, y):
        """
        Returns boolean of whether to skip the current y if there is nothing to be learned from it.
        """
        if y is None:
            return True
        elif torch.unique(y).size()[0] < 2:
            return True
        else:
            return False
    def train(self, f_callback=None, f_stop=None):
        """
        Trains the estimator to determine which features to include.

        Parameters
        ----------
        f_callback : function
            Function that performs a callback
        f_stop : function
            Function that tells you when to stop
        """

        t = time.time()
        h = torch.zeros([1, 1], dtype=torch.get_default_dtype())
        h = h.to(self.device)
        # h_complete is so when we divide by the number of classes
        # we only do that for that minibatch if accumulating
        h_complete = h.clone()
        flag_stop = False
        dataloader_iterator = iter(self.ds_train)
        self.x.grad = torch.zeros_like(self.x)
        while not flag_stop:
            try:
                xsub, ysub = next(dataloader_iterator)
            except StopIteration:
                dataloader_iterator = iter(self.ds_train)
                xsub, ysub = next(dataloader_iterator)
            try:
                s = self.ftransform(self.x)
                s = s.to(self.device)
                if self.multiclass:
                    # accumulate gradients over each class, classes range from
                    # 0 to n_classes - 1
                    #num_classes_batch = torch.unique(ysub).size()[0]
                    for target_class in range(self.n_classes):
                        ysub_binary = self.transform_y_into_binary(
                            ysub, target_class)
                        if self._skip_y_forward(ysub_binary):
                            continue
                        # we should skip if the target class is not included,
                        # but that changes what we divide by
                        scaling_value = self._get_scaling_value(
                            ysub, target_class)
                        f_train, pen, g1, g2 = self.forward_and_backward(
                            s, xsub, ysub_binary, retain_graph=True)
                        self.x.grad += self.combine_gradient(
                            g1, g2) * scaling_value
                        h += self.combine_loss(f_train, pen) * scaling_value
                else:
                    if not self._skip_y_forward(ysub):
                        f_train, pen, g1, g2 = self.forward_and_backward(
                            s, xsub, ysub)
                        self.x.grad += self.combine_gradient(g1, g2)
                        h += self.combine_loss(f_train, pen)
                    else:
                        continue
                h_complete += h
                self.it += 1
                if torch.isnan(h):
                    raise constants.NanError(
                        'Loss is nan, something may be misconfigured')
                if self.it % self.accum_steps == 0:
                    torch.nn.utils.clip_grad_norm_(
                        torch.nn.ParameterList([self.x]),
                        max_norm=self.max_norm)
                    self.opt_train.step()

                    t = time.time() - t
                    if f_stop is not None:
                        flag_stop = f_stop(self, h, self.it, t)

                    if f_callback is not None:
                        f_callback(self, h, self.it, t)
                    elif self.verbose and (self.it // self.accum_steps) % self.verbose == 0:
                        epoch = int(self.it / self.iters_per_epoch)
                        print(
                            '[Minibatch: %6d/ Epoch: %3d/ t: %3.3f s] Loss: %0.3f' %
                            (self.it, epoch, t, h_complete / self.accum_steps))

                    if flag_stop:
                        break

                    self.opt_train.zero_grad()
                    h = 0
                    h_complete = 0
                    t = time.time()
            except KeyboardInterrupt:
                flag_stop = True
                break
src/sdk/pynni/nni/feature_engineering/gradient_selector/requirements.txt
0 → 100644
View file @
cd3a912a
numpy==1.14.3
scikit-learn==0.20.0
scipy==1.1.0
torch==1.1.0
src/sdk/pynni/nni/feature_engineering/gradient_selector/syssettings.py
0 → 100644
View file @
cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import torch

# pytorch
torch.tensortype = torch.FloatTensor
torch.sparse.tensortype = torch.sparse.FloatTensor

# mem
MAXMEMGB = 10
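These module attributes are consumed elsewhere in the package; learnability.py above, for instance, applies the default tensor type with:

torch.set_default_tensor_type(syssettings.torch.tensortype)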
src/sdk/pynni/nni/feature_engineering/gradient_selector/utils.py
0 → 100644
View file @
cd3a912a
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import numpy as np


class EMA():
    """
    maintains an exponential moving average
    """

    def __init__(self, f=np.nan, discount_factor=0.1, valid_after=None,
                 n_iters_relchange=3):

        self.f_ma = [f]
        self.fs = [f]
        self.gamma = discount_factor
        self.rel_change = [np.nan]
        if valid_after is None:
            self.valid_after = int(1 / discount_factor)
        else:
            self.valid_after = valid_after
        self.n_iters_relchange = n_iters_relchange
        self.initialized = False

    def reset(self, f):

        self.f_ma = [f]
        self.fs = [f]
        self.rel_change = [np.nan]
        self.initialized = True

    def relchange(self):

        if self.num_updates() > np.max([self.valid_after,
                                        self.n_iters_relchange]):
            return np.max(self.rel_change[-self.n_iters_relchange:])
        else:
            return np.nan

    def update(self, f_new):

        if not self.initialized:
            self.reset(f_new)
        else:
            self.fs.append(f_new)
            self.f_ma.append(self.f_ma[-1] * (1 - self.gamma)
                             + self.gamma * f_new)
            if self.num_updates() > self.valid_after:
                self.rel_change.append(np.abs((self.f_ma[-1] - self.f_ma[-2])
                                              / self.f_ma[-2]))

    def num_updates(self):

        return len(self.f_ma)

    def __call__(self):

        if self.num_updates() > self.valid_after:
            return self.f_ma[-1]
        else:
            return np.nan
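A minimal usage sketch of the EMA helper (my own illustration; the loss values and parameters are arbitrary):

ema = EMA(discount_factor=0.5, valid_after=2)
for loss in [1.0, 0.8, 0.7, 0.65]:
    ema.update(loss)
print(ema())  # 0.725: the smoothed loss; returns nan until enough updates have been seen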