Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
b51c7be4
Commit
b51c7be4
authored
Dec 08, 2016
by
Guolin Ke
Committed by
GitHub
Dec 08, 2016
Browse files
refine Dataset class (#113)
Provide a high level Dataset class for easy use.
parent
f3d33582
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
683 additions
and
256 deletions
+683
-256
.travis.yml
.travis.yml
+3
-3
CMakeLists.txt
CMakeLists.txt
+1
-1
examples/python-guide/simple_example.py
examples/python-guide/simple_example.py
+2
-10
include/LightGBM/config.h
include/LightGBM/config.h
+2
-0
python-package/lightgbm/__init__.py
python-package/lightgbm/__init__.py
+1
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+403
-81
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+49
-105
python-package/lightgbm/libpath.py
python-package/lightgbm/libpath.py
+1
-0
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+131
-49
src/io/config.cpp
src/io/config.cpp
+13
-2
tests/python_package_test/test_basic.py
tests/python_package_test/test_basic.py
+0
-4
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+77
-0
No files found.
.travis.yml
View file @
b51c7be4
...
@@ -14,7 +14,7 @@ before_install:
...
@@ -14,7 +14,7 @@ before_install:
install
:
install
:
-
sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential
-
sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential
-
conda install --yes atlas numpy scipy scikit-learn
-
conda install --yes atlas numpy scipy scikit-learn
pandas
script
:
script
:
...
@@ -22,12 +22,12 @@ script:
...
@@ -22,12 +22,12 @@ script:
-
mkdir build && cd build && cmake .. && make -j
-
mkdir build && cd build && cmake .. && make -j
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_sklearn.py
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python
test_engine.py && python
test_sklearn.py
-
cd $TRAVIS_BUILD_DIR
-
cd $TRAVIS_BUILD_DIR
-
rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j
-
rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_sklearn.py
-
cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python
test_engine.py && python
test_sklearn.py
notifications
:
notifications
:
email
:
false
email
:
false
...
...
CMakeLists.txt
View file @
b51c7be4
...
@@ -76,7 +76,7 @@ add_executable(lightgbm src/main.cpp ${SOURCES})
...
@@ -76,7 +76,7 @@ add_executable(lightgbm src/main.cpp ${SOURCES})
add_library
(
_lightgbm SHARED src/c_api.cpp
${
SOURCES
}
)
add_library
(
_lightgbm SHARED src/c_api.cpp
${
SOURCES
}
)
if
(
MSVC
)
if
(
MSVC
)
set_target_properties
(
_lightgbm PROPERTIES OUTPUT_NAME
"lightgbm"
)
set_target_properties
(
_lightgbm PROPERTIES OUTPUT_NAME
"
lib_
lightgbm"
)
endif
(
MSVC
)
endif
(
MSVC
)
if
(
USE_MPI
)
if
(
USE_MPI
)
...
...
examples/python-guide/simple_example.py
View file @
b51c7be4
...
@@ -17,13 +17,7 @@ X_test = df_test.drop(0, axis=1)
...
@@ -17,13 +17,7 @@ X_test = df_test.drop(0, axis=1)
# create dataset for lightgbm
# create dataset for lightgbm
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
# ATTENTION: you should carefully use lightgbm.Dataset
# it requires setting up categorical_feature when you init it
# rather than passing from lightgbm.train
# instead, you can simply use a tuple of length=2 like below
# it will help you construct Datasets with parameters in lightgbm.train
lgb_train
=
(
X_train
,
y_train
)
lgb_eval
=
(
X_test
,
y_test
)
# specify your configurations as a dict
# specify your configurations as a dict
params
=
{
params
=
{
...
@@ -43,9 +37,7 @@ params = {
...
@@ -43,9 +37,7 @@ params = {
gbm
=
lgb
.
train
(
params
,
gbm
=
lgb
.
train
(
params
,
lgb_train
,
lgb_train
,
num_boost_round
=
100
,
num_boost_round
=
100
,
valid_datas
=
lgb_eval
,
valid_sets
=
lgb_eval
,
# you can use a list to represent multiple valid_datas/valid_names
# don't use tuple, tuple is used to represent one dataset
early_stopping_rounds
=
10
)
early_stopping_rounds
=
10
)
# save model to file
# save model to file
...
...
include/LightGBM/config.h
View file @
b51c7be4
...
@@ -230,6 +230,7 @@ struct OverallConfig: public ConfigBase {
...
@@ -230,6 +230,7 @@ struct OverallConfig: public ConfigBase {
public:
public:
TaskType
task_type
=
TaskType
::
kTrain
;
TaskType
task_type
=
TaskType
::
kTrain
;
NetworkConfig
network_config
;
NetworkConfig
network_config
;
int
seed
=
0
;
int
num_threads
=
0
;
int
num_threads
=
0
;
bool
is_parallel
=
false
;
bool
is_parallel
=
false
;
bool
is_parallel_find_bin
=
false
;
bool
is_parallel_find_bin
=
false
;
...
@@ -317,6 +318,7 @@ struct ParameterAlias {
...
@@ -317,6 +318,7 @@ struct ParameterAlias {
{
{
{
"config"
,
"config_file"
},
{
"config"
,
"config_file"
},
{
"nthread"
,
"num_threads"
},
{
"nthread"
,
"num_threads"
},
{
"random_seed"
,
"seed"
},
{
"num_thread"
,
"num_threads"
},
{
"num_thread"
,
"num_threads"
},
{
"boosting"
,
"boosting_type"
},
{
"boosting"
,
"boosting_type"
},
{
"boost"
,
"boosting_type"
},
{
"boost"
,
"boosting_type"
},
...
...
python-package/lightgbm/__init__.py
View file @
b51c7be4
...
@@ -8,7 +8,7 @@ from __future__ import absolute_import
...
@@ -8,7 +8,7 @@ from __future__ import absolute_import
import
os
import
os
from
.basic
import
Predictor
,
Dataset
,
Booster
from
.basic
import
Dataset
,
Booster
from
.engine
import
train
,
cv
from
.engine
import
train
,
cv
try
:
try
:
from
.sklearn
import
LGBMModel
,
LGBMRegressor
,
LGBMClassifier
,
LGBMRanker
from
.sklearn
import
LGBMModel
,
LGBMRegressor
,
LGBMClassifier
,
LGBMRanker
...
...
python-package/lightgbm/basic.py
View file @
b51c7be4
This diff is collapsed.
Click to expand it.
python-package/lightgbm/engine.py
View file @
b51c7be4
...
@@ -6,52 +6,12 @@ from __future__ import absolute_import
...
@@ -6,52 +6,12 @@ from __future__ import absolute_import
import
collections
import
collections
from
operator
import
attrgetter
from
operator
import
attrgetter
import
numpy
as
np
import
numpy
as
np
from
.basic
import
LightGBMError
,
Predictor
,
Dataset
,
Booster
,
is_str
from
.basic
import
LightGBMError
,
_Inner
Predictor
,
Dataset
,
Booster
,
is_str
from
.
import
callback
from
.
import
callback
def
_construct_dataset
(
X_y
,
reference
=
None
,
def
train
(
params
,
train_set
,
num_boost_round
=
100
,
params
=
None
,
other_fields
=
None
,
valid_sets
=
None
,
valid_names
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
predictor
=
None
):
if
'max_bin'
in
params
:
max_bin
=
int
(
params
[
'max_bin'
])
else
:
max_bin
=
255
weight
=
None
group
=
None
init_score
=
None
if
other_fields
is
not
None
:
if
not
isinstance
(
other_fields
,
dict
):
raise
TypeError
(
"type of other filed data should be dict"
)
weight
=
other_fields
.
get
(
'weight'
,
None
)
group
=
other_fields
.
get
(
'group'
,
None
)
init_score
=
other_fields
.
get
(
'init_score'
,
None
)
if
is_str
(
X_y
):
data
=
X_y
label
=
None
else
:
if
len
(
X_y
)
!=
2
:
raise
TypeError
(
"should pass (data, label) tuple for dataset"
)
data
=
X_y
[
0
]
label
=
X_y
[
1
]
if
reference
is
None
:
ret
=
Dataset
(
data
,
label
=
label
,
max_bin
=
max_bin
,
weight
=
weight
,
group
=
group
,
predictor
=
predictor
,
feature_name
=
feature_name
,
categorical_feature
=
categorical_feature
,
params
=
params
)
else
:
ret
=
reference
.
create_valid
(
data
,
label
=
label
,
weight
=
weight
,
group
=
group
,
params
=
params
)
if
init_score
is
not
None
:
ret
.
set_init_score
(
init_score
)
return
ret
def
train
(
params
,
train_data
,
num_boost_round
=
100
,
valid_datas
=
None
,
valid_names
=
None
,
fobj
=
None
,
feval
=
None
,
init_model
=
None
,
fobj
=
None
,
feval
=
None
,
init_model
=
None
,
train_fields
=
None
,
valid_fields
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
early_stopping_rounds
=
None
,
evals_result
=
None
,
early_stopping_rounds
=
None
,
evals_result
=
None
,
verbose_eval
=
True
,
learning_rates
=
None
,
callbacks
=
None
):
verbose_eval
=
True
,
learning_rates
=
None
,
callbacks
=
None
):
...
@@ -61,14 +21,14 @@ def train(params, train_data, num_boost_round=100,
...
@@ -61,14 +21,14 @@ def train(params, train_data, num_boost_round=100,
----------
----------
params : dict
params : dict
Parameters for training.
Parameters for training.
train_
data
: Dataset
, tuple (X, y) or filename of data
train_
set
: Dataset
Data to be trained.
Data to be trained.
num_boost_round: int
num_boost_round: int
Number of boosting iterations.
Number of boosting iterations.
valid_
data
s: list of Datasets
, tuples (valid_X, valid_y) or filenames of data
valid_
set
s: list of Datasets
List of data to be evaluated during training
List of data to be evaluated during training
valid_names: list of string
valid_names: list of string
Names of valid_
data
s
Names of valid_
set
s
fobj : function
fobj : function
Customized objective function.
Customized objective function.
feval : function
feval : function
...
@@ -76,13 +36,6 @@ def train(params, train_data, num_boost_round=100,
...
@@ -76,13 +36,6 @@ def train(params, train_data, num_boost_round=100,
Note: should return (eval_name, eval_result, is_higher_better) of list of this
Note: should return (eval_name, eval_result, is_higher_better) of list of this
init_model : file name of lightgbm model or 'Booster' instance
init_model : file name of lightgbm model or 'Booster' instance
model used for continued train
model used for continued train
train_fields : dict
Other data file in training data. e.g. train_fields['weight'] is weight data
Support fields: weight, group, init_score
valid_fields : dict
Other data file in training data.
\
e.g. valid_fields[0]['weight'] is weight data for first valid data
Support fields: weight, group, init_score
feature_name : list of str
feature_name : list of str
Feature names
Feature names
categorical_feature : list of str or int
categorical_feature : list of str or int
...
@@ -95,8 +48,8 @@ def train(params, train_data, num_boost_round=100,
...
@@ -95,8 +48,8 @@ def train(params, train_data, num_boost_round=100,
Returns the model with (best_iter + early_stopping_rounds)
Returns the model with (best_iter + early_stopping_rounds)
If early stopping occurs, the model will add 'best_iteration' field
If early stopping occurs, the model will add 'best_iteration' field
evals_result: dict or None
evals_result: dict or None
This dictionary used to store all evaluation results of all the items in valid_
data
s.
This dictionary used to store all evaluation results of all the items in valid_
set
s.
Example: with a valid_
data
s containing [valid_set, train_set]
\
Example: with a valid_
set
s containing [valid_set, train_set]
\
and valid_names containing ['eval', 'train'] and a paramater containing ('metric':'logloss')
and valid_names containing ['eval', 'train'] and a paramater containing ('metric':'logloss')
Returns: {'train': {'logloss': ['0.48253', '0.35953', ...]},
Returns: {'train': {'logloss': ['0.48253', '0.35953', ...]},
'eval': {'logloss': ['0.480385', '0.357756', ...]}}
'eval': {'logloss': ['0.480385', '0.357756', ...]}}
...
@@ -127,58 +80,40 @@ def train(params, train_data, num_boost_round=100,
...
@@ -127,58 +80,40 @@ def train(params, train_data, num_boost_round=100,
"""
"""
"""create predictor first"""
"""create predictor first"""
if
is_str
(
init_model
):
if
is_str
(
init_model
):
predictor
=
Predictor
(
model_file
=
init_model
)
predictor
=
_Inner
Predictor
(
model_file
=
init_model
)
elif
isinstance
(
init_model
,
Booster
):
elif
isinstance
(
init_model
,
Booster
):
predictor
=
init_model
.
to_predictor
()
predictor
=
init_model
.
_to_predictor
()
elif
isinstance
(
init_model
,
Predictor
):
predictor
=
init_model
else
:
else
:
predictor
=
None
predictor
=
None
init_iteration
=
predictor
.
num_total_iteration
if
predictor
else
0
init_iteration
=
predictor
.
num_total_iteration
if
predictor
else
0
"""create dataset"""
"""check dataset"""
if
isinstance
(
train_data
,
Dataset
):
if
not
isinstance
(
train_set
,
Dataset
):
train_set
=
train_data
raise
TypeError
(
"only can accept Dataset instance for traninig"
)
if
train_fields
is
not
None
:
for
field
,
data
in
train_fields
.
items
():
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_field
(
field
,
data
)
train_set
.
set_feature_name
(
feature_name
)
else
:
train_set
.
set_categorical_feature
(
categorical_feature
)
train_set
=
_construct_dataset
(
train_data
,
None
,
params
,
other_fields
=
train_fields
,
feature_name
=
feature_name
,
categorical_feature
=
categorical_feature
,
predictor
=
predictor
)
is_valid_contain_train
=
False
is_valid_contain_train
=
False
train_data_name
=
"training"
train_data_name
=
"training"
valid_sets
=
[]
reduced_
valid_sets
=
[]
name_valid_sets
=
[]
name_valid_sets
=
[]
if
valid_
data
s
:
if
valid_
set
s
:
if
isinstance
(
valid_
data
s
,
(
Dataset
,
tuple
)
):
if
isinstance
(
valid_
set
s
,
Dataset
):
valid_
data
s
=
[
valid_
data
s
]
valid_
set
s
=
[
valid_
set
s
]
if
isinstance
(
valid_names
,
str
):
if
isinstance
(
valid_names
,
str
):
valid_names
=
[
valid_names
]
valid_names
=
[
valid_names
]
for
i
,
valid_data
in
enumerate
(
valid_datas
):
for
i
,
valid_data
in
enumerate
(
valid_sets
):
other_fields
=
None
if
valid_fields
is
None
else
valid_fields
.
get
(
i
,
None
)
"""reduce cost for prediction training data"""
"""reduce cost for prediction training data"""
if
valid_data
[
0
]
is
train_
data
[
0
]
and
valid_data
[
1
]
is
train_data
[
1
]
:
if
valid_data
is
train_
set
:
is_valid_contain_train
=
True
is_valid_contain_train
=
True
if
valid_names
is
not
None
:
if
valid_names
is
not
None
:
train_data_name
=
valid_names
[
i
]
train_data_name
=
valid_names
[
i
]
continue
continue
if
isinstance
(
valid_data
,
Dataset
):
if
not
isinstance
(
valid_data
,
Dataset
):
valid_set
=
valid_data
raise
TypeError
(
"only can accept Dataset instance for traninig"
)
if
other_fields
is
not
None
:
valid_data
.
set_reference
(
train_set
)
for
field
,
data
in
other_fields
.
items
():
reduced_valid_sets
.
append
(
valid_data
)
valid_set
.
set_field
(
field
,
data
)
else
:
valid_set
=
_construct_dataset
(
valid_data
,
train_set
,
params
,
other_fields
=
other_fields
,
feature_name
=
feature_name
,
categorical_feature
=
categorical_feature
,
predictor
=
predictor
)
valid_sets
.
append
(
valid_set
)
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
name_valid_sets
.
append
(
valid_names
[
i
])
name_valid_sets
.
append
(
valid_names
[
i
])
else
:
else
:
...
@@ -217,7 +152,7 @@ def train(params, train_data, num_boost_round=100,
...
@@ -217,7 +152,7 @@ def train(params, train_data, num_boost_round=100,
booster
=
Booster
(
params
=
params
,
train_set
=
train_set
)
booster
=
Booster
(
params
=
params
,
train_set
=
train_set
)
if
is_valid_contain_train
:
if
is_valid_contain_train
:
booster
.
set_train_data_name
(
train_data_name
)
booster
.
set_train_data_name
(
train_data_name
)
for
valid_set
,
name_valid_set
in
zip
(
valid_sets
,
name_valid_sets
):
for
valid_set
,
name_valid_set
in
zip
(
reduced_
valid_sets
,
name_valid_sets
):
booster
.
add_valid
(
valid_set
,
name_valid_set
)
booster
.
add_valid
(
valid_set
,
name_valid_set
)
"""start training"""
"""start training"""
...
@@ -294,6 +229,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
...
@@ -294,6 +229,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
else
:
else
:
raise
LightGBMError
(
'sklearn needs to be installed in order to use stratified cv'
)
raise
LightGBMError
(
'sklearn needs to be installed in order to use stratified cv'
)
else
:
else
:
full_data
.
construct
()
randidx
=
np
.
random
.
permutation
(
full_data
.
num_data
())
randidx
=
np
.
random
.
permutation
(
full_data
.
num_data
())
kstep
=
int
(
len
(
randidx
)
/
nfold
)
kstep
=
int
(
len
(
randidx
)
/
nfold
)
idset
=
[
randidx
[(
i
*
kstep
):
min
(
len
(
randidx
),
(
i
+
1
)
*
kstep
)]
for
i
in
range
(
nfold
)]
idset
=
[
randidx
[(
i
*
kstep
):
min
(
len
(
randidx
),
(
i
+
1
)
*
kstep
)]
for
i
in
range
(
nfold
)]
...
@@ -322,8 +258,8 @@ def _agg_cv_result(raw_results):
...
@@ -322,8 +258,8 @@ def _agg_cv_result(raw_results):
cvmap
[
one_line
[
1
]].
append
(
one_line
[
2
])
cvmap
[
one_line
[
1
]].
append
(
one_line
[
2
])
return
[(
'cv_agg'
,
k
,
np
.
mean
(
v
),
metric_type
[
k
],
np
.
std
(
v
))
for
k
,
v
in
cvmap
.
items
()]
return
[(
'cv_agg'
,
k
,
np
.
mean
(
v
),
metric_type
[
k
],
np
.
std
(
v
))
for
k
,
v
in
cvmap
.
items
()]
def
cv
(
params
,
train_
data
,
num_boost_round
=
10
,
nfold
=
5
,
stratified
=
False
,
def
cv
(
params
,
train_
set
,
num_boost_round
=
10
,
nfold
=
5
,
stratified
=
False
,
metrics
=
(),
fobj
=
None
,
feval
=
None
,
train_fields
=
None
,
metrics
=
(),
fobj
=
None
,
feval
=
None
,
init_model
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
early_stopping_rounds
=
None
,
fpreproc
=
None
,
early_stopping_rounds
=
None
,
fpreproc
=
None
,
verbose_eval
=
None
,
show_stdv
=
True
,
seed
=
0
,
verbose_eval
=
None
,
show_stdv
=
True
,
seed
=
0
,
...
@@ -334,7 +270,7 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -334,7 +270,7 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
----------
----------
params : dict
params : dict
Booster params.
Booster params.
train_
data : tuple (X, y) or filename of data
train_
set : Dataset
Data to be trained.
Data to be trained.
num_boost_round : int
num_boost_round : int
Number of boosting iterations.
Number of boosting iterations.
...
@@ -350,9 +286,8 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -350,9 +286,8 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
Custom objective function.
Custom objective function.
feval : function
feval : function
Custom evaluation function.
Custom evaluation function.
train_fields : dict
init_model : file name of lightgbm model or 'Booster' instance
Other data file in training data. e.g. train_fields['weight'] is weight data
model used for continued train
Support fields: weight, group, init_score
feature_name : list of str
feature_name : list of str
Feature names
Feature names
categorical_feature : list of str or int
categorical_feature : list of str or int
...
@@ -382,6 +317,20 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -382,6 +317,20 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
-------
-------
evaluation history : list(string)
evaluation history : list(string)
"""
"""
if
not
isinstance
(
train_set
,
Dataset
):
raise
TypeError
(
"only can accept Dataset instance for traninig"
)
if
is_str
(
init_model
):
predictor
=
_InnerPredictor
(
model_file
=
init_model
)
elif
isinstance
(
init_model
,
Booster
):
predictor
=
init_model
.
_to_predictor
()
else
:
predictor
=
None
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_categorical_feature
(
categorical_feature
)
if
metrics
:
if
metrics
:
params
.
setdefault
(
'metric'
,
[])
params
.
setdefault
(
'metric'
,
[])
if
is_str
(
metrics
):
if
is_str
(
metrics
):
...
@@ -389,11 +338,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -389,11 +338,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
else
:
else
:
params
[
'metric'
].
extend
(
metrics
)
params
[
'metric'
].
extend
(
metrics
)
train_set
=
_construct_dataset
(
train_data
,
None
,
params
,
other_fields
=
train_fields
,
feature_name
=
feature_name
,
categorical_feature
=
categorical_feature
)
results
=
collections
.
defaultdict
(
list
)
results
=
collections
.
defaultdict
(
list
)
cvfolds
=
_make_n_folds
(
train_set
,
nfold
,
params
,
seed
,
fpreproc
,
stratified
)
cvfolds
=
_make_n_folds
(
train_set
,
nfold
,
params
,
seed
,
fpreproc
,
stratified
)
...
...
python-package/lightgbm/libpath.py
View file @
b51c7be4
...
@@ -19,6 +19,7 @@ def find_lib_path():
...
@@ -19,6 +19,7 @@ def find_lib_path():
if
os
.
name
==
'nt'
:
if
os
.
name
==
'nt'
:
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'../../windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'../../windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'./windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'./windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'../../Release/'
))
dll_path
=
[
os
.
path
.
join
(
p
,
'lib_lightgbm.dll'
)
for
p
in
dll_path
]
dll_path
=
[
os
.
path
.
join
(
p
,
'lib_lightgbm.dll'
)
for
p
in
dll_path
]
else
:
else
:
dll_path
=
[
os
.
path
.
join
(
p
,
'lib_lightgbm.so'
)
for
p
in
dll_path
]
dll_path
=
[
os
.
path
.
join
(
p
,
'lib_lightgbm.so'
)
for
p
in
dll_path
]
...
...
python-package/lightgbm/sklearn.py
View file @
b51c7be4
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
from
__future__
import
absolute_import
from
__future__
import
absolute_import
import
numpy
as
np
import
numpy
as
np
from
.basic
import
LightGBMError
,
is_str
from
.basic
import
LightGBMError
,
Dataset
,
is_str
from
.engine
import
train
from
.engine
import
train
# sklearn
# sklearn
try
:
try
:
...
@@ -195,9 +195,12 @@ class LGBMModel(LGBMModelBase):
...
@@ -195,9 +195,12 @@ class LGBMModel(LGBMModelBase):
params
.
pop
(
'nthread'
,
None
)
params
.
pop
(
'nthread'
,
None
)
return
params
return
params
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
other_params
=
None
):
other_params
=
None
):
"""
"""
...
@@ -209,24 +212,29 @@ class LGBMModel(LGBMModelBase):
...
@@ -209,24 +212,29 @@ class LGBMModel(LGBMModelBase):
Feature matrix
Feature matrix
y : array_like
y : array_like
Labels
Labels
sample_weight : array_like
weight of training data
init_score : array_like
init score of training data
group : array_like
group data of training data
eval_set : list, optional
eval_set : list, optional
A list of (X, y) tuple pairs to use as a validation set for early-stopping
A list of (X, y) tuple pairs to use as a validation set for early-stopping
eval_sample_weight : List of array
weight of eval data
eval_init_score : List of array
init score of eval data
eval_group : List of array
group data of eval data
eval_metric : str, list of str, callable, optional
eval_metric : str, list of str, callable, optional
If a str, should be a built-in evaluation metric to use.
If a str, should be a built-in evaluation metric to use.
If callable, a custom evaluation metric. The call
\
If callable, a custom evaluation metric. The call
\
signature is func(y_predicted, dataset) where dataset will be a
\
signature is func(y_predicted, dataset) where dataset will be a
\
Dat
a
set
f
object such that you may need to call the get_label
\
Dat
e
set object such that you may need to call the get_label
\
method. And it must return (eval_name->str, eval_result->float, is_bigger_better->Bool)
method. And it must return (eval_name->str, eval_result->float, is_bigger_better->Bool)
early_stopping_rounds : int
early_stopping_rounds : int
verbose : bool
verbose : bool
If `verbose` and an evaluation set is used, writes the evaluation
If `verbose` and an evaluation set is used, writes the evaluation
train_fields : dict
Other data file in training data. e.g. train_fields['weight'] is weight data
Support fields: weight, group, init_score
valid_fields : dict
Other data file in training data.
\
e.g. valid_fields[0]['weight'] is weight data for first valid data
Support fields: weight, group, init_score
feature_name : list of str
feature_name : list of str
Feature names
Feature names
categorical_feature : list of str or int
categorical_feature : list of str or int
...
@@ -263,12 +271,33 @@ class LGBMModel(LGBMModelBase):
...
@@ -263,12 +271,33 @@ class LGBMModel(LGBMModelBase):
feval
=
None
feval
=
None
feval
=
eval_metric
if
callable
(
eval_metric
)
else
None
feval
=
eval_metric
if
callable
(
eval_metric
)
else
None
self
.
_Booster
=
train
(
params
,
(
X
,
y
),
def
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
):
self
.
n_estimators
,
valid_datas
=
eval_set
,
ret
=
Dataset
(
X
,
label
=
y
,
weight
=
sample_weight
,
group
=
group
)
ret
.
set_init_score
(
init_score
)
return
ret
train_set
=
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
)
valid_sets
=
[]
if
eval_set
is
not
None
:
if
isinstance
(
eval_set
,
tuple
):
eval_set
=
[
eval_set
]
for
i
,
valid_data
in
enumerate
(
eval_set
):
"""reduce cost for prediction training data"""
if
valid_data
[
0
]
is
X
and
valid_data
[
1
]
is
y
:
valid_set
=
train_set
else
:
valid_weight
=
None
if
eval_sample_weight
is
None
else
eval_sample_weight
.
get
(
i
,
None
)
valid_init_score
=
None
if
eval_init_score
is
None
else
eval_init_score
.
get
(
i
,
None
)
valid_group
=
None
if
eval_group
is
None
else
eval_group
.
get
(
i
,
None
)
valid_set
=
_construct_dataset
(
valid_data
[
0
],
valid_data
[
1
],
valid_weight
,
valid_init_score
,
valid_group
)
valid_sets
.
append
(
valid_set
)
self
.
_Booster
=
train
(
params
,
train_set
,
self
.
n_estimators
,
valid_sets
=
valid_sets
,
early_stopping_rounds
=
early_stopping_rounds
,
early_stopping_rounds
=
early_stopping_rounds
,
evals_result
=
evals_result
,
fobj
=
self
.
fobj
,
feval
=
feval
,
evals_result
=
evals_result
,
fobj
=
self
.
fobj
,
feval
=
feval
,
verbose_eval
=
verbose
,
train_fields
=
train_fields
,
verbose_eval
=
verbose
,
feature_name
=
feature_name
,
valid_fields
=
valid_fields
,
feature_name
=
feature_name
,
categorical_feature
=
categorical_feature
)
categorical_feature
=
categorical_feature
)
if
evals_result
:
if
evals_result
:
...
@@ -331,14 +360,48 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
...
@@ -331,14 +360,48 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
__doc__
=
"""Implementation of the scikit-learn API for LightGBM regression.
__doc__
=
"""Implementation of the scikit-learn API for LightGBM regression.
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
feature_name
=
None
,
categorical_feature
=
None
,
other_params
=
None
):
super
(
LGBMRegressor
,
self
).
fit
(
X
,
y
,
sample_weight
,
init_score
,
None
,
eval_set
,
eval_sample_weight
,
eval_init_score
,
None
,
eval_metric
,
early_stopping_rounds
,
verbose
,
feature_name
,
categorical_feature
,
other_params
)
return
self
class
LGBMClassifier
(
LGBMModel
,
LGBMClassifierBase
):
class
LGBMClassifier
(
LGBMModel
,
LGBMClassifierBase
):
__doc__
=
"""Implementation of the scikit-learn API for LightGBM classification.
__doc__
=
"""Implementation of the scikit-learn API for LightGBM classification.
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
__init__
(
self
,
num_leaves
=
31
,
max_depth
=-
1
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
silent
=
True
,
objective
=
"binary"
,
nthread
=-
1
,
min_split_gain
=
0
,
min_child_weight
=
5
,
min_child_samples
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
super
(
LGBMClassifier
,
self
).
__init__
(
num_leaves
,
max_depth
,
learning_rate
,
n_estimators
,
max_bin
,
silent
,
objective
,
nthread
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
subsample
,
subsample_freq
,
colsample_bytree
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
is_unbalance
,
seed
)
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
feature_name
=
None
,
categorical_feature
=
None
,
other_params
=
None
):
other_params
=
None
):
...
@@ -350,12 +413,6 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
...
@@ -350,12 +413,6 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
# Switch to using a multiclass objective in the underlying LGBM instance
# Switch to using a multiclass objective in the underlying LGBM instance
self
.
objective
=
"multiclass"
self
.
objective
=
"multiclass"
other_params
[
'num_class'
]
=
self
.
n_classes_
other_params
[
'num_class'
]
=
self
.
n_classes_
if
eval_metric
is
None
and
eval_set
is
not
None
:
eval_metric
=
"multi_logloss"
else
:
self
.
objective
=
"binary"
if
eval_metric
is
None
and
eval_set
is
not
None
:
eval_metric
=
"binary_logloss"
self
.
_le
=
LGBMLabelEncoder
().
fit
(
y
)
self
.
_le
=
LGBMLabelEncoder
().
fit
(
y
)
training_labels
=
self
.
_le
.
transform
(
y
)
training_labels
=
self
.
_le
.
transform
(
y
)
...
@@ -363,10 +420,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
...
@@ -363,10 +420,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
if
eval_set
is
not
None
:
if
eval_set
is
not
None
:
eval_set
=
list
((
x
[
0
],
self
.
_le
.
transform
(
x
[
1
]))
for
x
in
eval_set
)
eval_set
=
list
((
x
[
0
],
self
.
_le
.
transform
(
x
[
1
]))
for
x
in
eval_set
)
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
eval_set
,
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
sample_weight
,
init_score
,
None
,
eval_set
,
eval_sample_weight
,
eval_init_score
,
None
,
eval_metric
,
early_stopping_rounds
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
verbose
,
feature_name
,
categorical_feature
,
feature_name
,
categorical_feature
,
other_params
)
other_params
)
return
self
return
self
...
@@ -442,34 +499,59 @@ class LGBMRanker(LGBMModel):
...
@@ -442,34 +499,59 @@ class LGBMRanker(LGBMModel):
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
__init__
(
self
,
num_leaves
=
31
,
max_depth
=-
1
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
silent
=
True
,
objective
=
"lambdarank"
,
nthread
=-
1
,
min_split_gain
=
0
,
min_child_weight
=
5
,
min_child_samples
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
super
(
LGBMRanker
,
self
).
__init__
(
num_leaves
,
max_depth
,
learning_rate
,
n_estimators
,
max_bin
,
silent
,
objective
,
nthread
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
subsample
,
subsample_freq
,
colsample_bytree
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
is_unbalance
,
seed
)
if
callable
(
self
.
objective
):
self
.
fobj
=
_group_wise_objective
(
self
.
objective
)
else
:
self
.
fobj
=
None
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_metric
=
None
,
eval_at
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
feature_name
=
None
,
categorical_feature
=
None
,
other_params
=
None
):
"""
Most arguments like LGBMModel.fit except following:
eval_at : list of int
The evaulation positions of NDCG
"""
"""check group data"""
"""check group data"""
if
"
group
"
not
in
train_fields
:
if
group
is
None
:
raise
ValueError
(
"should se
t
group
in train_fields
for ranking task"
)
raise
ValueError
(
"should
u
se group for ranking task"
)
if
eval_set
is
not
None
:
if
eval_set
is
not
None
:
if
val
id_fields
is
None
:
if
e
val
_group
is
None
:
raise
ValueError
(
"val
id_fields
cannot be None when eval_set is not None"
)
raise
ValueError
(
"
e
val
_group
cannot be None when eval_set is not None"
)
elif
len
(
val
id_fields
)
!=
len
(
eval_set
):
elif
len
(
e
val
_group
)
!=
len
(
eval_set
):
raise
ValueError
(
"leng
h
t of val
id_fields
should equal with eval_set"
)
raise
ValueError
(
"lengt
h
of
e
val
_group
should equal with eval_set"
)
else
:
else
:
for
inner
in
valid_fields
:
for
inner_group
in
eval_group
:
if
"group"
not
in
inner
:
if
inner_group
is
None
:
raise
ValueError
(
"should set group in valid_fields for ranking task"
)
raise
ValueError
(
"should set group for all eval data for ranking task"
)
if
eval_at
is
not
None
:
if
callable
(
self
.
objective
):
other_params
=
{}
if
other_params
is
None
else
other_params
self
.
fobj
=
_group_wise_objective
(
self
.
objective
)
other_params
[
'ndcg_eval_at'
]
=
list
(
eval_at
)
else
:
super
(
LGBMRanker
,
self
).
fit
(
X
,
y
,
sample_weight
,
init_score
,
group
,
self
.
objective
=
"lambdarank"
eval_set
,
eval_sample_weight
,
eval_init_score
,
eval_group
,
self
.
fobj
=
None
eval_metric
,
early_stopping_rounds
,
if
eval_metric
is
None
and
eval_set
is
not
None
:
verbose
,
feature_name
,
categorical_feature
,
eval_metric
=
"ndcg"
super
(
LGBMRanker
,
self
).
fit
(
X
,
y
,
eval_set
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
other_params
)
return
self
return
self
src/io/config.cpp
View file @
b51c7be4
#include <LightGBM/config.h>
#include <LightGBM/config.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/log.h>
#include <vector>
#include <vector>
#include <string>
#include <string>
#include <unordered_set>
#include <unordered_set>
#include <algorithm>
#include <algorithm>
#include <limits>
namespace
LightGBM
{
namespace
LightGBM
{
...
@@ -22,7 +24,7 @@ std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* par
...
@@ -22,7 +24,7 @@ std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* par
continue
;
continue
;
}
}
params
[
key
]
=
value
;
params
[
key
]
=
value
;
}
else
{
}
else
if
(
Common
::
Trim
(
arg
).
size
()
>
0
)
{
Log
::
Warning
(
"Unknown parameter %s"
,
arg
.
c_str
());
Log
::
Warning
(
"Unknown parameter %s"
,
arg
.
c_str
());
}
}
}
}
...
@@ -33,12 +35,21 @@ std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* par
...
@@ -33,12 +35,21 @@ std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* par
void
OverallConfig
::
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
{
void
OverallConfig
::
Set
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
params
)
{
// load main config types
// load main config types
GetInt
(
params
,
"num_threads"
,
&
num_threads
);
GetInt
(
params
,
"num_threads"
,
&
num_threads
);
// generate seeds by seed.
if
(
GetInt
(
params
,
"seed"
,
&
seed
))
{
Random
rand
(
seed
);
int
int_max
=
std
::
numeric_limits
<
int
>::
max
();
io_config
.
data_random_seed
=
static_cast
<
int
>
(
rand
.
NextInt
(
0
,
int_max
));
boosting_config
.
bagging_seed
=
static_cast
<
int
>
(
rand
.
NextInt
(
0
,
int_max
));
boosting_config
.
drop_seed
=
static_cast
<
int
>
(
rand
.
NextInt
(
0
,
int_max
));
boosting_config
.
tree_config
.
feature_fraction_seed
=
static_cast
<
int
>
(
rand
.
NextInt
(
0
,
int_max
));
}
GetTaskType
(
params
);
GetTaskType
(
params
);
GetBoostingType
(
params
);
GetBoostingType
(
params
);
GetObjectiveType
(
params
);
GetObjectiveType
(
params
);
GetMetricType
(
params
);
GetMetricType
(
params
);
// sub-config setup
// sub-config setup
network_config
.
Set
(
params
);
network_config
.
Set
(
params
);
io_config
.
Set
(
params
);
io_config
.
Set
(
params
);
...
...
tests/python_package_test/test_basic.py
View file @
b51c7be4
...
@@ -8,10 +8,6 @@ x_train, x_test, y_train, y_test = model_selection.train_test_split(X, Y, test_s
...
@@ -8,10 +8,6 @@ x_train, x_test, y_train, y_test = model_selection.train_test_split(X, Y, test_s
train_data
=
lgb
.
Dataset
(
x_train
,
max_bin
=
255
,
label
=
y_train
)
train_data
=
lgb
.
Dataset
(
x_train
,
max_bin
=
255
,
label
=
y_train
)
num_features
=
train_data
.
num_feature
()
names
=
[
"name_%d"
%
(
i
)
for
i
in
range
(
num_features
)]
train_data
.
set_feature_name
(
names
)
valid_data
=
train_data
.
create_valid
(
x_test
,
label
=
y_test
)
valid_data
=
train_data
.
create_valid
(
x_test
,
label
=
y_test
)
config
=
{
"objective"
:
"binary"
,
"metric"
:
"auc"
,
"min_data"
:
1
,
"num_leaves"
:
15
}
config
=
{
"objective"
:
"binary"
,
"metric"
:
"auc"
,
"min_data"
:
1
,
"num_leaves"
:
15
}
...
...
tests/python_package_test/test_engine.py
0 → 100644
View file @
b51c7be4
# coding: utf-8
# pylint: disable = invalid-name, C0111
import
json
import
lightgbm
as
lgb
import
pandas
as
pd
from
sklearn.metrics
import
mean_squared_error
# load or create your dataset
df_train
=
pd
.
read_csv
(
'../../examples/regression/regression.train'
,
header
=
None
,
sep
=
'
\t
'
)
df_test
=
pd
.
read_csv
(
'../../examples/regression/regression.test'
,
header
=
None
,
sep
=
'
\t
'
)
y_train
=
df_train
[
0
]
y_test
=
df_test
[
0
]
X_train
=
df_train
.
drop
(
0
,
axis
=
1
)
X_test
=
df_test
.
drop
(
0
,
axis
=
1
)
# create dataset for lightgbm
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
free_raw_data
=
False
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
free_raw_data
=
False
)
# specify your configurations as a dict
params
=
{
'task'
:
'train'
,
'boosting_type'
:
'gbdt'
,
'objective'
:
'regression'
,
'metric'
:
{
'l2'
,
'auc'
},
'num_leaves'
:
31
,
'learning_rate'
:
0.05
,
'feature_fraction'
:
0.9
,
'bagging_fraction'
:
0.8
,
'bagging_freq'
:
5
,
'verbose'
:
0
}
# train
init_gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
5
,
valid_sets
=
lgb_eval
)
print
(
'Start continue train'
)
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
100
,
valid_sets
=
lgb_eval
,
early_stopping_rounds
=
10
,
init_model
=
init_gbm
)
# save model to file
gbm
.
save_model
(
'model.txt'
)
# predict
y_pred
=
gbm
.
predict
(
X_test
,
num_iteration
=
gbm
.
best_iteration
)
# eval
print
(
'The rmse of prediction is:'
,
mean_squared_error
(
y_test
,
y_pred
)
**
0.5
)
# dump model to json (and save to file)
model_json
=
gbm
.
dump_model
()
with
open
(
'model.json'
,
'w+'
)
as
f
:
json
.
dump
(
model_json
,
f
,
indent
=
4
)
# feature importances
print
(
'Feature importances:'
,
gbm
.
feature_importance
())
print
(
'Feature importances:'
,
gbm
.
feature_importance
(
"gain"
))
print
(
'Start test cv'
)
lgb
.
cv
(
params
,
lgb_train
,
num_boost_round
=
100
,
nfold
=
5
,
verbose_eval
=
5
,
init_model
=
init_gbm
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment