Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
ebfc8521
Commit
ebfc8521
authored
Dec 09, 2016
by
wxchan
Committed by
Guolin Ke
Dec 09, 2016
Browse files
add an advanced example; add guide-python README.md details; clean error messages (#117)
parent
b51c7be4
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
273 additions
and
130 deletions
+273
-130
examples/python-guide/README.md
examples/python-guide/README.md
+20
-0
examples/python-guide/advanced_example.py
examples/python-guide/advanced_example.py
+131
-0
examples/python-guide/simple_example.py
examples/python-guide/simple_example.py
+10
-5
examples/python-guide/sklearn_example.py
examples/python-guide/sklearn_example.py
+8
-3
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+59
-77
python-package/lightgbm/callback.py
python-package/lightgbm/callback.py
+4
-4
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+11
-11
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+30
-30
No files found.
examples/python-guide/README.md
View file @
ebfc8521
...
@@ -16,3 +16,23 @@ Now you can run examples in this folder, for example:
...
@@ -16,3 +16,23 @@ Now you can run examples in this folder, for example:
```
```
python simple_example.py
python simple_example.py
```
```
Examples including:
-
[
simple_example.py
](
https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py
)
-
Construct Dataset
-
Basic train and predict
-
Eval during training
-
Early stopping
-
Save model to file
-
Dump model to json format
-
Feature importances
-
[
sklearn_example.py
](
https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/sklearn_example.py
)
-
Basic train and predict with sklearn interface
-
Feature importances with sklearn interface
-
[
advanced_example.py
](
https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/advanced_example.py
)
-
Set feature names
-
Directly use categorical features without one-hot encoding
-
Load model file to continue training
-
Change learning rates during training
-
Self-defined objective function
-
Self-defined eval metric
-
Callback function
\ No newline at end of file
examples/python-guide/advanced_example.py
0 → 100644
View file @
ebfc8521
# coding: utf-8
# pylint: disable = invalid-name, C0111
import
lightgbm
as
lgb
import
pandas
as
pd
import
numpy
as
np
# load or create your dataset
print('Load data...')
# The binary.* files are tab-separated with no header row;
# column 0 holds the 0/1 label and the remaining columns are features.
df_train = pd.read_csv('../binary_classification/binary.train', header=None, sep='\t')
df_test = pd.read_csv('../binary_classification/binary.test', header=None, sep='\t')
# Per-row sample weights: one weight per train/test example, single column.
W_train = pd.read_csv('../binary_classification/binary.train.weight', header=None)[0]
W_test = pd.read_csv('../binary_classification/binary.test.weight', header=None)[0]
# Column 0 is the label ...
y_train = df_train[0]
y_test = df_test[0]
# ... so drop it to leave only the feature columns.
X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)
# num_feature is used below to generate one name per feature column.
num_train, num_feature = X_train.shape
# create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False
# (the raw pandas data is kept so the same Dataset objects can be passed
# to the several lgb.train calls below).
lgb_train = lgb.Dataset(X_train, y_train,
                        weight=W_train, free_raw_data=False)
# reference=lgb_train makes the eval set share the training set's binning.
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
                       weight=W_test, free_raw_data=False)
# specify your configurations as a dict
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',            # binary classification
    'metric': 'binary_logloss',       # eval metric reported during training
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,          # randomly select 90% of features per tree
    'bagging_fraction': 0.8,          # randomly select 80% of data per bagging round
    'bagging_freq': 5,                # perform bagging every 5 iterations
    'verbose': 0
}
# generate a feature name for every column of X_train
feature_name = ['feature_' + str(col) for col in range(num_feature)]

print('Start training...')
# feature_name and categorical_feature: column index 21 is treated as a
# categorical feature directly, without one-hot encoding.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                valid_sets=lgb_train,  # eval training data
                feature_name=feature_name,
                categorical_feature=[21])

# check feature name: the names passed above are stored on the Dataset
print('Finish first 10 rounds...')
print('7th feature name is:', repr(lgb_train.feature_name[6]))

# save model to file (reloaded below via init_model='model.txt')
gbm.save_model('model.txt')
# continue training
# init_model accepts:
# 1. model file name
# 2. Booster()
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model='model.txt',  # resume from the model saved above
                valid_sets=lgb_eval)

print('Finish 10 - 20 rounds with model file...')

# decay learning rates
# learning_rates accepts:
# 1. list/tuple with length = num_boost_round
# 2. function(curr_iter)
# 3. function(curr_iter, total_iter)
# NOTE(review): 'iter' shadows the builtin of the same name; harmless in
# this lambda, but prefer a different name if this is ever edited.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,  # resume from the in-memory Booster
                learning_rates=lambda iter: 0.05 * (0.99 ** iter),
                valid_sets=lgb_eval)

print('Finish 20 - 30 rounds with decay learning rates...')
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss
# NOTE: the name is misspelled ("loglikelood") but is referenced later in
# this script, so it is kept unchanged.
def loglikelood(preds, train_data):
    """Log-likelihood objective for binary classification.

    Parameters
    ----------
    preds : array
        Raw (pre-sigmoid) scores from the booster.
    train_data : Dataset
        Training data; only its labels are read.

    Returns
    -------
    (grad, hess) : tuple of arrays
        First and second derivatives of the loss w.r.t. the raw scores.
    """
    y_true = train_data.get_label()
    # map raw scores to probabilities with the logistic sigmoid
    prob = 1. / (1. + np.exp(-preds))
    return prob - y_true, prob * (1. - prob)
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
# binary error
def binary_error(preds, train_data):
    """Binary error-rate eval metric.

    Parameters
    ----------
    preds : array
        Predicted probabilities (thresholded at 0.5).
    train_data : Dataset
        Data being evaluated; only its labels are read.

    Returns
    -------
    (name, value, is_higher_better) : tuple
        Metric name, mean misclassification rate, and False because a
        lower error is better.
    """
    y_true = train_data.get_label()
    predicted_class = preds > 0.5
    return 'error', np.mean(y_true != predicted_class), False
# train with the self-defined objective (fobj) and eval metric (feval)
# defined above, resuming from the current in-memory Booster
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                fobj=loglikelood,
                feval=binary_error,
                valid_sets=lgb_eval)

print('Finish 30 - 40 rounds with self-defined objective function and eval metric...')

print('Start a new training job...')
# callback
def reset_metrics():
    """Build a training callback that registers an extra valid set.

    The returned callback runs before each iteration (before_iteration=True)
    and, exactly at the 5th iteration of the current training job, attaches a
    freshly constructed eval Dataset to the booster under the name
    'new valid'.
    """
    def callback(env):
        # build the candidate eval set from the module-level test data
        extra_valid = lgb.Dataset(X_test, y_test, reference=lgb_train)
        iters_done = env.iteration - env.begin_iteration
        if iters_done == 5:
            print('Add a new valid dataset at iteration 5...')
            env.model.add_valid(extra_valid, 'new valid')

    # run before each boosting iteration, ahead of other callbacks
    callback.before_iteration = True
    callback.order = 0
    return callback
# fresh training job demonstrating the callback defined above: a new valid
# set is attached once the 5th iteration is reached
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                valid_sets=lgb_train,
                callbacks=[reset_metrics()])

print('Finish first 10 rounds with callback function...')
examples/python-guide/simple_example.py
View file @
ebfc8521
...
@@ -6,6 +6,7 @@ import pandas as pd
...
@@ -6,6 +6,7 @@ import pandas as pd
from
sklearn.metrics
import
mean_squared_error
from
sklearn.metrics
import
mean_squared_error
# load or create your dataset
# load or create your dataset
print
(
'Load data...'
)
df_train
=
pd
.
read_csv
(
'../regression/regression.train'
,
header
=
None
,
sep
=
'
\t
'
)
df_train
=
pd
.
read_csv
(
'../regression/regression.train'
,
header
=
None
,
sep
=
'
\t
'
)
df_test
=
pd
.
read_csv
(
'../regression/regression.test'
,
header
=
None
,
sep
=
'
\t
'
)
df_test
=
pd
.
read_csv
(
'../regression/regression.test'
,
header
=
None
,
sep
=
'
\t
'
)
...
@@ -18,7 +19,6 @@ X_test = df_test.drop(0, axis=1)
...
@@ -18,7 +19,6 @@ X_test = df_test.drop(0, axis=1)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
)
# specify your configurations as a dict
# specify your configurations as a dict
params
=
{
params
=
{
'task'
:
'train'
,
'task'
:
'train'
,
...
@@ -33,27 +33,32 @@ params = {
...
@@ -33,27 +33,32 @@ params = {
'verbose'
:
0
'verbose'
:
0
}
}
print
(
'Start training...'
)
# train
# train
gbm
=
lgb
.
train
(
params
,
gbm
=
lgb
.
train
(
params
,
lgb_train
,
lgb_train
,
num_boost_round
=
10
0
,
num_boost_round
=
2
0
,
valid_sets
=
lgb_eval
,
valid_sets
=
lgb_eval
,
early_stopping_rounds
=
10
)
early_stopping_rounds
=
5
)
print
(
'Save model...'
)
# save model to file
# save model to file
gbm
.
save_model
(
'model.txt'
)
gbm
.
save_model
(
'model.txt'
)
print
(
'Start predicting...'
)
# predict
# predict
y_pred
=
gbm
.
predict
(
X_test
,
num_iteration
=
gbm
.
best_iteration
)
y_pred
=
gbm
.
predict
(
X_test
,
num_iteration
=
gbm
.
best_iteration
)
# eval
# eval
print
(
'The rmse of prediction is:'
,
mean_squared_error
(
y_test
,
y_pred
)
**
0.5
)
print
(
'The rmse of prediction is:'
,
mean_squared_error
(
y_test
,
y_pred
)
**
0.5
)
print
(
'Dump model to JSON...'
)
# dump model to json (and save to file)
# dump model to json (and save to file)
model_json
=
gbm
.
dump_model
()
model_json
=
gbm
.
dump_model
()
with
open
(
'model.json'
,
'w+'
)
as
f
:
with
open
(
'model.json'
,
'w+'
)
as
f
:
json
.
dump
(
model_json
,
f
,
indent
=
4
)
json
.
dump
(
model_json
,
f
,
indent
=
4
)
print
(
'Calculate feature importances...'
)
# feature importances
# feature importances
print
(
'Feature importances:'
,
gbm
.
feature_importance
())
print
(
'Feature importances:'
,
list
(
gbm
.
feature_importance
())
)
print
(
'Feature importances:'
,
gbm
.
feature_importance
(
"gain"
))
#
print('Feature importances:',
list(
gbm.feature_importance("gain"))
)
examples/python-guide/sklearn_example.py
View file @
ebfc8521
...
@@ -5,6 +5,7 @@ import pandas as pd
...
@@ -5,6 +5,7 @@ import pandas as pd
from
sklearn.metrics
import
mean_squared_error
from
sklearn.metrics
import
mean_squared_error
# load or create your dataset
# load or create your dataset
print
(
'Load data...'
)
df_train
=
pd
.
read_csv
(
'../regression/regression.train'
,
header
=
None
,
sep
=
'
\t
'
)
df_train
=
pd
.
read_csv
(
'../regression/regression.train'
,
header
=
None
,
sep
=
'
\t
'
)
df_test
=
pd
.
read_csv
(
'../regression/regression.test'
,
header
=
None
,
sep
=
'
\t
'
)
df_test
=
pd
.
read_csv
(
'../regression/regression.test'
,
header
=
None
,
sep
=
'
\t
'
)
...
@@ -13,19 +14,23 @@ y_test = df_test[0]
...
@@ -13,19 +14,23 @@ y_test = df_test[0]
X_train
=
df_train
.
drop
(
0
,
axis
=
1
)
X_train
=
df_train
.
drop
(
0
,
axis
=
1
)
X_test
=
df_test
.
drop
(
0
,
axis
=
1
)
X_test
=
df_test
.
drop
(
0
,
axis
=
1
)
print
(
'Start training...'
)
# train
# train
gbm
=
lgb
.
LGBMRegressor
(
objective
=
'regression'
,
gbm
=
lgb
.
LGBMRegressor
(
objective
=
'regression'
,
num_leaves
=
31
,
num_leaves
=
31
,
learning_rate
=
0.05
,
learning_rate
=
0.05
,
n_estimators
=
10
0
)
n_estimators
=
2
0
)
gbm
.
fit
(
X_train
,
y_train
,
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
eval_set
=
[(
X_test
,
y_test
)],
early_stopping_rounds
=
10
)
eval_metric
=
'l1'
,
early_stopping_rounds
=
5
)
print
(
'Start predicting...'
)
# predict
# predict
y_pred
=
gbm
.
predict
(
X_test
,
num_iteration
=
gbm
.
best_iteration
)
y_pred
=
gbm
.
predict
(
X_test
,
num_iteration
=
gbm
.
best_iteration
)
# eval
# eval
print
(
'The rmse of prediction is:'
,
mean_squared_error
(
y_test
,
y_pred
)
**
0.5
)
print
(
'The rmse of prediction is:'
,
mean_squared_error
(
y_test
,
y_pred
)
**
0.5
)
print
(
'Calculate feature importances...'
)
# feature importances
# feature importances
print
(
'Feature importances:'
,
gbm
.
feature_importance
())
print
(
'Feature importances:'
,
list
(
gbm
.
feature_importance
())
)
python-package/lightgbm/basic.py
View file @
ebfc8521
This diff is collapsed.
Click to expand it.
python-package/lightgbm/callback.py
View file @
ebfc8521
...
@@ -35,7 +35,7 @@ def _format_eval_result(value, show_stdv=True):
...
@@ -35,7 +35,7 @@ def _format_eval_result(value, show_stdv=True):
else
:
else
:
return
'%s
\'
s %s:%g'
%
(
value
[
0
],
value
[
1
],
value
[
2
])
return
'%s
\'
s %s:%g'
%
(
value
[
0
],
value
[
1
],
value
[
2
])
else
:
else
:
raise
ValueError
(
"
w
rong metric value"
)
raise
ValueError
(
"
W
rong metric value"
)
def
print_evaluation
(
period
=
1
,
show_stdv
=
True
):
def
print_evaluation
(
period
=
1
,
show_stdv
=
True
):
...
@@ -80,7 +80,7 @@ def record_evaluation(eval_result):
...
@@ -80,7 +80,7 @@ def record_evaluation(eval_result):
The requested callback function.
The requested callback function.
"""
"""
if
not
isinstance
(
eval_result
,
dict
):
if
not
isinstance
(
eval_result
,
dict
):
raise
TypeError
(
'
e
val_result
has to
be a dictionary'
)
raise
TypeError
(
'
E
val_result
should
be a dictionary'
)
eval_result
.
clear
()
eval_result
.
clear
()
def
init
(
env
):
def
init
(
env
):
...
@@ -164,7 +164,7 @@ def early_stop(stopping_rounds, verbose=True):
...
@@ -164,7 +164,7 @@ def early_stop(stopping_rounds, verbose=True):
def
init
(
env
):
def
init
(
env
):
"""internal function"""
"""internal function"""
if
not
env
.
evaluation_result_list
:
if
not
env
.
evaluation_result_list
:
raise
ValueError
(
'For early stopping
you need
at least one set i
n evals.
'
)
raise
ValueError
(
'For early stopping
,
at least one
data
set i
s required for evaluation
'
)
if
verbose
:
if
verbose
:
msg
=
"Train until valid scores didn't improve in {} rounds."
msg
=
"Train until valid scores didn't improve in {} rounds."
...
@@ -194,7 +194,7 @@ def early_stop(stopping_rounds, verbose=True):
...
@@ -194,7 +194,7 @@ def early_stop(stopping_rounds, verbose=True):
if
env
.
model
is
not
None
:
if
env
.
model
is
not
None
:
env
.
model
.
set_attr
(
best_iteration
=
str
(
best_iter
[
i
]))
env
.
model
.
set_attr
(
best_iteration
=
str
(
best_iter
[
i
]))
if
verbose
:
if
verbose
:
print
(
'
e
arly stopping, best iteration is:'
)
print
(
'
E
arly stopping, best iteration is:'
)
print
(
best_msg
[
i
])
print
(
best_msg
[
i
])
raise
EarlyStopException
(
best_iter
[
i
])
raise
EarlyStopException
(
best_iter
[
i
])
callback
.
order
=
30
callback
.
order
=
30
...
...
python-package/lightgbm/engine.py
View file @
ebfc8521
...
@@ -85,10 +85,10 @@ def train(params, train_set, num_boost_round=100,
...
@@ -85,10 +85,10 @@ def train(params, train_set, num_boost_round=100,
predictor
=
init_model
.
_to_predictor
()
predictor
=
init_model
.
_to_predictor
()
else
:
else
:
predictor
=
None
predictor
=
None
init_iteration
=
predictor
.
num_total_iteration
if
predictor
else
0
init_iteration
=
predictor
.
num_total_iteration
if
predictor
is
not
None
else
0
"""check dataset"""
"""check dataset"""
if
not
isinstance
(
train_set
,
Dataset
):
if
not
isinstance
(
train_set
,
Dataset
):
raise
TypeError
(
"only
can
accept Dataset
instance for traninig
"
)
raise
TypeError
(
"
Traninig
only accept
s
Dataset
object
"
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_feature_name
(
feature_name
)
...
@@ -98,7 +98,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -98,7 +98,7 @@ def train(params, train_set, num_boost_round=100,
train_data_name
=
"training"
train_data_name
=
"training"
reduced_valid_sets
=
[]
reduced_valid_sets
=
[]
name_valid_sets
=
[]
name_valid_sets
=
[]
if
valid_sets
:
if
valid_sets
is
not
None
:
if
isinstance
(
valid_sets
,
Dataset
):
if
isinstance
(
valid_sets
,
Dataset
):
valid_sets
=
[
valid_sets
]
valid_sets
=
[
valid_sets
]
if
isinstance
(
valid_names
,
str
):
if
isinstance
(
valid_names
,
str
):
...
@@ -111,7 +111,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -111,7 +111,7 @@ def train(params, train_set, num_boost_round=100,
train_data_name
=
valid_names
[
i
]
train_data_name
=
valid_names
[
i
]
continue
continue
if
not
isinstance
(
valid_data
,
Dataset
):
if
not
isinstance
(
valid_data
,
Dataset
):
raise
TypeError
(
"only
can
accept Dataset
instance for traninig
"
)
raise
TypeError
(
"
Traninig
only accept
s
Dataset
object
"
)
valid_data
.
set_reference
(
train_set
)
valid_data
.
set_reference
(
train_set
)
reduced_valid_sets
.
append
(
valid_data
)
reduced_valid_sets
.
append
(
valid_data
)
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
...
@@ -120,7 +120,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -120,7 +120,7 @@ def train(params, train_set, num_boost_round=100,
name_valid_sets
.
append
(
'valid_'
+
str
(
i
))
name_valid_sets
.
append
(
'valid_'
+
str
(
i
))
"""process callbacks"""
"""process callbacks"""
if
not
callbacks
:
if
callbacks
is
None
:
callbacks
=
set
()
callbacks
=
set
()
else
:
else
:
for
i
,
cb
in
enumerate
(
callbacks
):
for
i
,
cb
in
enumerate
(
callbacks
):
...
@@ -133,7 +133,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -133,7 +133,7 @@ def train(params, train_set, num_boost_round=100,
elif
isinstance
(
verbose_eval
,
int
):
elif
isinstance
(
verbose_eval
,
int
):
callbacks
.
add
(
callback
.
print_evaluation
(
verbose_eval
))
callbacks
.
add
(
callback
.
print_evaluation
(
verbose_eval
))
if
early_stopping_rounds
:
if
early_stopping_rounds
is
not
None
:
callbacks
.
add
(
callback
.
early_stop
(
early_stopping_rounds
,
callbacks
.
add
(
callback
.
early_stop
(
early_stopping_rounds
,
verbose
=
bool
(
verbose_eval
)))
verbose
=
bool
(
verbose_eval
)))
...
@@ -169,7 +169,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -169,7 +169,7 @@ def train(params, train_set, num_boost_round=100,
evaluation_result_list
=
[]
evaluation_result_list
=
[]
# check evaluation result.
# check evaluation result.
if
valid_sets
:
if
valid_sets
is
not
None
:
if
is_valid_contain_train
:
if
is_valid_contain_train
:
evaluation_result_list
.
extend
(
booster
.
eval_train
(
feval
))
evaluation_result_list
.
extend
(
booster
.
eval_train
(
feval
))
evaluation_result_list
.
extend
(
booster
.
eval_valid
(
feval
))
evaluation_result_list
.
extend
(
booster
.
eval_valid
(
feval
))
...
@@ -227,7 +227,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
...
@@ -227,7 +227,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
sfk
=
StratifiedKFold
(
n_splits
=
nfold
,
shuffle
=
True
,
random_state
=
seed
)
sfk
=
StratifiedKFold
(
n_splits
=
nfold
,
shuffle
=
True
,
random_state
=
seed
)
idset
=
[
x
[
1
]
for
x
in
sfk
.
split
(
X
=
full_data
.
get_label
(),
y
=
full_data
.
get_label
())]
idset
=
[
x
[
1
]
for
x
in
sfk
.
split
(
X
=
full_data
.
get_label
(),
y
=
full_data
.
get_label
())]
else
:
else
:
raise
LightGBMError
(
'
sklearn needs to be installed in order to use
stratified cv'
)
raise
LightGBMError
(
'
Scikit-learn is required for
stratified cv'
)
else
:
else
:
full_data
.
construct
()
full_data
.
construct
()
randidx
=
np
.
random
.
permutation
(
full_data
.
num_data
())
randidx
=
np
.
random
.
permutation
(
full_data
.
num_data
())
...
@@ -318,7 +318,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
...
@@ -318,7 +318,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
evaluation history : list(string)
evaluation history : list(string)
"""
"""
if
not
isinstance
(
train_set
,
Dataset
):
if
not
isinstance
(
train_set
,
Dataset
):
raise
TypeError
(
"only
can
accept Dataset
instance for traninig
"
)
raise
TypeError
(
"
Traninig
only accept
s
Dataset
object
"
)
if
is_str
(
init_model
):
if
is_str
(
init_model
):
predictor
=
_InnerPredictor
(
model_file
=
init_model
)
predictor
=
_InnerPredictor
(
model_file
=
init_model
)
...
@@ -342,13 +342,13 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
...
@@ -342,13 +342,13 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
cvfolds
=
_make_n_folds
(
train_set
,
nfold
,
params
,
seed
,
fpreproc
,
stratified
)
cvfolds
=
_make_n_folds
(
train_set
,
nfold
,
params
,
seed
,
fpreproc
,
stratified
)
# setup callbacks
# setup callbacks
if
not
callbacks
:
if
callbacks
is
None
:
callbacks
=
set
()
callbacks
=
set
()
else
:
else
:
for
i
,
cb
in
enumerate
(
callbacks
):
for
i
,
cb
in
enumerate
(
callbacks
):
cb
.
__dict__
.
setdefault
(
'order'
,
i
-
len
(
callbacks
))
cb
.
__dict__
.
setdefault
(
'order'
,
i
-
len
(
callbacks
))
callbacks
=
set
(
callbacks
)
callbacks
=
set
(
callbacks
)
if
early_stopping_rounds
:
if
early_stopping_rounds
is
not
None
:
callbacks
.
add
(
callback
.
early_stop
(
early_stopping_rounds
,
verbose
=
False
))
callbacks
.
add
(
callback
.
early_stop
(
early_stopping_rounds
,
verbose
=
False
))
if
verbose_eval
is
True
:
if
verbose_eval
is
True
:
callbacks
.
add
(
callback
.
print_evaluation
(
show_stdv
=
show_stdv
))
callbacks
.
add
(
callback
.
print_evaluation
(
show_stdv
=
show_stdv
))
...
...
python-package/lightgbm/sklearn.py
View file @
ebfc8521
...
@@ -6,7 +6,7 @@ from __future__ import absolute_import
...
@@ -6,7 +6,7 @@ from __future__ import absolute_import
import
numpy
as
np
import
numpy
as
np
from
.basic
import
LightGBMError
,
Dataset
,
is_str
from
.basic
import
LightGBMError
,
Dataset
,
is_str
from
.engine
import
train
from
.engine
import
train
#
sklearn
'''
sklearn
'''
try
:
try
:
from
sklearn.base
import
BaseEstimator
from
sklearn.base
import
BaseEstimator
from
sklearn.base
import
RegressorMixin
,
ClassifierMixin
from
sklearn.base
import
RegressorMixin
,
ClassifierMixin
...
@@ -38,7 +38,6 @@ def _point_wise_objective(func):
...
@@ -38,7 +38,6 @@ def _point_wise_objective(func):
y_pred: array_like of shape [n_samples] or shape[n_samples* n_class] (for multi-class)
y_pred: array_like of shape [n_samples] or shape[n_samples* n_class] (for multi-class)
The predicted values
The predicted values
Returns
Returns
-------
-------
new_func: callable
new_func: callable
...
@@ -66,7 +65,7 @@ def _point_wise_objective(func):
...
@@ -66,7 +65,7 @@ def _point_wise_objective(func):
num_data
=
len
(
weight
)
num_data
=
len
(
weight
)
num_class
=
len
(
grad
)
//
num_data
num_class
=
len
(
grad
)
//
num_data
if
num_class
*
num_data
!=
len
(
grad
):
if
num_class
*
num_data
!=
len
(
grad
):
raise
ValueError
(
"
l
ength of grad and hess should equal to num_class * num_data"
)
raise
ValueError
(
"
L
ength of grad and hess should equal to num_class * num_data"
)
for
k
in
range
(
num_class
):
for
k
in
range
(
num_class
):
for
i
in
range
(
num_data
):
for
i
in
range
(
num_data
):
idx
=
k
*
num_data
+
i
idx
=
k
*
num_data
+
i
...
@@ -147,7 +146,7 @@ class LGBMModel(LGBMModelBase):
...
@@ -147,7 +146,7 @@ class LGBMModel(LGBMModelBase):
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
is_unbalance
=
False
,
seed
=
0
):
if
not
SKLEARN_INSTALLED
:
if
not
SKLEARN_INSTALLED
:
raise
LightGBMError
(
'
sklearn needs to be installed in order to use
this module'
)
raise
LightGBMError
(
'
Scikit-learn is required for
this module'
)
self
.
num_leaves
=
num_leaves
self
.
num_leaves
=
num_leaves
self
.
max_depth
=
max_depth
self
.
max_depth
=
max_depth
...
@@ -185,7 +184,7 @@ class LGBMModel(LGBMModelBase):
...
@@ -185,7 +184,7 @@ class LGBMModel(LGBMModelBase):
booster : a lightgbm booster of underlying model
booster : a lightgbm booster of underlying model
"""
"""
if
self
.
_Booster
is
None
:
if
self
.
_Booster
is
None
:
raise
LightGBMError
(
'
n
eed to call fit beforehand'
)
raise
LightGBMError
(
'
N
eed to call fit beforehand'
)
return
self
.
_Booster
return
self
.
_Booster
def
get_params
(
self
,
deep
=
False
):
def
get_params
(
self
,
deep
=
False
):
...
@@ -196,8 +195,8 @@ class LGBMModel(LGBMModelBase):
...
@@ -196,8 +195,8 @@ class LGBMModel(LGBMModelBase):
return
params
return
params
def
fit
(
self
,
X
,
y
,
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_metric
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
...
@@ -343,7 +342,7 @@ class LGBMModel(LGBMModelBase):
...
@@ -343,7 +342,7 @@ class LGBMModel(LGBMModelBase):
if
self
.
evals_result_
:
if
self
.
evals_result_
:
evals_result
=
self
.
evals_result_
evals_result
=
self
.
evals_result_
else
:
else
:
raise
LightGBMError
(
'No results.'
)
raise
LightGBMError
(
'No results
found
.'
)
return
evals_result
return
evals_result
...
@@ -362,7 +361,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
...
@@ -362,7 +361,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
def
fit
(
self
,
X
,
y
,
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
sample_weight
=
None
,
init_score
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_init_score
=
None
,
eval_metric
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
...
@@ -370,10 +369,10 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
...
@@ -370,10 +369,10 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
other_params
=
None
):
other_params
=
None
):
super
(
LGBMRegressor
,
self
).
fit
(
X
,
y
,
sample_weight
,
init_score
,
None
,
super
(
LGBMRegressor
,
self
).
fit
(
X
,
y
,
sample_weight
,
init_score
,
None
,
eval_set
,
eval_sample_weight
,
eval_init_score
,
None
,
eval_set
,
eval_sample_weight
,
eval_init_score
,
None
,
eval_metric
,
early_stopping_rounds
,
eval_metric
,
early_stopping_rounds
,
verbose
,
feature_name
,
categorical_feature
,
verbose
,
feature_name
,
categorical_feature
,
other_params
)
other_params
)
return
self
return
self
class
LGBMClassifier
(
LGBMModel
,
LGBMClassifierBase
):
class
LGBMClassifier
(
LGBMModel
,
LGBMClassifierBase
):
...
@@ -390,15 +389,15 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
...
@@ -390,15 +389,15 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
is_unbalance
=
False
,
seed
=
0
):
is_unbalance
=
False
,
seed
=
0
):
super
(
LGBMClassifier
,
self
).
__init__
(
num_leaves
,
max_depth
,
super
(
LGBMClassifier
,
self
).
__init__
(
num_leaves
,
max_depth
,
learning_rate
,
n_estimators
,
max_bin
,
learning_rate
,
n_estimators
,
max_bin
,
silent
,
objective
,
silent
,
objective
,
nthread
,
nthread
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
subsample
,
subsample_freq
,
colsample_bytree
,
subsample
,
subsample_freq
,
colsample_bytree
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
is_unbalance
,
seed
)
is_unbalance
,
seed
)
def
fit
(
self
,
X
,
y
,
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
sample_weight
=
None
,
init_score
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_init_score
=
None
,
eval_metric
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
...
@@ -480,7 +479,7 @@ def _group_wise_objective(func):
...
@@ -480,7 +479,7 @@ def _group_wise_objective(func):
labels
=
dataset
.
get_label
()
labels
=
dataset
.
get_label
()
group
=
dataset
.
get_group
()
group
=
dataset
.
get_group
()
if
group
is
None
:
if
group
is
None
:
raise
ValueError
(
"
g
roup should not be None for ranking task"
)
raise
ValueError
(
"
G
roup should not be None for ranking task"
)
grad
,
hess
=
func
(
labels
,
group
,
preds
)
grad
,
hess
=
func
(
labels
,
group
,
preds
)
"""weighted for objective"""
"""weighted for objective"""
weight
=
dataset
.
get_weight
()
weight
=
dataset
.
get_weight
()
...
@@ -490,7 +489,7 @@ def _group_wise_objective(func):
...
@@ -490,7 +489,7 @@ def _group_wise_objective(func):
grad
=
np
.
multiply
(
grad
,
weight
)
grad
=
np
.
multiply
(
grad
,
weight
)
hess
=
np
.
multiply
(
hess
,
weight
)
hess
=
np
.
multiply
(
hess
,
weight
)
else
:
else
:
raise
ValueError
(
"
l
eng
h
t of grad and hess should equal with num_data"
)
raise
ValueError
(
"
L
engt
h
of grad and hess should equal with num_data"
)
return
grad
,
hess
return
grad
,
hess
return
inner
return
inner
...
@@ -507,20 +506,20 @@ class LGBMRanker(LGBMModel):
...
@@ -507,20 +506,20 @@ class LGBMRanker(LGBMModel):
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
is_unbalance
=
False
,
seed
=
0
):
super
(
LGBMRanker
,
self
).
__init__
(
num_leaves
,
max_depth
,
super
(
LGBMRanker
,
self
).
__init__
(
num_leaves
,
max_depth
,
learning_rate
,
n_estimators
,
max_bin
,
learning_rate
,
n_estimators
,
max_bin
,
silent
,
objective
,
silent
,
objective
,
nthread
,
nthread
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
min_split_gain
,
min_child_weight
,
min_child_samples
,
subsample
,
subsample_freq
,
colsample_bytree
,
subsample
,
subsample_freq
,
colsample_bytree
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
reg_alpha
,
reg_lambda
,
scale_pos_weight
,
is_unbalance
,
seed
)
is_unbalance
,
seed
)
if
callable
(
self
.
objective
):
if
callable
(
self
.
objective
):
self
.
fobj
=
_group_wise_objective
(
self
.
objective
)
self
.
fobj
=
_group_wise_objective
(
self
.
objective
)
else
:
else
:
self
.
fobj
=
None
self
.
fobj
=
None
def
fit
(
self
,
X
,
y
,
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_init_score
=
None
,
eval_group
=
None
,
eval_metric
=
None
,
eval_at
=
None
,
eval_metric
=
None
,
eval_at
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
...
@@ -535,17 +534,18 @@ class LGBMRanker(LGBMModel):
...
@@ -535,17 +534,18 @@ class LGBMRanker(LGBMModel):
"""check group data"""
"""check group data"""
if
group
is
None
:
if
group
is
None
:
raise
ValueError
(
"
s
hould
u
se group for ranking task"
)
raise
ValueError
(
"
S
hould se
t
group for ranking task"
)
if
eval_set
is
not
None
:
if
eval_set
is
not
None
:
if
eval_group
is
None
:
if
eval_group
is
None
:
raise
ValueError
(
"
e
val_group cannot be None when eval_set is not None"
)
raise
ValueError
(
"
E
val_group cannot be None when eval_set is not None"
)
elif
len
(
eval_group
)
!=
len
(
eval_set
):
elif
len
(
eval_group
)
!=
len
(
eval_set
):
raise
ValueError
(
"
l
ength of eval_group should equal
with
eval_set"
)
raise
ValueError
(
"
L
ength of eval_group should equal
to
eval_set"
)
else
:
else
:
for
inner_group
in
eval_group
:
for
inner_group
in
eval_group
:
if
inner_group
is
None
:
if
inner_group
is
None
:
raise
ValueError
(
"should set group for all eval data for ranking task"
)
raise
ValueError
(
"Should set group for all eval dataset for ranking task"
)
if
eval_at
is
not
None
:
if
eval_at
is
not
None
:
other_params
=
{}
if
other_params
is
None
else
other_params
other_params
=
{}
if
other_params
is
None
else
other_params
other_params
[
'ndcg_eval_at'
]
=
list
(
eval_at
)
other_params
[
'ndcg_eval_at'
]
=
list
(
eval_at
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment