Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
dcbdc675
Commit
dcbdc675
authored
Jan 03, 2017
by
Guolin Ke
Browse files
Merge branch 'master' of
https://github.com/Microsoft/LightGBM
parents
795ff82f
1c6c7046
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
105 additions
and
118 deletions
+105
-118
docs/Python-API.md
docs/Python-API.md
+27
-32
examples/python-guide/sklearn_example.py
examples/python-guide/sklearn_example.py
+1
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+2
-2
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+60
-78
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+1
-1
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+3
-2
tests/python_package_test/test_sklearn.py
tests/python_package_test/test_sklearn.py
+11
-2
No files found.
docs/Python-API.md
View file @
dcbdc675
...
...
@@ -10,6 +10,7 @@
*
[
Scikit-learn API
](
Python-API.md#scikit-learn-api
)
-
[
Common Methods
](
Python-API.md#common-methods
)
-
[
Common Attributes
](
Python-API.md#common-attributes
)
-
[
LGBMClassifier
](
Python-API.md#lgbmclassifier
)
-
[
LGBMRegressor
](
Python-API.md#lgbmregressor
)
-
[
LGBMRanker
](
Python-API.md#lgbmranker
)
...
...
@@ -675,35 +676,6 @@ The methods of each Class is in alphabetical order.
X_leaves : array_like, shape=[n_samples, n_trees]
####booster()
Get the underlying lightgbm Booster of this model.
This will raise an exception when it's called before fit().
Returns
-------
booster : a lightgbm booster of underlying model
####evals_result()
Return the evaluation results.
Returns
-------
evals_result : dictionary
####feature_importance()
Return the feature importances of each feature.
Returns
-------
result : array
Array of normalized feature importances
####fit(X, y, sample_weight=None, init_score=None, group=None, eval_set=None, eval_sample_weight=None, eval_init_score=None, eval_group=None, eval_metric=None, early_stopping_rounds=None, verbose=True, feature_name=None, categorical_feature=None, callbacks=None)
Fit the gradient boosting model.
...
...
@@ -771,7 +743,7 @@ The methods of each Class is in alphabetical order.
To get the prediction for the i-th row in the j-th class, access y_pred[j*num_data+i]
####predict(
data
, raw_score=False, num_iteration=0)
####predict(
X
, raw_score=False, num_iteration=0)
Return the predicted value for each sample.
...
...
@@ -786,11 +758,26 @@ The methods of each Class is in alphabetical order.
Returns
-------
predicted_result : array_like, shape=[n_samples] or [n_samples, n_classes]
###Common Attributes
####booster_
Get the underlying lightgbm Booster of this model.
####evals_result_
Get the evaluation results.
####feature_importance_
Get normalized feature importances.
###LGBMClassifier
####predict_proba(
data
, raw_score=False, num_iteration=0)
####predict_proba(
X
, raw_score=False, num_iteration=0)
Return the predicted probability for each class for each sample.
...
...
@@ -805,6 +792,14 @@ The methods of each Class is in alphabetical order.
Returns
-------
predicted_probability : array_like, shape=[n_samples, n_classes]
####classes_
Get class label array.
####n_classes_
Get number of classes.
###LGBMRegressor
...
...
examples/python-guide/sklearn_example.py
View file @
dcbdc675
...
...
@@ -34,7 +34,7 @@ print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
print
(
'Calculate feature importances...'
)
# feature importances
print
(
'Feature importances:'
,
list
(
gbm
.
feature_importance
()
))
print
(
'Feature importances:'
,
list
(
gbm
.
feature_importance
_
))
# other scikit-learn modules
estimator
=
lgb
.
LGBMRegressor
(
num_leaves
=
31
)
...
...
python-package/lightgbm/basic.py
View file @
dcbdc675
...
...
@@ -132,7 +132,7 @@ def param_dict_to_str(data):
%
(
key
,
type
(
val
).
__name__
))
return
' '
.
join
(
pairs
)
class
_temp_file
:
class
_temp_file
(
object
)
:
def
__enter__
(
self
):
with
NamedTemporaryFile
(
prefix
=
"lightgbm_tmp_"
,
delete
=
True
)
as
f
:
self
.
name
=
f
.
name
...
...
@@ -146,7 +146,7 @@ class _temp_file:
return
ret
def
writelines
(
self
,
lines
):
with
open
(
self
.
name
,
"w+"
)
as
f
:
ret
=
f
.
writelines
(
lines
)
f
.
writelines
(
lines
)
"""macro definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32
=
0
...
...
python-package/lightgbm/sklearn.py
View file @
dcbdc675
...
...
@@ -5,13 +5,14 @@ from __future__ import absolute_import
import
inspect
import
numpy
as
np
from
.basic
import
LightGBMError
,
Dataset
,
is_str
from
.basic
import
LightGBMError
,
Dataset
from
.engine
import
train
'''sklearn'''
try
:
from
sklearn.base
import
BaseEstimator
from
sklearn.base
import
RegressorMixin
,
ClassifierMixin
from
sklearn.preprocessing
import
LabelEncoder
from
sklearn.utils
import
deprecated
SKLEARN_INSTALLED
=
True
LGBMModelBase
=
BaseEstimator
LGBMRegressorBase
=
RegressorMixin
...
...
@@ -251,25 +252,13 @@ class LGBMModel(LGBMModelBase):
self
.
uniform_drop
=
uniform_drop
self
.
xgboost_dart_mode
=
xgboost_dart_mode
self
.
_Booster
=
None
self
.
evals_result
=
None
self
.
best_iteration
=
-
1
if
callable
(
self
.
objective
):
self
.
fobj
=
_objective_function_wrapper
(
self
.
objective
)
else
:
self
.
fobj
=
None
def
booster
(
self
):
"""
Get the underlying lightgbm Booster of this model.
This will raise an exception when fit was not called
Returns
-------
booster : a lightgbm booster of underlying model
"""
if
self
.
_Booster
is
None
:
raise
LightGBMError
(
'Need to call fit beforehand'
)
return
self
.
_Booster
def
fit
(
self
,
X
,
y
,
sample_weight
=
None
,
init_score
=
None
,
group
=
None
,
eval_set
=
None
,
eval_sample_weight
=
None
,
...
...
@@ -349,19 +338,15 @@ class LGBMModel(LGBMModelBase):
params
[
'num_class'
]
=
self
.
n_classes_
if
hasattr
(
self
,
'eval_at'
):
params
[
'ndcg_eval_at'
]
=
self
.
eval_at
if
self
.
fobj
:
params
[
"objective"
]
=
"None"
else
:
params
[
"objective"
]
=
self
.
objective
# objective = nullptr for unknown objective
params
[
'objective'
]
=
'None'
if
callable
(
eval_metric
):
feval
=
_eval_function_wrapper
(
eval_metric
)
elif
is_str
(
eval_metric
)
or
isinstance
(
eval_metric
,
list
):
feval
=
None
params
.
update
({
'metric'
:
eval_metric
})
else
:
feval
=
None
params
[
'metric'
]
=
eval_metric
def
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
):
ret
=
Dataset
(
X
,
label
=
y
,
max_bin
=
self
.
max_bin
,
weight
=
sample_weight
,
group
=
group
,
params
=
params
)
...
...
@@ -383,10 +368,7 @@ class LGBMModel(LGBMModelBase):
if
collection
is
None
:
return
None
elif
isinstance
(
collection
,
list
):
if
len
(
collection
)
>
i
:
return
collection
[
i
]
else
:
return
None
return
collection
[
i
]
if
len
(
collection
)
>
i
else
None
elif
isinstance
(
collection
,
dict
):
return
collection
.
get
(
i
,
None
)
else
:
...
...
@@ -406,16 +388,13 @@ class LGBMModel(LGBMModelBase):
callbacks
=
callbacks
)
if
evals_result
:
for
val
in
evals_result
.
items
():
evals_result_key
=
list
(
val
[
1
].
keys
())[
0
]
evals_result
[
val
[
0
]][
evals_result_key
]
=
val
[
1
][
evals_result_key
]
self
.
evals_result_
=
evals_result
self
.
evals_result
=
evals_result
if
early_stopping_rounds
is
not
None
:
self
.
best_iteration
=
self
.
_Booster
.
best_iteration
return
self
def
predict
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
def
predict
(
self
,
X
,
raw_score
=
False
,
num_iteration
=
0
):
"""
Return the predicted value for each sample.
...
...
@@ -431,9 +410,7 @@ class LGBMModel(LGBMModelBase):
-------
predicted_result : array_like, shape=[n_samples] or [n_samples, n_classes]
"""
return
self
.
_Booster
.
predict
(
data
,
raw_score
=
raw_score
,
num_iteration
=
num_iteration
)
return
self
.
booster_
.
predict
(
X
,
raw_score
=
raw_score
,
num_iteration
=
num_iteration
)
def
apply
(
self
,
X
,
num_iteration
=
0
):
"""
...
...
@@ -451,35 +428,35 @@ class LGBMModel(LGBMModelBase):
-------
X_leaves : array_like, shape=[n_samples, n_trees]
"""
return
self
.
_Booster
.
predict
(
X
,
pred_leaf
=
True
,
num_iteration
=
num_iteration
)
return
self
.
booster_
.
predict
(
X
,
pred_leaf
=
True
,
num_iteration
=
num_iteration
)
def
evals_result
(
self
):
"""
Return the evaluation results.
@
property
def
booster_
(
self
):
"""Get the underlying lightgbm Booster of this model."""
if
self
.
_Booster
is
None
:
raise
LightGBMError
(
'No booster found. Need to call fit beforehand.'
)
return
self
.
_Booster
Returns
-------
evals_result : dictionary
"""
if
self
.
evals_result_
:
evals_result
=
self
.
evals_result_
else
:
raise
LightGBMError
(
'No results found.'
)
@
property
def
evals_result_
(
self
):
"""Get the evaluation results."""
if
self
.
evals_result
is
None
:
raise
LightGBMError
(
'No results found. Need to call fit with eval set beforehand.'
)
return
self
.
evals_result
@
property
def
feature_importance_
(
self
):
"""Get normalized feature importances."""
importace_array
=
self
.
booster_
.
feature_importance
().
astype
(
np
.
float32
)
return
importace_array
/
importace_array
.
sum
()
return
evals_result
@
deprecated
(
'Use attribute booster_ instead.'
)
def
booster
(
self
):
return
self
.
booster_
@
deprecated
(
'Use attribute feature_importance_ instead.'
)
def
feature_importance
(
self
):
"""
Feature importances
Returns
-------
Array of normalized feature importances
"""
importace_array
=
self
.
_Booster
.
feature_importance
().
astype
(
np
.
float32
)
return
importace_array
/
importace_array
.
sum
()
return
self
.
feature_importance_
class
LGBMRegressor
(
LGBMModel
,
LGBMRegressorBase
):
...
...
@@ -513,6 +490,7 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
is_unbalance
=
False
,
seed
=
0
,
drop_rate
=
0.1
,
skip_drop
=
0.5
,
max_drop
=
50
,
uniform_drop
=
False
,
xgboost_dart_mode
=
False
):
self
.
classes
,
self
.
n_classes
=
None
,
None
super
(
LGBMClassifier
,
self
).
__init__
(
boosting_type
=
boosting_type
,
num_leaves
=
num_leaves
,
max_depth
=
max_depth
,
learning_rate
=
learning_rate
,
n_estimators
=
n_estimators
,
max_bin
=
max_bin
,
...
...
@@ -533,12 +511,12 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
early_stopping_rounds
=
None
,
verbose
=
True
,
feature_name
=
None
,
categorical_feature
=
None
,
callbacks
=
None
):
self
.
_le
=
LGBMLabelEncoder
().
fit
(
y
)
y
=
self
.
_le
.
transform
(
y
)
self
.
n_classes_
=
len
(
self
.
_le
.
classes_
)
if
self
.
n_classes_
>
2
:
self
.
classes
=
self
.
_le
.
classes_
self
.
n_classes
=
len
(
self
.
classes_
)
if
self
.
n_classes
>
2
:
# Switch to using a multiclass objective in the underlying LGBM instance
self
.
objective
=
"multiclass"
if
eval_set
is
not
None
and
eval_metric
==
"binary_logloss"
:
...
...
@@ -558,18 +536,12 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
callbacks
=
callbacks
)
return
self
def
predict
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
class_probs
=
self
.
_Booster
.
predict
(
data
,
raw_score
=
raw_score
,
num_iteration
=
num_iteration
)
if
len
(
class_probs
.
shape
)
>
1
:
column_indexes
=
np
.
argmax
(
class_probs
,
axis
=
1
)
else
:
column_indexes
=
np
.
repeat
(
0
,
class_probs
.
shape
[
0
])
column_indexes
[
class_probs
>
0.5
]
=
1
return
self
.
_le
.
inverse_transform
(
column_indexes
)
def
predict
(
self
,
X
,
raw_score
=
False
,
num_iteration
=
0
):
class_probs
=
self
.
predict_proba
(
X
,
raw_score
,
num_iteration
)
class_index
=
np
.
argmax
(
class_probs
,
axis
=
1
)
return
self
.
_le
.
inverse_transform
(
class_index
)
def
predict_proba
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
def
predict_proba
(
self
,
X
,
raw_score
=
False
,
num_iteration
=
0
):
"""
Return the predicted probability for each class for each sample.
...
...
@@ -585,15 +557,25 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
-------
predicted_probability : array_like, shape=[n_samples, n_classes]
"""
class_probs
=
self
.
_Booster
.
predict
(
data
,
raw_score
=
raw_score
,
num_iteration
=
num_iteration
)
if
self
.
n_classes_
>
2
:
class_probs
=
self
.
booster_
.
predict
(
X
,
raw_score
=
raw_score
,
num_iteration
=
num_iteration
)
if
self
.
n_classes
>
2
:
return
class_probs
else
:
classone_probs
=
class_probs
classzero_probs
=
1.0
-
classone_probs
return
np
.
vstack
((
classzero_probs
,
classone_probs
)).
transpose
()
return
np
.
vstack
((
1.
-
class_probs
,
class_probs
)).
transpose
()
@
property
def
classes_
(
self
):
"""Get class label array."""
if
self
.
classes
is
None
:
raise
LightGBMError
(
'No classes found. Need to call fit beforehand.'
)
return
self
.
classes
@
property
def
n_classes_
(
self
):
"""Get number of classes"""
if
self
.
n_classes
is
None
:
raise
LightGBMError
(
'No classes found. Need to call fit beforehand.'
)
return
self
.
n_classes
class
LGBMRanker
(
LGBMModel
):
...
...
src/metric/binary_metric.hpp
View file @
dcbdc675
...
...
@@ -127,7 +127,7 @@ public:
explicit
BinaryErrorMetric
(
const
MetricConfig
&
config
)
:
BinaryMetric
<
BinaryErrorMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
score_t
prob
)
{
if
(
prob
<
0.5
f
)
{
if
(
prob
<
=
0.5
f
)
{
return
label
;
}
else
{
return
1.0
f
-
label
;
...
...
tests/python_package_test/test_engine.py
View file @
dcbdc675
...
...
@@ -17,13 +17,14 @@ def multi_logloss(y_true, y_pred):
def
test_template
(
params
=
{
'objective'
:
'regression'
,
'metric'
:
'l2'
},
X_y
=
load_boston
(
True
),
feval
=
mean_squared_error
,
num_round
=
100
,
init_model
=
None
,
custom_eval
=
None
,
return_data
=
False
,
return_model
=
False
,
early_stopping_rounds
=
10
):
early_stopping_rounds
=
10
,
return_data
=
False
,
return_model
=
False
):
params
[
'verbose'
],
params
[
'seed'
]
=
-
1
,
42
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
*
X_y
,
test_size
=
0.1
,
random_state
=
42
)
lgb_train
=
lgb
.
Dataset
(
X_train
,
y_train
,
params
=
params
)
lgb_eval
=
lgb
.
Dataset
(
X_test
,
y_test
,
reference
=
lgb_train
,
params
=
params
)
if
return_data
:
return
lgb_train
,
lgb_eval
evals_result
=
{}
params
[
'verbose'
]
=
params
[
'seed'
]
=
0
gbm
=
lgb
.
train
(
params
,
lgb_train
,
num_boost_round
=
num_round
,
valid_sets
=
lgb_eval
,
...
...
tests/python_package_test/test_sklearn.py
View file @
dcbdc675
...
...
@@ -86,19 +86,28 @@ class TestSklearn(unittest.TestCase):
gbm
.
fit
(
X_train
,
y_train
)
self
.
assertIn
(
gbm
.
best_params_
[
'n_estimators'
],
[
15
,
20
])
def
test_clone
(
self
):
def
test_clone
_and_property
(
self
):
gbm
=
test_template
(
return_model
=
True
)
gbm_clone
=
clone
(
gbm
)
self
.
assertIsInstance
(
gbm
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
gbm
.
feature_importance_
,
np
.
ndarray
)
clf
=
test_template
(
load_digits
(
2
,
True
),
model
=
lgb
.
LGBMClassifier
,
return_model
=
True
)
self
.
assertListEqual
(
sorted
(
clf
.
classes_
),
[
0
,
1
])
self
.
assertEqual
(
clf
.
n_classes_
,
2
)
self
.
assertIsInstance
(
clf
.
booster_
,
lgb
.
Booster
)
self
.
assertIsInstance
(
clf
.
feature_importance_
,
np
.
ndarray
)
def
test_joblib
(
self
):
gbm
=
test_template
(
num_round
=
10
,
return_model
=
True
)
joblib
.
dump
(
gbm
,
'lgb.pkl'
)
gbm_pickle
=
joblib
.
load
(
'lgb.pkl'
)
self
.
assertIsInstance
(
gbm_pickle
.
booster_
,
lgb
.
Booster
)
self
.
assertDictEqual
(
gbm
.
get_params
(),
gbm_pickle
.
get_params
())
self
.
assertListEqual
(
list
(
gbm
.
feature_importance_
),
list
(
gbm_pickle
.
feature_importance_
))
X_train
,
X_test
,
y_train
,
y_test
=
test_template
(
return_data
=
True
)
gbm
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
gbm_pickle
.
fit
(
X_train
,
y_train
,
eval_set
=
[(
X_test
,
y_test
)],
verbose
=
False
)
self
.
assertDictEqual
(
gbm
.
evals_result
()
,
gbm_pickle
.
evals_result
()
)
self
.
assertDictEqual
(
gbm
.
evals_result
_
,
gbm_pickle
.
evals_result
_
)
pred_origin
=
gbm
.
predict
(
X_test
)
pred_pickle
=
gbm_pickle
.
predict
(
X_test
)
self
.
assertEqual
(
len
(
pred_origin
),
len
(
pred_pickle
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment