Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
164524d8
Commit
164524d8
authored
Nov 30, 2016
by
Guolin Ke
Browse files
weighted objective function
parent
b59a5a4c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
43 additions
and
26 deletions
+43
-26
include/LightGBM/config.h
include/LightGBM/config.h
+2
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+1
-1
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+37
-22
tests/python_package_test/test_basic.py
tests/python_package_test/test_basic.py
+3
-2
No files found.
include/LightGBM/config.h
View file @
164524d8
...
...
@@ -332,6 +332,7 @@ struct ParameterAlias {
{
"ndcg_at"
,
"ndcg_eval_at"
},
{
"min_data_per_leaf"
,
"min_data_in_leaf"
},
{
"min_data"
,
"min_data_in_leaf"
},
{
"min_child_samples"
,
"min_data_in_leaf"
},
{
"min_sum_hessian_per_leaf"
,
"min_sum_hessian_in_leaf"
},
{
"min_sum_hessian"
,
"min_sum_hessian_in_leaf"
},
{
"min_hessian"
,
"min_sum_hessian_in_leaf"
},
...
...
@@ -369,7 +370,7 @@ struct ParameterAlias {
{
"blacklist"
,
"ignore_column"
},
{
"predict_raw_score"
,
"is_predict_raw_score"
},
{
"predict_leaf_index"
,
"is_predict_leaf_index"
},
{
"
gamma
"
,
"min_gain_to_split"
},
{
"
min_split_gain
"
,
"min_gain_to_split"
},
{
"reg_alpha"
,
"lambda_l1"
},
{
"reg_lambda"
,
"lambda_l2"
},
{
"num_classes"
,
"num_class"
}
...
...
python-package/lightgbm/basic.py
View file @
164524d8
...
...
@@ -127,7 +127,7 @@ C_API_PREDICT_RAW_SCORE =1
C_API_PREDICT_LEAF_INDEX
=
2
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"we
g
iht"
:
C_API_DTYPE_FLOAT32
,
"wei
g
ht"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"group"
:
C_API_DTYPE_INT32
,
}
...
...
python-package/lightgbm/sklearn.py
View file @
164524d8
...
...
@@ -21,13 +21,13 @@ except ImportError:
LGBMRegressorBase
=
object
LGBMLabelEncoder
=
None
def
_
objective_decorator
(
func
):
def
_
point_wise_objective
(
func
):
"""Decorate an objective function
Converts an objective function using the typical sklearn metrics to LightGBM fobj
Note: for multi-class task, the
label/
pred is group by class_id first, then group by row_id
if you want to get i-th row
label/
pred in j-th class, the access way is
label/
pred[j*num_data+i]
Note: for multi-class task, the
y_
pred is group by class_id first, then group by row_id
if you want to get i-th row
y_
pred in j-th class, the access way is
y_
pred[j*num_data+i]
and you should group grad and hess in this way as well
Parameters
----------
...
...
@@ -36,16 +36,17 @@ def _objective_decorator(func):
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples]
or shape[n_samples* n_class]
The predicted values
Returns
-------
new_func: callable
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array_like, shape [n_samples]
preds: array_like, shape [n_samples]
or shape[n_samples* n_class]
The predicted values
dataset: ``dataset``
The training set from which the labels will be extracted using
...
...
@@ -54,9 +55,26 @@ def _objective_decorator(func):
def
inner
(
preds
,
dataset
):
"""internal function"""
labels
=
dataset
.
get_label
()
return
func
(
labels
,
preds
)
grad
,
hess
=
func
(
labels
,
preds
)
"""weighted for objective"""
weight
=
dataset
.
get_weight
()
if
weight
is
not
None
:
"""only one class"""
if
len
(
weight
)
==
len
(
grad
):
grad
=
np
.
multiply
(
grad
,
weight
)
hess
=
np
.
multiply
(
hess
,
weight
)
else
:
num_data
=
len
(
weight
)
num_class
=
len
(
grad
)
//
num_data
for
k
in
range
(
num_class
):
for
i
in
range
(
num_data
):
idx
=
k
*
num_data
+
i
grad
[
idx
]
*=
weight
[
i
]
hess
[
idx
]
*=
weight
[
i
]
return
grad
,
hess
return
inner
class
LGBMModel
(
LGBMModelBase
):
"""Implementation of the Scikit-Learn API for LightGBM.
...
...
@@ -77,11 +95,11 @@ class LGBMModel(LGBMModelBase):
a custom objective function to be used (see note below).
nthread : int
Number of parallel threads
gamma
: float
min_split_gain
: float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child(leaf)
min_
data
: int
min_
child_samples
: int
Minimum number of data need in a child(leaf)
subsample : float
Subsample ratio of the training instance.
...
...
@@ -89,8 +107,6 @@ class LGBMModel(LGBMModelBase):
frequence of subsample, <=0 means no enable
colsample_bytree : float
Subsample ratio of columns when constructing each tree.
colsample_byleaf : float
Subsample ratio of columns when constructing each leaf.
reg_alpha : float
L1 regularization term on weights
reg_lambda : float
...
...
@@ -108,26 +124,26 @@ class LGBMModel(LGBMModelBase):
parameter. In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess``:
y_true: array_like of shape [n_samples]
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples]
or shape[n_samples* n_class]
The predicted values
grad: array_like of shape [n_samples]
grad: array_like of shape [n_samples]
or shape[n_samples* n_class]
The value of the gradient for each sample point.
hess: array_like of shape [n_samples]
hess: array_like of shape [n_samples]
or shape[n_samples* n_class]
The value of the second derivative for each sample point
for multi-class task, the
label/
pred is group by class_id first, then group by row_id
if you want to get i-th row
label/
pred in j-th class, the access way is
label/
pred[j*num_data+i]
for multi-class task, the
y_
pred is group by class_id first, then group by row_id
if you want to get i-th row
y_
pred in j-th class, the access way is
y_
pred[j*num_data+i]
and you should group grad and hess in this way as well
"""
def
__init__
(
self
,
num_leaves
=
31
,
max_depth
=-
1
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
silent
=
True
,
objective
=
"regression"
,
nthread
=-
1
,
gamma
=
0
,
min_child_weight
=
5
,
min_
data
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
colsample_byleaf
=
1
,
nthread
=-
1
,
min_split_gain
=
0
,
min_child_weight
=
5
,
min_
child_samples
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
if
not
SKLEARN_INSTALLED
:
...
...
@@ -141,13 +157,12 @@ class LGBMModel(LGBMModelBase):
self
.
silent
=
silent
self
.
objective
=
objective
self
.
nthread
=
nthread
self
.
gamma
=
gamma
self
.
min_split_gain
=
min_split_gain
self
.
min_child_weight
=
min_child_weight
self
.
min_
data
=
min_data
self
.
min_
child_samples
=
min_child_samples
self
.
subsample
=
subsample
self
.
subsample_freq
=
subsample_freq
self
.
colsample_bytree
=
colsample_bytree
self
.
colsample_byleaf
=
colsample_byleaf
self
.
reg_alpha
=
reg_alpha
self
.
reg_lambda
=
reg_lambda
self
.
scale_pos_weight
=
scale_pos_weight
...
...
@@ -214,7 +229,7 @@ class LGBMModel(LGBMModelBase):
params
.
update
(
other_params
)
if
callable
(
self
.
objective
):
fobj
=
_
objective_decorator
(
self
.
objective
)
fobj
=
_
point_wise_objective
(
self
.
objective
)
params
[
"objective"
]
=
"None"
else
:
params
[
"objective"
]
=
self
.
objective
...
...
tests/python_package_test/test_basic.py
View file @
164524d8
...
...
@@ -15,7 +15,8 @@ bst.add_valid(valid_data,"valid_1")
for
i
in
range
(
100
):
bst
.
update
()
print
(
bst
.
eval_train
())
print
(
bst
.
eval_valid
())
if
i
%
10
==
0
:
print
(
bst
.
eval_train
())
print
(
bst
.
eval_valid
())
bst
.
save_model
(
"model.txt"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment