Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
164524d8
Commit
164524d8
authored
Nov 30, 2016
by
Guolin Ke
Browse files
weighted objective function
parent
b59a5a4c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
43 additions
and
26 deletions
+43
-26
include/LightGBM/config.h
include/LightGBM/config.h
+2
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+1
-1
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+37
-22
tests/python_package_test/test_basic.py
tests/python_package_test/test_basic.py
+3
-2
No files found.
include/LightGBM/config.h
View file @
164524d8
...
@@ -332,6 +332,7 @@ struct ParameterAlias {
...
@@ -332,6 +332,7 @@ struct ParameterAlias {
{
"ndcg_at"
,
"ndcg_eval_at"
},
{
"ndcg_at"
,
"ndcg_eval_at"
},
{
"min_data_per_leaf"
,
"min_data_in_leaf"
},
{
"min_data_per_leaf"
,
"min_data_in_leaf"
},
{
"min_data"
,
"min_data_in_leaf"
},
{
"min_data"
,
"min_data_in_leaf"
},
{
"min_child_samples"
,
"min_data_in_leaf"
},
{
"min_sum_hessian_per_leaf"
,
"min_sum_hessian_in_leaf"
},
{
"min_sum_hessian_per_leaf"
,
"min_sum_hessian_in_leaf"
},
{
"min_sum_hessian"
,
"min_sum_hessian_in_leaf"
},
{
"min_sum_hessian"
,
"min_sum_hessian_in_leaf"
},
{
"min_hessian"
,
"min_sum_hessian_in_leaf"
},
{
"min_hessian"
,
"min_sum_hessian_in_leaf"
},
...
@@ -369,7 +370,7 @@ struct ParameterAlias {
...
@@ -369,7 +370,7 @@ struct ParameterAlias {
{
"blacklist"
,
"ignore_column"
},
{
"blacklist"
,
"ignore_column"
},
{
"predict_raw_score"
,
"is_predict_raw_score"
},
{
"predict_raw_score"
,
"is_predict_raw_score"
},
{
"predict_leaf_index"
,
"is_predict_leaf_index"
},
{
"predict_leaf_index"
,
"is_predict_leaf_index"
},
{
"
gamma
"
,
"min_gain_to_split"
},
{
"
min_split_gain
"
,
"min_gain_to_split"
},
{
"reg_alpha"
,
"lambda_l1"
},
{
"reg_alpha"
,
"lambda_l1"
},
{
"reg_lambda"
,
"lambda_l2"
},
{
"reg_lambda"
,
"lambda_l2"
},
{
"num_classes"
,
"num_class"
}
{
"num_classes"
,
"num_class"
}
...
...
python-package/lightgbm/basic.py
View file @
164524d8
...
@@ -127,7 +127,7 @@ C_API_PREDICT_RAW_SCORE =1
...
@@ -127,7 +127,7 @@ C_API_PREDICT_RAW_SCORE =1
C_API_PREDICT_LEAF_INDEX
=
2
C_API_PREDICT_LEAF_INDEX
=
2
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"we
g
iht"
:
C_API_DTYPE_FLOAT32
,
"wei
g
ht"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"group"
:
C_API_DTYPE_INT32
,
"group"
:
C_API_DTYPE_INT32
,
}
}
...
...
python-package/lightgbm/sklearn.py
View file @
164524d8
...
@@ -21,13 +21,13 @@ except ImportError:
...
@@ -21,13 +21,13 @@ except ImportError:
LGBMRegressorBase
=
object
LGBMRegressorBase
=
object
LGBMLabelEncoder
=
None
LGBMLabelEncoder
=
None
def
_
objective_decorator
(
func
):
def
_
point_wise_objective
(
func
):
"""Decorate an objective function
"""Decorate an objective function
Converts an objective function using the typical sklearn metrics to LightGBM fobj
Converts an objective function using the typical sklearn metrics to LightGBM fobj
Note: for multi-class task, the
label/
pred is grouped by class_id first, then grouped by row_id
Note: for multi-class task, the
y_
pred is grouped by class_id first, then grouped by row_id
if you want to get i-th row
label/
pred in j-th class, the access way is
label/
pred[j*num_data+i]
if you want to get i-th row
y_
pred in j-th class, the access way is
y_
pred[j*num_data+i]
and you should group grad and hess in this way as well
and you should group grad and hess in this way as well
Parameters
Parameters
----------
----------
...
@@ -36,16 +36,17 @@ def _objective_decorator(func):
...
@@ -36,16 +36,17 @@ def _objective_decorator(func):
y_true: array_like of shape [n_samples]
y_true: array_like of shape [n_samples]
The target values
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples]
or shape[n_samples* n_class]
The predicted values
The predicted values
Returns
Returns
-------
-------
new_func: callable
new_func: callable
The new objective function as expected by ``lightgbm.engine.train``.
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
The signature is ``new_func(preds, dataset)``:
preds: array_like, shape [n_samples]
preds: array_like, shape [n_samples]
or shape[n_samples* n_class]
The predicted values
The predicted values
dataset: ``dataset``
dataset: ``dataset``
The training set from which the labels will be extracted using
The training set from which the labels will be extracted using
...
@@ -54,9 +55,26 @@ def _objective_decorator(func):
...
@@ -54,9 +55,26 @@ def _objective_decorator(func):
def
inner
(
preds
,
dataset
):
def
inner
(
preds
,
dataset
):
"""internal function"""
"""internal function"""
labels
=
dataset
.
get_label
()
labels
=
dataset
.
get_label
()
return
func
(
labels
,
preds
)
grad
,
hess
=
func
(
labels
,
preds
)
"""weighted for objective"""
weight
=
dataset
.
get_weight
()
if
weight
is
not
None
:
"""only one class"""
if
len
(
weight
)
==
len
(
grad
):
grad
=
np
.
multiply
(
grad
,
weight
)
hess
=
np
.
multiply
(
hess
,
weight
)
else
:
num_data
=
len
(
weight
)
num_class
=
len
(
grad
)
//
num_data
for
k
in
range
(
num_class
):
for
i
in
range
(
num_data
):
idx
=
k
*
num_data
+
i
grad
[
idx
]
*=
weight
[
i
]
hess
[
idx
]
*=
weight
[
i
]
return
grad
,
hess
return
inner
return
inner
class
LGBMModel
(
LGBMModelBase
):
class
LGBMModel
(
LGBMModelBase
):
"""Implementation of the Scikit-Learn API for LightGBM.
"""Implementation of the Scikit-Learn API for LightGBM.
...
@@ -77,11 +95,11 @@ class LGBMModel(LGBMModelBase):
...
@@ -77,11 +95,11 @@ class LGBMModel(LGBMModelBase):
a custom objective function to be used (see note below).
a custom objective function to be used (see note below).
nthread : int
nthread : int
Number of parallel threads
Number of parallel threads
gamma
: float
min_split_gain
: float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child(leaf)
Minimum sum of instance weight(hessian) needed in a child(leaf)
min_
data
: int
min_
child_samples
: int
Minimum number of data needed in a child(leaf)
Minimum number of data needed in a child(leaf)
subsample : float
subsample : float
Subsample ratio of the training instance.
Subsample ratio of the training instance.
...
@@ -89,8 +107,6 @@ class LGBMModel(LGBMModelBase):
...
@@ -89,8 +107,6 @@ class LGBMModel(LGBMModelBase):
frequency of subsample, <=0 means not enabled
frequency of subsample, <=0 means not enabled
colsample_bytree : float
colsample_bytree : float
Subsample ratio of columns when constructing each tree.
Subsample ratio of columns when constructing each tree.
colsample_byleaf : float
Subsample ratio of columns when constructing each leaf.
reg_alpha : float
reg_alpha : float
L1 regularization term on weights
L1 regularization term on weights
reg_lambda : float
reg_lambda : float
...
@@ -110,24 +126,24 @@ class LGBMModel(LGBMModelBase):
...
@@ -110,24 +126,24 @@ class LGBMModel(LGBMModelBase):
y_true: array_like of shape [n_samples]
y_true: array_like of shape [n_samples]
The target values
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples]
or shape[n_samples* n_class]
The predicted values
The predicted values
grad: array_like of shape [n_samples]
grad: array_like of shape [n_samples]
or shape[n_samples* n_class]
The value of the gradient for each sample point.
The value of the gradient for each sample point.
hess: array_like of shape [n_samples]
hess: array_like of shape [n_samples]
or shape[n_samples* n_class]
The value of the second derivative for each sample point
The value of the second derivative for each sample point
for multi-class task, the
label/
pred is grouped by class_id first, then grouped by row_id
for multi-class task, the
y_
pred is grouped by class_id first, then grouped by row_id
if you want to get i-th row
label/
pred in j-th class, the access way is
label/
pred[j*num_data+i]
if you want to get i-th row
y_
pred in j-th class, the access way is
y_
pred[j*num_data+i]
and you should group grad and hess in this way as well
and you should group grad and hess in this way as well
"""
"""
def
__init__
(
self
,
num_leaves
=
31
,
max_depth
=-
1
,
def
__init__
(
self
,
num_leaves
=
31
,
max_depth
=-
1
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
silent
=
True
,
objective
=
"regression"
,
silent
=
True
,
objective
=
"regression"
,
nthread
=-
1
,
gamma
=
0
,
min_child_weight
=
5
,
min_
data
=
10
,
nthread
=-
1
,
min_split_gain
=
0
,
min_child_weight
=
5
,
min_
child_samples
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
colsample_byleaf
=
1
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
is_unbalance
=
False
,
seed
=
0
):
if
not
SKLEARN_INSTALLED
:
if
not
SKLEARN_INSTALLED
:
...
@@ -141,13 +157,12 @@ class LGBMModel(LGBMModelBase):
...
@@ -141,13 +157,12 @@ class LGBMModel(LGBMModelBase):
self
.
silent
=
silent
self
.
silent
=
silent
self
.
objective
=
objective
self
.
objective
=
objective
self
.
nthread
=
nthread
self
.
nthread
=
nthread
self
.
gamma
=
gamma
self
.
min_split_gain
=
min_split_gain
self
.
min_child_weight
=
min_child_weight
self
.
min_child_weight
=
min_child_weight
self
.
min_
data
=
min_data
self
.
min_
child_samples
=
min_child_samples
self
.
subsample
=
subsample
self
.
subsample
=
subsample
self
.
subsample_freq
=
subsample_freq
self
.
subsample_freq
=
subsample_freq
self
.
colsample_bytree
=
colsample_bytree
self
.
colsample_bytree
=
colsample_bytree
self
.
colsample_byleaf
=
colsample_byleaf
self
.
reg_alpha
=
reg_alpha
self
.
reg_alpha
=
reg_alpha
self
.
reg_lambda
=
reg_lambda
self
.
reg_lambda
=
reg_lambda
self
.
scale_pos_weight
=
scale_pos_weight
self
.
scale_pos_weight
=
scale_pos_weight
...
@@ -214,7 +229,7 @@ class LGBMModel(LGBMModelBase):
...
@@ -214,7 +229,7 @@ class LGBMModel(LGBMModelBase):
params
.
update
(
other_params
)
params
.
update
(
other_params
)
if
callable
(
self
.
objective
):
if
callable
(
self
.
objective
):
fobj
=
_
objective_decorator
(
self
.
objective
)
fobj
=
_
point_wise_objective
(
self
.
objective
)
params
[
"objective"
]
=
"None"
params
[
"objective"
]
=
"None"
else
:
else
:
params
[
"objective"
]
=
self
.
objective
params
[
"objective"
]
=
self
.
objective
...
...
tests/python_package_test/test_basic.py
View file @
164524d8
...
@@ -15,6 +15,7 @@ bst.add_valid(valid_data,"valid_1")
...
@@ -15,6 +15,7 @@ bst.add_valid(valid_data,"valid_1")
for
i
in
range
(
100
):
for
i
in
range
(
100
):
bst
.
update
()
bst
.
update
()
if
i
%
10
==
0
:
print
(
bst
.
eval_train
())
print
(
bst
.
eval_train
())
print
(
bst
.
eval_valid
())
print
(
bst
.
eval_valid
())
bst
.
save_model
(
"model.txt"
)
bst
.
save_model
(
"model.txt"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment