Commit 164524d8 authored by Guolin Ke

weighted objective function

parent b59a5a4c
@@ -332,6 +332,7 @@ struct ParameterAlias {
{ "ndcg_at", "ndcg_eval_at" },
{ "min_data_per_leaf", "min_data_in_leaf" },
{ "min_data", "min_data_in_leaf" },
{ "min_child_samples", "min_data_in_leaf" },
{ "min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf" },
{ "min_sum_hessian", "min_sum_hessian_in_leaf" },
{ "min_hessian", "min_sum_hessian_in_leaf" },
@@ -369,7 +370,7 @@ struct ParameterAlias {
{ "blacklist", "ignore_column" },
{ "predict_raw_score", "is_predict_raw_score" },
{ "predict_leaf_index", "is_predict_leaf_index" },
{ "gamma", "min_gain_to_split" },
{ "min_split_gain", "min_gain_to_split" },
{ "reg_alpha", "lambda_l1" },
{ "reg_lambda", "lambda_l2" },
{ "num_classes", "num_class" }
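For context, the two aliases added here back the scikit-learn style names introduced later in this commit: `min_child_samples` resolves to `min_data_in_leaf` and `min_split_gain` to `min_gain_to_split`. A rough Python sketch of how such an alias table gets applied (illustrative only; the real lookup lives in the C++ `ParameterAlias` struct above, and `resolve_aliases` is a hypothetical helper):

```python
# Illustrative mirror of part of the alias table above (not the real implementation).
PARAMETER_ALIASES = {
    "min_child_samples": "min_data_in_leaf",
    "min_split_gain": "min_gain_to_split",
    "reg_alpha": "lambda_l1",
    "reg_lambda": "lambda_l2",
}

def resolve_aliases(params):
    """Return a copy of params with alias keys renamed to their core parameter names."""
    return {PARAMETER_ALIASES.get(key, key): value for key, value in params.items()}

print(resolve_aliases({"min_child_samples": 20, "learning_rate": 0.1}))
# {'min_data_in_leaf': 20, 'learning_rate': 0.1}
```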
@@ -127,7 +127,7 @@ C_API_PREDICT_RAW_SCORE =1
C_API_PREDICT_LEAF_INDEX =2
FIELD_TYPE_MAPPER = {"label":C_API_DTYPE_FLOAT32,
                     "wegiht":C_API_DTYPE_FLOAT32,
                     "weight":C_API_DTYPE_FLOAT32,
                     "init_score":C_API_DTYPE_FLOAT32,
                     "group":C_API_DTYPE_INT32,
                     }
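`FIELD_TYPE_MAPPER` tells the Python wrapper which dtype each Dataset field must be converted to before it is handed to the C API; fixing the misspelled `"wegiht"` key means per-row weights are now actually recognized as a float32 field. A minimal usage sketch, assuming the `lgb.Dataset` constructor and its low-level `set_field` method as they exist in this version of basic.py:

```python
import numpy as np
import lightgbm as lgb

X = np.random.rand(100, 10)
y = np.random.rand(100)
w = np.random.rand(100)  # one weight per row

train_data = lgb.Dataset(X, label=y)
# "weight" is looked up in FIELD_TYPE_MAPPER and converted to float32
# before being passed to the C API.
train_data.set_field("weight", w)
```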
@@ -21,13 +21,13 @@ except ImportError:
LGBMRegressorBase = object
LGBMLabelEncoder = None
def _objective_decorator(func):
def _point_wise_objective(func):
"""Decorate an objective function
Converts an objective function using the typical sklearn metrics to LightGBM fobj
Note: for multi-class task, the label/pred is group by class_id first, then group by row_id
if you want to get i-th row label/pred in j-th class, the access way is label/pred[j*num_data+i]
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id
if you want to get i-th row y_pred in j-th class, the access way is y_pred[j*num_data+i]
and you should group grad and hess in this way as well
Parameters
----------
@@ -36,16 +36,17 @@ def _objective_decorator(func):
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples] or shape[n_samples* n_class]
The predicted values
Returns
-------
new_func: callable
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array_like, shape [n_samples]
preds: array_like, shape [n_samples] or shape[n_samples* n_class]
The predicted values
dataset: ``dataset``
The training set from which the labels will be extracted using
@@ -54,9 +55,26 @@ def _objective_decorator(func):
    def inner(preds, dataset):
        """internal function"""
        labels = dataset.get_label()
        return func(labels, preds)
        grad, hess = func(labels, preds)
        """weighted for objective"""
        weight = dataset.get_weight()
        if weight is not None:
            """only one class"""
            if len(weight) == len(grad):
                grad = np.multiply(grad, weight)
                hess = np.multiply(hess, weight)
            else:
                num_data = len(weight)
                num_class = len(grad) // num_data
                for k in range(num_class):
                    for i in range(num_data):
                        idx = k * num_data + i
                        grad[idx] *= weight[i]
                        hess[idx] *= weight[i]
        return grad, hess
    return inner
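The loop above multiplies every class's gradient and Hessian entry by the corresponding row weight, using the class-major layout described in the docstring (the value for row `i` of class `k` lives at index `k * num_data + i`). Purely as an illustration of that layout (not part of this commit; `apply_sample_weight` is a hypothetical name), the same step can be written with vectorized numpy operations:

```python
import numpy as np

def apply_sample_weight(grad, hess, weight):
    """Vectorized sketch of the weighting step: grad and hess have length
    num_class * num_data in class-major order, so tiling the per-row weight
    vector once per class lines the indices up."""
    num_data = len(weight)
    num_class = len(grad) // num_data
    w = np.tile(np.asarray(weight), num_class)
    return np.asarray(grad) * w, np.asarray(hess) * w
```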
class LGBMModel(LGBMModelBase):
"""Implementation of the Scikit-Learn API for LightGBM.
@@ -77,11 +95,11 @@ class LGBMModel(LGBMModelBase):
a custom objective function to be used (see note below).
nthread : int
Number of parallel threads
gamma : float
min_split_gain : float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child(leaf)
min_data : int
min_child_samples : int
Minimum number of data need in a child(leaf)
subsample : float
Subsample ratio of the training instance.
@@ -89,8 +107,6 @@ class LGBMModel(LGBMModelBase):
frequence of subsample, <=0 means no enable
colsample_bytree : float
Subsample ratio of columns when constructing each tree.
colsample_byleaf : float
Subsample ratio of columns when constructing each leaf.
reg_alpha : float
L1 regularization term on weights
reg_lambda : float
@@ -110,24 +126,24 @@ class LGBMModel(LGBMModelBase):
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples]
y_pred: array_like of shape [n_samples] or shape[n_samples* n_class]
The predicted values
grad: array_like of shape [n_samples]
grad: array_like of shape [n_samples] or shape[n_samples* n_class]
The value of the gradient for each sample point.
hess: array_like of shape [n_samples]
hess: array_like of shape [n_samples] or shape[n_samples* n_class]
The value of the second derivative for each sample point
for multi-class task, the label/pred is group by class_id first, then group by row_id
if you want to get i-th row label/pred in j-th class, the access way is label/pred[j*num_data+i]
for multi-class task, the y_pred is group by class_id first, then group by row_id
if you want to get i-th row y_pred in j-th class, the access way is y_pred[j*num_data+i]
and you should group grad and hess in this way as well
"""
    def __init__(self, num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=10, max_bin=255,
                 silent=True, objective="regression",
                 nthread=-1, gamma=0, min_child_weight=5, min_data=10,
                 subsample=1, subsample_freq=1, colsample_bytree=1, colsample_byleaf=1,
                 nthread=-1, min_split_gain=0, min_child_weight=5, min_child_samples=10,
                 subsample=1, subsample_freq=1, colsample_bytree=1,
                 reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
                 is_unbalance=False, seed=0):
        if not SKLEARN_INSTALLED:
@@ -141,13 +157,12 @@ class LGBMModel(LGBMModelBase):
        self.silent = silent
        self.objective = objective
        self.nthread = nthread
        self.gamma = gamma
        self.min_split_gain = min_split_gain
        self.min_child_weight = min_child_weight
        self.min_data = min_data
        self.min_child_samples = min_child_samples
        self.subsample = subsample
        self.subsample_freq = subsample_freq
        self.colsample_bytree = colsample_bytree
        self.colsample_byleaf = colsample_byleaf
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda
        self.scale_pos_weight = scale_pos_weight
@@ -214,7 +229,7 @@ class LGBMModel(LGBMModelBase):
        params.update(other_params)
        if callable(self.objective):
            fobj = _objective_decorator(self.objective)
            fobj = _point_wise_objective(self.objective)
            params["objective"] = "None"
        else:
            params["objective"] = self.objective
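Taken together, the scikit-learn wrapper now exposes `min_split_gain` and `min_child_samples` in place of `gamma` and `min_data`, drops `colsample_byleaf`, and weights any user-supplied objective by the dataset's sample weights. A hedged usage sketch, assuming an `LGBMRegressor` class with the same constructor signature as `LGBMModel` above and a `fit` method that accepts `sample_weight` at this point in the package's history:

```python
import numpy as np
from lightgbm import LGBMRegressor

def squared_error(y_true, y_pred):
    """Custom point-wise objective: gradient and Hessian of 0.5 * (y_pred - y_true)**2."""
    grad = y_pred - y_true
    hess = np.ones(len(y_true))
    return grad, hess

model = LGBMRegressor(objective=squared_error,
                      min_split_gain=0.0,    # previously `gamma`
                      min_child_samples=10)  # previously `min_data`

X = np.random.rand(200, 5)
y = np.random.rand(200)
# With sample_weight set, _point_wise_objective multiplies grad/hess by these weights.
model.fit(X, y, sample_weight=np.random.rand(200))
```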
@@ -15,6 +15,7 @@ bst.add_valid(valid_data,"valid_1")
for i in range(100):
    bst.update()
    if i % 10 == 0:
        print(bst.eval_train())
        print(bst.eval_valid())
bst.save_model("model.txt")