Commit a87af879 authored by Tsukasa OMOTO, committed by Guolin Ke

Add Huber loss (#174)

* Add Huber loss

https://en.wikipedia.org/wiki/Huber_loss

* update

* update docs/Parameters.md

* update
parent 1b7643ba
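For reference (not part of the commit itself), the loss being added — from the linked Wikipedia article, and matching the code below — is the piecewise function

$$
L_\delta(y, f(x)) =
\begin{cases}
\tfrac{1}{2}\,(y - f(x))^2 & \text{if } |y - f(x)| \le \delta, \\
\delta \left( |y - f(x)| - \tfrac{1}{2}\,\delta \right) & \text{otherwise,}
\end{cases}
$$

where $\delta$ is exposed as the new ```huber_delta``` parameter. The loss is quadratic for small residuals and linear for large ones, which makes it less sensitive to outliers than plain L2 loss.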
@@ -16,8 +16,9 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```task```, default=```train```, type=enum, options=```train```,```prediction```
  * ```train``` for training
  * ```prediction``` for prediction.
* ```application```, default=```regression```, type=enum, options=```regression```,```huber```,```binary```,```lambdarank```,```multiclass```, alias=```objective```,```app```
  * ```regression```, regression application
  * ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia") for regression task
  * ```binary```, binary classification application
  * ```lambdarank```, lambdarank application
  * ```multiclass```, multi-class classification application, should set ```num_class``` as well
@@ -165,6 +166,8 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```sigmoid```, default=```1.0```, type=double
  * parameter for sigmoid function. Will be used in binary classification and lambdarank.
* ```huber_delta```, default=```1.0```, type=double
  * parameter for [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia"). Will be used in the regression task.
* ```scale_pos_weight```, default=```1.0```, type=double
  * weight of positive class in binary classification task
* ```is_unbalance```, default=```false```, type=bool
......
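As a quick illustration (ours, not part of the commit), the new parameters combine with the existing ```key1=value1 key2=value2``` format like this; the data file name is a placeholder:

```
task=train application=huber huber_delta=1.5 metric=huber data=train.txt
```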
@@ -576,7 +576,7 @@ The methods of each Class is in alphabetical order.
###Common Methods
####__init__(boosting_type="gbdt", num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=10, max_bin=255, subsample_for_bin=50000, objective="regression", min_split_gain=0, min_child_weight=5, min_child_samples=10, subsample=1, subsample_freq=1, colsample_bytree=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1, is_unbalance=False, seed=0, nthread=-1, silent=True, sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None, drop_rate=0.1, skip_drop=0.5, max_drop=50, uniform_drop=False, xgboost_dart_mode=False)
Implementation of the Scikit-Learn API for LightGBM.
@@ -629,6 +629,8 @@ The methods of each Class is in alphabetical order.
    Whether to print messages while running boosting.
sigmoid : float
    Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
    Only used in regression. Parameter for Huber loss function.
max_position : int
    Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......
@@ -137,6 +137,7 @@ struct ObjectiveConfig: public ConfigBase {
public:
  virtual ~ObjectiveConfig() {}
  double sigmoid = 1.0f;
  double huber_delta = 1.0f;
  // for lambdarank
  std::vector<double> label_gain;
  // for lambdarank
@@ -156,6 +157,7 @@ public:
  virtual ~MetricConfig() {}
  int num_class = 1;
  double sigmoid = 1.0f;
  double huber_delta = 1.0f;
  std::vector<double> label_gain;
  std::vector<int> eval_at;
  void Set(const std::unordered_map<std::string, std::string>& params) override;
......
@@ -153,7 +153,7 @@ class LGBMModel(LGBMModelBase):
                 subsample=1, subsample_freq=1, colsample_bytree=1,
                 reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
                 is_unbalance=False, seed=0, nthread=-1, silent=True,
                 sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None,
                 drop_rate=0.1, skip_drop=0.5, max_drop=50,
                 uniform_drop=False, xgboost_dart_mode=False):
        """
@@ -208,6 +208,8 @@ class LGBMModel(LGBMModelBase):
            Whether to print messages while running boosting.
        sigmoid : float
            Only used in binary classification and lambdarank. Parameter for sigmoid function.
        huber_delta : float
            Only used in regression. Parameter for Huber loss function.
        max_position : int
            Only used in lambdarank, will optimize NDCG at this position.
        label_gain : list of float
@@ -272,6 +274,7 @@ class LGBMModel(LGBMModelBase):
        self.nthread = nthread
        self.silent = silent
        self.sigmoid = sigmoid
        self.huber_delta = huber_delta
        self.max_position = max_position
        self.label_gain = label_gain
        self.drop_rate = drop_rate
@@ -490,6 +493,29 @@ class LGBMModel(LGBMModelBase):
class LGBMRegressor(LGBMModel, LGBMRegressorBase):

    def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=10, max_bin=255,
                 subsample_for_bin=50000, objective="regression",
                 min_split_gain=0, min_child_weight=5, min_child_samples=10,
                 subsample=1, subsample_freq=1, colsample_bytree=1,
                 reg_alpha=0, reg_lambda=0,
                 seed=0, nthread=-1, silent=True,
                 huber_delta=1.0,
                 drop_rate=0.1, skip_drop=0.5, max_drop=50,
                 uniform_drop=False, xgboost_dart_mode=False):
        super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
                                            max_depth=max_depth, learning_rate=learning_rate,
                                            n_estimators=n_estimators, max_bin=max_bin,
                                            subsample_for_bin=subsample_for_bin, objective=objective,
                                            min_split_gain=min_split_gain, min_child_weight=min_child_weight,
                                            min_child_samples=min_child_samples, subsample=subsample,
                                            subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
                                            reg_alpha=reg_alpha, reg_lambda=reg_lambda,
                                            seed=seed, nthread=nthread, silent=silent,
                                            huber_delta=huber_delta,
                                            drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
                                            uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)

    def fit(self, X, y,
            sample_weight=None, init_score=None,
            eval_set=None, eval_sample_weight=None,
......
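A minimal usage sketch for the wrapper change above, assuming the scikit-learn API shown in this diff; the synthetic data is a placeholder:

```python
import numpy as np
from lightgbm import LGBMRegressor

# placeholder regression data
X = np.random.rand(200, 5)
y = np.random.rand(200)

# objective="huber" routes to the new Huber objective added in this commit;
# huber_delta sets the boundary between the quadratic and linear regions.
model = LGBMRegressor(objective="huber", huber_delta=1.0, n_estimators=10)
model.fit(X, y)
predictions = model.predict(X)
```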
@@ -217,6 +217,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetBool(params, "is_unbalance", &is_unbalance);
  GetDouble(params, "sigmoid", &sigmoid);
  GetDouble(params, "huber_delta", &huber_delta);
  GetInt(params, "max_position", &max_position);
  CHECK(max_position > 0);
  GetInt(params, "num_class", &num_class);
@@ -239,6 +240,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetDouble(params, "sigmoid", &sigmoid);
  GetDouble(params, "huber_delta", &huber_delta);
  GetInt(params, "num_class", &num_class);
  std::string tmp_str = "";
  if (GetString(params, "label_gain", &tmp_str)) {
......
@@ -11,6 +11,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
    return new L2Metric(config);
  } else if (type == std::string("l1")) {
    return new L1Metric(config);
  } else if (type == std::string("huber")) {
    return new HuberLossMetric(config);
  } else if (type == std::string("binary_logloss")) {
    return new BinaryLoglossMetric(config);
  } else if (type == std::string("binary_error")) {
......
@@ -15,8 +15,7 @@ namespace LightGBM {
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
  explicit RegressionMetric(const MetricConfig&) : huber_delta_(1.0f) {
  }
  virtual ~RegressionMetric() {
@@ -55,13 +54,13 @@ public:
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_);
      }
    } else {
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_) * weights_[i];
      }
    }
    double loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
@@ -73,6 +72,10 @@ public:
    return sum_loss / sum_weights;
  }
protected:
  /*! \brief delta for Huber loss */
  double huber_delta_;
private:
  /*! \brief Number of data */
  data_size_t num_data_;
@@ -91,7 +94,7 @@ class L2Metric: public RegressionMetric<L2Metric> {
public:
  explicit L2Metric(const MetricConfig& config) : RegressionMetric<L2Metric>(config) {}

  inline static score_t LossOnPoint(float label, score_t score, float) {
    return (score - label) * (score - label);
  }
@@ -110,7 +113,7 @@ class L1Metric: public RegressionMetric<L1Metric> {
public:
  explicit L1Metric(const MetricConfig& config) : RegressionMetric<L1Metric>(config) {}

  inline static score_t LossOnPoint(float label, score_t score, float) {
    return std::fabs(score - label);
  }
  inline static const char* Name() {
@@ -118,5 +121,26 @@ public:
  }
};

/*! \brief Huber loss for regression task */
class HuberLossMetric: public RegressionMetric<HuberLossMetric> {
public:
  explicit HuberLossMetric(const MetricConfig& config) : RegressionMetric<HuberLossMetric>(config) {
    huber_delta_ = config.huber_delta;
  }

  inline static score_t LossOnPoint(float label, score_t score, float delta) {
    const double diff = score - label;
    if (std::abs(diff) <= delta) {
      return 0.5 * diff * diff;
    } else {
      return delta * (std::abs(diff) - 0.5 * delta);
    }
  }

  inline static const char* Name() {
    return "huber";
  }
};

}  // namespace LightGBM
#endif  // LightGBM_METRIC_REGRESSION_METRIC_HPP_
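A NumPy sketch (ours, not LightGBM's API) of what the template above computes for the Huber case, including the weighted branch:

```python
import numpy as np

def huber_metric(label, score, delta=1.0, weights=None):
    """Average Huber loss, mirroring RegressionMetric<HuberLossMetric>."""
    diff = score - label
    abs_diff = np.abs(diff)
    # LossOnPoint: quadratic inside |diff| <= delta, linear outside
    point_loss = np.where(abs_diff <= delta,
                          0.5 * diff * diff,
                          delta * (abs_diff - 0.5 * delta))
    if weights is None:
        return point_loss.mean()  # AverageLoss with sum_weights_ == num_data_
    return np.sum(point_loss * weights) / np.sum(weights)
```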
@@ -9,6 +9,8 @@ namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
  if (type == std::string("regression")) {
    return new RegressionL2loss(config);
  } else if (type == std::string("huber")) {
    return new RegressionHuberLoss(config);
  } else if (type == std::string("binary")) {
    return new BinaryLogloss(config);
  } else if (type == std::string("lambdarank")) {
......
@@ -51,5 +51,72 @@ private:
  const float* weights_;
};

/*! \brief Huber loss for regression task */
class RegressionHuberLoss: public ObjectiveFunction {
public:
  explicit RegressionHuberLoss(const ObjectiveConfig& config) {
    delta_ = config.huber_delta;
  }

  ~RegressionHuberLoss() {
  }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();
  }

  void GetGradients(const score_t* score, score_t* gradients,
                    score_t* hessians) const override {
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = score[i] - label_[i];
        if (std::abs(diff) <= delta_) {
          // quadratic region: same gradient/hessian as L2 loss
          gradients[i] = diff;
          hessians[i] = 1.0;
        } else {
          // linear region: gradient saturates at +/-delta, hessian vanishes
          if (diff >= 0.0) {
            gradients[i] = delta_;
          } else {
            gradients[i] = -delta_;
          }
          hessians[i] = 0.0;
        }
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = score[i] - label_[i];
        if (std::abs(diff) <= delta_) {
          gradients[i] = diff * weights_[i];
          hessians[i] = weights_[i];
        } else {
          if (diff >= 0.0) {
            gradients[i] = delta_ * weights_[i];
          } else {
            gradients[i] = -delta_ * weights_[i];
          }
          hessians[i] = 0.0;
        }
      }
    }
  }

  const char* GetName() const override {
    return "huber";
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weights */
  const float* weights_;
  /*! \brief delta for Huber loss */
  double delta_;
};

}  // namespace LightGBM
#endif  // LightGBM_OBJECTIVE_REGRESSION_OBJECTIVE_HPP_
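For intuition, a NumPy transcription (ours, not part of the commit) of what GetGradients computes in the unweighted branch. The zero hessian outside the delta band reflects that the loss is linear there, so its second derivative is zero:

```python
import numpy as np

def huber_grad_hess(label, score, delta=1.0):
    """Per-point gradient and hessian of the Huber loss w.r.t. the score."""
    diff = score - label
    inside = np.abs(diff) <= delta
    # quadratic region behaves like L2: gradient = residual, hessian = 1;
    # linear region: gradient saturates at +/-delta, hessian is 0
    grad = np.where(inside, diff, np.sign(diff) * delta)
    hess = np.where(inside, 1.0, 0.0)
    return grad, hess
```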