Commit a87af879 authored by Tsukasa OMOTO, committed by Guolin Ke

Add Huber loss (#174)

* Add Huber loss

https://en.wikipedia.org/wiki/Huber_loss

* update

* update docs/Parameters.md

* update
parent 1b7643ba
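For reference (not part of the commit itself), the loss being added — from the linked Wikipedia article, and matching the code below — is the piecewise function

$$
L_\delta(y, f(x)) =
\begin{cases}
\tfrac{1}{2}\,(y - f(x))^2 & \text{if } |y - f(x)| \le \delta, \\
\delta \left( |y - f(x)| - \tfrac{1}{2}\,\delta \right) & \text{otherwise,}
\end{cases}
$$

where $\delta$ is exposed as the new ```huber_delta``` parameter. The loss is quadratic for small residuals and linear for large ones, which makes it less sensitive to outliers than plain L2 loss.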
@@ -16,8 +16,9 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```task```, default=```train```, type=enum, options=```train```,```prediction```
  * ```train``` for training
  * ```prediction``` for prediction.
* ```application```, default=```regression```, type=enum, options=```regression```,```huber```,```binary```,```lambdarank```,```multiclass```, alias=```objective```,```app```
  * ```regression```, regression application
  * ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia") for regression task
  * ```binary```, binary classification application
  * ```lambdarank```, lambdarank application
  * ```multiclass```, multi-class classification application, should set ```num_class``` as well
@@ -165,6 +166,8 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```sigmoid```, default=```1.0```, type=double
  * parameter for sigmoid function. Will be used in binary classification and lambdarank.
* ```huber_delta```, default=```1.0```, type=double
  * parameter for [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia"). Will be used in the regression task.
* ```scale_pos_weight```, default=```1.0```, type=double
  * weight of positive class in binary classification task
* ```is_unbalance```, default=```false```, type=bool
......
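As a quick illustration (ours, not part of the commit), the new parameters combine with the existing ```key1=value1 key2=value2``` format like this; the data file name is a placeholder:

```
task=train application=huber huber_delta=1.5 metric=huber data=train.txt
```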
@@ -576,7 +576,7 @@ The methods of each Class is in alphabetical order.
###Common Methods
####__init__(boosting_type="gbdt", num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=10, max_bin=255, subsample_for_bin=50000, objective="regression", min_split_gain=0, min_child_weight=5, min_child_samples=10, subsample=1, subsample_freq=1, colsample_bytree=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1, is_unbalance=False, seed=0, nthread=-1, silent=True, sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None, drop_rate=0.1, skip_drop=0.5, max_drop=50, uniform_drop=False, xgboost_dart_mode=False)
Implementation of the Scikit-Learn API for LightGBM.
@@ -629,6 +629,8 @@ The methods of each Class is in alphabetical order.
    Whether to print messages while running boosting.
sigmoid : float
    Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
    Only used in regression. Parameter for Huber loss function.
max_position : int
    Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......
@@ -137,6 +137,7 @@ struct ObjectiveConfig: public ConfigBase {
public:
  virtual ~ObjectiveConfig() {}
  double sigmoid = 1.0f;
  double huber_delta = 1.0f;
  // for lambdarank
  std::vector<double> label_gain;
  // for lambdarank
@@ -156,6 +157,7 @@ public:
  virtual ~MetricConfig() {}
  int num_class = 1;
  double sigmoid = 1.0f;
  double huber_delta = 1.0f;
  std::vector<double> label_gain;
  std::vector<int> eval_at;
  void Set(const std::unordered_map<std::string, std::string>& params) override;
......
@@ -153,7 +153,7 @@ class LGBMModel(LGBMModelBase):
                 subsample=1, subsample_freq=1, colsample_bytree=1,
                 reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
                 is_unbalance=False, seed=0, nthread=-1, silent=True,
                 sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None,
                 drop_rate=0.1, skip_drop=0.5, max_drop=50,
                 uniform_drop=False, xgboost_dart_mode=False):
        """
@@ -208,6 +208,8 @@ class LGBMModel(LGBMModelBase):
            Whether to print messages while running boosting.
        sigmoid : float
            Only used in binary classification and lambdarank. Parameter for sigmoid function.
        huber_delta : float
            Only used in regression. Parameter for Huber loss function.
        max_position : int
            Only used in lambdarank, will optimize NDCG at this position.
        label_gain : list of float
@@ -272,6 +274,7 @@ class LGBMModel(LGBMModelBase):
        self.nthread = nthread
        self.silent = silent
        self.sigmoid = sigmoid
        self.huber_delta = huber_delta
        self.max_position = max_position
        self.label_gain = label_gain
        self.drop_rate = drop_rate
@@ -490,6 +493,29 @@ class LGBMModel(LGBMModelBase):
class LGBMRegressor(LGBMModel, LGBMRegressorBase):

    def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=10, max_bin=255,
                 subsample_for_bin=50000, objective="regression",
                 min_split_gain=0, min_child_weight=5, min_child_samples=10,
                 subsample=1, subsample_freq=1, colsample_bytree=1,
                 reg_alpha=0, reg_lambda=0,
                 seed=0, nthread=-1, silent=True,
                 huber_delta=1.0,
                 drop_rate=0.1, skip_drop=0.5, max_drop=50,
                 uniform_drop=False, xgboost_dart_mode=False):
        super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
                                            max_depth=max_depth, learning_rate=learning_rate,
                                            n_estimators=n_estimators, max_bin=max_bin,
                                            subsample_for_bin=subsample_for_bin, objective=objective,
                                            min_split_gain=min_split_gain, min_child_weight=min_child_weight,
                                            min_child_samples=min_child_samples, subsample=subsample,
                                            subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
                                            reg_alpha=reg_alpha, reg_lambda=reg_lambda,
                                            seed=seed, nthread=nthread, silent=silent,
                                            huber_delta=huber_delta,
                                            drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
                                            uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)

    def fit(self, X, y,
            sample_weight=None, init_score=None,
            eval_set=None, eval_sample_weight=None,
......
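A minimal usage sketch for the wrapper change above, assuming the scikit-learn API shown in this diff; the synthetic data is a placeholder:

```python
import numpy as np
from lightgbm import LGBMRegressor

# placeholder regression data
X = np.random.rand(200, 5)
y = np.random.rand(200)

# objective="huber" routes to the new Huber objective added in this commit;
# huber_delta sets the boundary between the quadratic and linear regions.
model = LGBMRegressor(objective="huber", huber_delta=1.0, n_estimators=10)
model.fit(X, y)
predictions = model.predict(X)
```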
@@ -217,6 +217,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetBool(params, "is_unbalance", &is_unbalance);
  GetDouble(params, "sigmoid", &sigmoid);
  GetDouble(params, "huber_delta", &huber_delta);
  GetInt(params, "max_position", &max_position);
  CHECK(max_position > 0);
  GetInt(params, "num_class", &num_class);
@@ -239,6 +240,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
  GetDouble(params, "sigmoid", &sigmoid);
  GetDouble(params, "huber_delta", &huber_delta);
  GetInt(params, "num_class", &num_class);
  std::string tmp_str = "";
  if (GetString(params, "label_gain", &tmp_str)) {
......
@@ -11,6 +11,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
    return new L2Metric(config);
  } else if (type == std::string("l1")) {
    return new L1Metric(config);
  } else if (type == std::string("huber")) {
    return new HuberLossMetric(config);
  } else if (type == std::string("binary_logloss")) {
    return new BinaryLoglossMetric(config);
  } else if (type == std::string("binary_error")) {
......
@@ -15,8 +15,7 @@ namespace LightGBM {
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
  explicit RegressionMetric(const MetricConfig&) : huber_delta_(1.0f) {
  }
  virtual ~RegressionMetric() {
@@ -55,13 +54,13 @@ public:
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_);
      }
    } else {
      #pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_) * weights_[i];
      }
    }
    double loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
@@ -73,6 +72,10 @@ public:
    return sum_loss / sum_weights;
  }
protected:
  /*! \brief delta for Huber loss */
  double huber_delta_;
private:
  /*! \brief Number of data */
  data_size_t num_data_;
@@ -91,7 +94,7 @@ class L2Metric: public RegressionMetric<L2Metric> {
public:
  explicit L2Metric(const MetricConfig& config) : RegressionMetric<L2Metric>(config) {}

  inline static score_t LossOnPoint(float label, score_t score, float) {
    return (score - label) * (score - label);
  }
@@ -110,7 +113,7 @@ class L1Metric: public RegressionMetric<L1Metric> {
public:
  explicit L1Metric(const MetricConfig& config) : RegressionMetric<L1Metric>(config) {}

  inline static score_t LossOnPoint(float label, score_t score, float) {
    return std::fabs(score - label);
  }
  inline static const char* Name() {
@@ -118,5 +121,26 @@ public:
  }
};

/*! \brief Huber loss for regression task */
class HuberLossMetric: public RegressionMetric<HuberLossMetric> {
public:
  explicit HuberLossMetric(const MetricConfig& config) : RegressionMetric<HuberLossMetric>(config) {
    huber_delta_ = config.huber_delta;
  }

  inline static score_t LossOnPoint(float label, score_t score, float delta) {
    const double diff = score - label;
    if (std::abs(diff) <= delta) {
      return 0.5 * diff * diff;
    } else {
      return delta * (std::abs(diff) - 0.5 * delta);
    }
  }

  inline static const char* Name() {
    return "huber";
  }
};

}  // namespace LightGBM
#endif  // LightGBM_METRIC_REGRESSION_METRIC_HPP_
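A NumPy sketch (ours, not LightGBM's API) of what the template above computes for the Huber case, including the weighted branch:

```python
import numpy as np

def huber_metric(label, score, delta=1.0, weights=None):
    """Average Huber loss, mirroring RegressionMetric<HuberLossMetric>."""
    diff = score - label
    abs_diff = np.abs(diff)
    # LossOnPoint: quadratic inside |diff| <= delta, linear outside
    point_loss = np.where(abs_diff <= delta,
                          0.5 * diff * diff,
                          delta * (abs_diff - 0.5 * delta))
    if weights is None:
        return point_loss.mean()  # AverageLoss with sum_weights_ == num_data_
    return np.sum(point_loss * weights) / np.sum(weights)
```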
@@ -9,6 +9,8 @@ namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
  if (type == std::string("regression")) {
    return new RegressionL2loss(config);
  } else if (type == std::string("huber")) {
    return new RegressionHuberLoss(config);
  } else if (type == std::string("binary")) {
    return new BinaryLogloss(config);
  } else if (type == std::string("lambdarank")) {
......
@@ -51,5 +51,72 @@ private:
  const float* weights_;
};

/*! \brief Huber loss for regression task */
class RegressionHuberLoss: public ObjectiveFunction {
public:
  explicit RegressionHuberLoss(const ObjectiveConfig& config) {
    delta_ = config.huber_delta;
  }

  ~RegressionHuberLoss() {
  }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();
  }

  void GetGradients(const score_t* score, score_t* gradients,
                    score_t* hessians) const override {
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = score[i] - label_[i];
        if (std::abs(diff) <= delta_) {
          // quadratic region: same gradient/hessian as L2 loss
          gradients[i] = diff;
          hessians[i] = 1.0;
        } else {
          // linear region: gradient saturates at +/-delta, hessian vanishes
          if (diff >= 0.0) {
            gradients[i] = delta_;
          } else {
            gradients[i] = -delta_;
          }
          hessians[i] = 0.0;
        }
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double diff = score[i] - label_[i];
        if (std::abs(diff) <= delta_) {
          gradients[i] = diff * weights_[i];
          hessians[i] = weights_[i];
        } else {
          if (diff >= 0.0) {
            gradients[i] = delta_ * weights_[i];
          } else {
            gradients[i] = -delta_ * weights_[i];
          }
          hessians[i] = 0.0;
        }
      }
    }
  }

  const char* GetName() const override {
    return "huber";
  }

private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weights */
  const float* weights_;
  /*! \brief delta for Huber loss */
  double delta_;
};

}  // namespace LightGBM
#endif  // LightGBM_OBJECTIVE_REGRESSION_OBJECTIVE_HPP_
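For intuition, a NumPy transcription (ours, not part of the commit) of what GetGradients computes in the unweighted branch. The zero hessian outside the delta band reflects that the loss is linear there, so its second derivative is zero:

```python
import numpy as np

def huber_grad_hess(label, score, delta=1.0):
    """Per-point gradient and hessian of the Huber loss w.r.t. the score."""
    diff = score - label
    inside = np.abs(diff) <= delta
    # quadratic region behaves like L2: gradient = residual, hessian = 1;
    # linear region: gradient saturates at +/-delta, hessian is 0
    grad = np.where(inside, diff, np.sign(diff) * delta)
    hess = np.where(inside, 1.0, 0.0)
    return grad, hess
```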