Commit a87af879 authored by Tsukasa OMOTO, committed by Guolin Ke

Add Huber loss (#174)

* Add Huber loss

https://en.wikipedia.org/wiki/Huber_loss

* update

* update docs/Parameters.md

* update
parent 1b7643ba
......@@ -16,8 +16,9 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```task```, default=```train```, type=enum, options=```train```,```prediction```
* ```train``` for training
* ```prediction``` for prediction
- * ```application```, default=```regression```, type=enum, options=```regression```,```binary```,```lambdarank```,```multiclass```, alias=```objective```,```app```
+ * ```application```, default=```regression```, type=enum, options=```regression```,```huber```,```binary```,```lambdarank```,```multiclass```, alias=```objective```,```app```
* ```regression```, regression application
+ * ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia") for the regression task
* ```binary```, binary classification application
* ```lambdarank```, lambdarank application
* ```multiclass```, multi-class classification application, should set ```num_class``` as well
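For example, using the ```key1=value1``` format described above, a config of ```task=train application=huber huber_delta=1.0``` selects the new objective (the values here are illustrative, not recommendations).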
......@@ -165,6 +166,8 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```sigmoid```, default=```1.0```, type=double
* parameter for sigmoid function. Will be used in binary classification and lambdarank.
* ```huber_delta```, default=```1.0```, type=double
* parameter for [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia"). Will be used in the regression task (see the formula after this list).
* ```scale_pos_weight```, default=```1.0```, type=double
* weight of positive class in binary classification task
* ```is_unbalance```, default=```false```, type=bool
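For reference, ```huber_delta``` is the δ in the Huber loss that this commit implements (matching the linked Wikipedia definition and the C++ code further down):

```latex
L_\delta(y, f(x)) =
\begin{cases}
\tfrac{1}{2}\big(f(x)-y\big)^2 & \text{if } \big|f(x)-y\big| \le \delta \\[4pt]
\delta\Big(\big|f(x)-y\big| - \tfrac{\delta}{2}\Big) & \text{otherwise}
\end{cases}
```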
......@@ -255,4 +258,4 @@ For LambdaRank learning, it needs query information for training data. LightGBM
It means the first ```27``` lines of samples belong to one query and the next ```18``` lines belong to another, and so on. (**Note: data should be ordered by query.**) If the data file is named "train.txt", the query file should be named "train.txt.query" and placed in the same folder as the training data. LightGBM will load the query file automatically if it exists.
update:
- You can specific query/group id in data file now. Please refer to parameter ```group``` in above.
\ No newline at end of file
+ You can specify the query/group id in the data file now. Please refer to the parameter ```group``` above.
......@@ -576,7 +576,7 @@ The methods of each Class is in alphabetical order.
###Common Methods
- ####__init__(boosting_type="gbdt", num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=10, max_bin=255, subsample_for_bin=50000, objective="regression", min_split_gain=0, min_child_weight=5, min_child_samples=10, subsample=1, subsample_freq=1, colsample_bytree=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1, is_unbalance=False, seed=0, nthread=-1, silent=True, sigmoid=1.0, max_position=20, label_gain=None, drop_rate=0.1, skip_drop=0.5, max_drop=50, uniform_drop=False, xgboost_dart_mode=False)
+ ####__init__(boosting_type="gbdt", num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=10, max_bin=255, subsample_for_bin=50000, objective="regression", min_split_gain=0, min_child_weight=5, min_child_samples=10, subsample=1, subsample_freq=1, colsample_bytree=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1, is_unbalance=False, seed=0, nthread=-1, silent=True, sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None, drop_rate=0.1, skip_drop=0.5, max_drop=50, uniform_drop=False, xgboost_dart_mode=False)
Implementation of the Scikit-Learn API for LightGBM.
......@@ -629,6 +629,8 @@ The methods of each Class is in alphabetical order.
Whether to print messages while running boosting.
sigmoid : float
Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
Only used in regression. Parameter for Huber loss function.
max_position : int
Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......
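As a usage illustration for the new keyword argument, here is a minimal sketch, assuming a LightGBM build containing this commit and that ```LGBMRegressor``` is importable from the ```lightgbm``` package; the data and parameter values are made up for the example:

```python
# Minimal sketch of the new huber_delta keyword (illustrative values only).
import numpy as np
from lightgbm import LGBMRegressor

rng = np.random.RandomState(42)
X = rng.rand(500, 5)
y = 3.0 * X[:, 0] + 0.1 * rng.randn(500)
y[::50] += 20.0  # a few large outliers; the Huber objective limits their pull

reg = LGBMRegressor(objective="huber", huber_delta=1.0, n_estimators=50)
reg.fit(X, y)
print(reg.predict(X[:5]))
```

Compared with the default L2 objective, gradients for points farther than ```huber_delta``` from the current score are clipped to ±delta (see the objective code below), so the injected outliers influence the fit less.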
......@@ -137,6 +137,7 @@ struct ObjectiveConfig: public ConfigBase {
public:
virtual ~ObjectiveConfig() {}
double sigmoid = 1.0f;
double huber_delta = 1.0f;
// for lambdarank
std::vector<double> label_gain;
// for lambdarank
......@@ -156,6 +157,7 @@ public:
virtual ~MetricConfig() {}
int num_class = 1;
double sigmoid = 1.0f;
double huber_delta = 1.0f;
std::vector<double> label_gain;
std::vector<int> eval_at;
void Set(const std::unordered_map<std::string, std::string>& params) override;
......
......@@ -153,7 +153,7 @@ class LGBMModel(LGBMModelBase):
subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0, nthread=-1, silent=True,
- sigmoid=1.0, max_position=20, label_gain=None,
+ sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None,
drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False):
"""
......@@ -208,6 +208,8 @@ class LGBMModel(LGBMModelBase):
Whether to print messages while running boosting.
sigmoid : float
Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
Only used in regression. Parameter for Huber loss function.
max_position : int
Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......@@ -272,6 +274,7 @@ class LGBMModel(LGBMModelBase):
self.nthread = nthread
self.silent = silent
self.sigmoid = sigmoid
self.huber_delta = huber_delta
self.max_position = max_position
self.label_gain = label_gain
self.drop_rate = drop_rate
......@@ -490,6 +493,29 @@ class LGBMModel(LGBMModelBase):
class LGBMRegressor(LGBMModel, LGBMRegressorBase):
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=10, max_bin=255,
subsample_for_bin=50000, objective="regression",
min_split_gain=0, min_child_weight=5, min_child_samples=10,
subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0,
seed=0, nthread=-1, silent=True,
huber_delta=1.0,
drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False):
super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
max_depth=max_depth, learning_rate=learning_rate,
n_estimators=n_estimators, max_bin=max_bin,
subsample_for_bin=subsample_for_bin, objective=objective,
min_split_gain=min_split_gain, min_child_weight=min_child_weight,
min_child_samples=min_child_samples, subsample=subsample,
subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
reg_alpha=reg_alpha, reg_lambda=reg_lambda,
seed=seed, nthread=nthread, silent=silent,
huber_delta=huber_delta,
drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)
def fit(self, X, y,
sample_weight=None, init_score=None,
eval_set=None, eval_sample_weight=None,
......
......@@ -217,6 +217,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetBool(params, "is_unbalance", &is_unbalance);
GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta);
GetInt(params, "max_position", &max_position);
CHECK(max_position > 0);
GetInt(params, "num_class", &num_class);
......@@ -239,6 +240,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta);
GetInt(params, "num_class", &num_class);
std::string tmp_str = "";
if (GetString(params, "label_gain", &tmp_str)) {
......
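Because these ```Set``` overrides read the raw key/value map, the same spelling works from any entry point. A sketch using the plain training API (assuming ```lightgbm.train``` and ```lightgbm.Dataset``` behave as in released versions; data and values are made up):

```python
# Sketch: "huber_delta" below is the key parsed by ObjectiveConfig::Set
# and MetricConfig::Set above (illustrative values only).
import numpy as np
import lightgbm as lgb

X, y = np.random.rand(200, 3), np.random.rand(200)
params = {"objective": "huber", "metric": "huber", "huber_delta": 1.5}
booster = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=10)
```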
......@@ -11,6 +11,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return new L2Metric(config);
} else if (type == std::string("l1")) {
return new L1Metric(config);
} else if (type == std::string("huber")) {
return new HuberLossMetric(config);
} else if (type == std::string("binary_logloss")) {
return new BinaryLoglossMetric(config);
} else if (type == std::string("binary_error")) {
......
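With this dispatch in place, listing ```huber``` in the ```metric``` parameter reports the (weighted) average Huber loss during evaluation, alongside or instead of ```l1``` and ```l2```.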
......@@ -15,8 +15,7 @@ namespace LightGBM {
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
- explicit RegressionMetric(const MetricConfig&) {
+ explicit RegressionMetric(const MetricConfig&) :huber_delta_(1.0f) {
}
virtual ~RegressionMetric() {
......@@ -55,13 +54,13 @@ public:
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
- sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
+ sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
- sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
+ sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_) * weights_[i];
}
}
double loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
......@@ -73,6 +72,10 @@ public:
return sum_loss / sum_weights;
}
protected:
/*! \brief delta for Huber loss */
double huber_delta_;
private:
/*! \brief Number of data */
data_size_t num_data_;
......@@ -91,7 +94,7 @@ class L2Metric: public RegressionMetric<L2Metric> {
public:
explicit L2Metric(const MetricConfig& config) :RegressionMetric<L2Metric>(config) {}
- inline static score_t LossOnPoint(float label, score_t score) {
+ inline static score_t LossOnPoint(float label, score_t score, float) {
return (score - label)*(score - label);
}
......@@ -110,7 +113,7 @@ class L1Metric: public RegressionMetric<L1Metric> {
public:
explicit L1Metric(const MetricConfig& config) :RegressionMetric<L1Metric>(config) {}
- inline static score_t LossOnPoint(float label, score_t score) {
+ inline static score_t LossOnPoint(float label, score_t score, float) {
return std::fabs(score - label);
}
inline static const char* Name() {
......@@ -118,5 +121,26 @@ public:
}
};
/*! \brief Huber loss for regression task */
class HuberLossMetric: public RegressionMetric<HuberLossMetric> {
public:
explicit HuberLossMetric(const MetricConfig& config) :RegressionMetric<HuberLossMetric>(config) {
huber_delta_ = config.huber_delta;
}
inline static score_t LossOnPoint(float label, score_t score, float delta) {
const double diff = score - label;
if (std::abs(diff) <= delta) {
return 0.5 * diff * diff;
} else {
return delta * (std::abs(diff) - 0.5 * delta);
}
}
inline static const char* Name() {
return "huber";
}
};
} // namespace LightGBM
#endif // LightGBM_METRIC_REGRESSION_METRIC_HPP_
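To sanity-check the piecewise formula in ```HuberLossMetric::LossOnPoint```, here is a standalone re-statement in plain Python (independent of the LightGBM headers) with two hand-computed points:

```python
# Re-statement of the piecewise Huber loss above, for checking only.
def huber_loss(label, score, delta):
    diff = score - label
    if abs(diff) <= delta:
        return 0.5 * diff * diff              # quadratic region
    return delta * (abs(diff) - 0.5 * delta)  # linear region

assert huber_loss(1.0, 1.5, 1.0) == 0.125  # 0.5 * 0.5**2
assert huber_loss(0.0, 3.0, 1.0) == 2.5    # 1.0 * (3.0 - 0.5)
```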
......@@ -9,6 +9,8 @@ namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
if (type == std::string("regression")) {
return new RegressionL2loss(config);
} else if (type == std::string("huber")) {
return new RegressionLHuberLoss(config);
} else if (type == std::string("binary")) {
return new BinaryLogloss(config);
} else if (type == std::string("lambdarank")) {
......
......@@ -51,5 +51,72 @@ private:
const float* weights_;
};
class RegressionLHuberLoss: public ObjectiveFunction {
public:
explicit RegressionLHuberLoss(const ObjectiveConfig& config) {
delta_ = config.huber_delta;
}
~RegressionLHuberLoss() {
}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
}
void GetGradients(const score_t* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (std::abs(diff) <= delta_) {
gradients[i] = diff;
hessians[i] = 1.0;
} else {
if (diff >= 0.0) {
gradients[i] = delta_;
} else {
gradients[i] = -delta_;
}
hessians[i] = 0.0;
}
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (std::abs(diff) <= delta_) {
gradients[i] = diff * weights_[i];
hessians[i] = weights_[i];
} else {
if (diff >= 0.0) {
gradients[i] = delta_ * weights_[i];
} else {
gradients[i] = -delta_ * weights_[i];
}
hessians[i] = 0.0;
}
}
}
}
const char* GetName() const override {
return "huber";
}
private:
/*! \brief Number of data */
data_size_t num_data_;
/*! \brief Pointer of label */
const float* label_;
/*! \brief Pointer of weights */
const float* weights_;
/*! \brief delta for Huber loss */
double delta_;
};
} // namespace LightGBM
#endif // LightGBM_OBJECTIVE_REGRESSION_OBJECTIVE_HPP_
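The values filled in by ```GetGradients``` above are the first and second derivatives of the Huber loss with respect to the raw score f, with the hessian taken as 1 in the quadratic region and 0 in the linear region, as the code sets it:

```latex
\frac{\partial L_\delta}{\partial f} =
\begin{cases}
f - y & \text{if } |f - y| \le \delta \\
\delta \cdot \operatorname{sign}(f - y) & \text{otherwise}
\end{cases}
\qquad
\frac{\partial^2 L_\delta}{\partial f^2} =
\begin{cases}
1 & \text{if } |f - y| \le \delta \\
0 & \text{otherwise}
\end{cases}
```

Per-point weights, when present, multiply both quantities, matching the weighted branch of the loop.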