Commit 26027d35 authored by Tsukasa OMOTO's avatar Tsukasa OMOTO Committed by Guolin Ke
Browse files

Add a parameter to control width of Gaussian function to approximate hessian (#182)

parent 4a75f140
...@@ -631,6 +631,9 @@ The methods of each Class is in alphabetical order. ...@@ -631,6 +631,9 @@ The methods of each Class is in alphabetical order.
Only used in binary classification and lambdarank. Parameter for sigmoid function. Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float huber_delta : float
Only used in regression. Parameter for Huber loss function. Only used in regression. Parameter for Huber loss function.
gaussian_eta : float
        Only used in regression. Parameter for the L1 and Huber loss functions.
It is used to control the width of Gaussian function to approximate hessian.
fair_c : float fair_c : float
Only used in regression. Parameter for Fair loss function. Only used in regression. Parameter for Fair loss function.
max_position : int max_position : int
......
...@@ -139,6 +139,8 @@ public: ...@@ -139,6 +139,8 @@ public:
double sigmoid = 1.0f; double sigmoid = 1.0f;
double huber_delta = 1.0f; double huber_delta = 1.0f;
double fair_c = 1.0f; double fair_c = 1.0f;
// for ApproximateHessianWithGaussian
double gaussian_eta = 1.0f;
// for lambdarank // for lambdarank
std::vector<double> label_gain; std::vector<double> label_gain;
// for lambdarank // for lambdarank
......
...@@ -407,15 +407,17 @@ inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t s ...@@ -407,15 +407,17 @@ inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t s
* y is a prediction. * y is a prediction.
* t means true target. * t means true target.
* g means gradient. * g means gradient.
* eta is a parameter to control the width of Gaussian function.
* w means weights. * w means weights.
*/ */
/*!
* \brief Approximate the hessian of a loss with a discontinuous first derivative
*        (L1 / Huber) by a Gaussian bump centered at the discontinuity.
* \param y   prediction (score)
* \param t   true target (label)
* \param g   gradient at this point; |g| scales the jump height of the first derivative
* \param eta parameter controlling the width of the Gaussian used to smooth the hessian
* \param w   sample weight (defaults to 1.0)
* \return approximated hessian value (always >= 0)
*/
inline static double ApproximateHessianWithGaussian(const double y, const double t, const double g,
                                                    const double eta, const double w = 1.0f) {
  const double diff = y - t;
  const double pi = 4.0 * std::atan(1.0);
  const double x = std::fabs(diff);
  // Jump of the first derivative across the kink: from -|g| to +|g| (times weight).
  const double a = 2.0 * std::fabs(g) * w;
  // The kink sits at |y - t| == 0.
  const double b = 0.0;
  // Width of the Gaussian; scaled by eta and floored to avoid division by zero
  // when both prediction and label are zero.
  const double c = std::max((std::fabs(y) + std::fabs(t)) * eta, 1.0e-10);
  // a * N(x; b, c): spread the derivative jump over a Gaussian of width c.
  return w * std::exp(-(x - b) * (x - b) / (2.0 * c * c)) * a / (c * std::sqrt(2 * pi));
}
......
...@@ -153,7 +153,8 @@ class LGBMModel(LGBMModelBase): ...@@ -153,7 +153,8 @@ class LGBMModel(LGBMModelBase):
subsample=1, subsample_freq=1, colsample_bytree=1, subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0, nthread=-1, silent=True, is_unbalance=False, seed=0, nthread=-1, silent=True,
sigmoid=1.0, huber_delta=1.0, fair_c=1.0, max_position=20, label_gain=None, sigmoid=1.0, huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
max_position=20, label_gain=None,
drop_rate=0.1, skip_drop=0.5, max_drop=50, drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False): uniform_drop=False, xgboost_dart_mode=False):
""" """
...@@ -210,6 +211,9 @@ class LGBMModel(LGBMModelBase): ...@@ -210,6 +211,9 @@ class LGBMModel(LGBMModelBase):
Only used in binary classification and lambdarank. Parameter for sigmoid function. Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float huber_delta : float
Only used in regression. Parameter for Huber loss function. Only used in regression. Parameter for Huber loss function.
gaussian_eta : float
Only used in regression. Parameter for L1 and Huber loss function.
It is used to control the width of Gaussian function to approximate hessian.
fair_c : float fair_c : float
Only used in regression. Parameter for Fair loss function. Only used in regression. Parameter for Fair loss function.
max_position : int max_position : int
...@@ -277,6 +281,7 @@ class LGBMModel(LGBMModelBase): ...@@ -277,6 +281,7 @@ class LGBMModel(LGBMModelBase):
self.silent = silent self.silent = silent
self.sigmoid = sigmoid self.sigmoid = sigmoid
self.huber_delta = huber_delta self.huber_delta = huber_delta
self.gaussian_eta = gaussian_eta
self.fair_c = fair_c self.fair_c = fair_c
self.max_position = max_position self.max_position = max_position
self.label_gain = label_gain self.label_gain = label_gain
...@@ -503,7 +508,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase): ...@@ -503,7 +508,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample=1, subsample_freq=1, colsample_bytree=1, subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, reg_alpha=0, reg_lambda=0,
seed=0, nthread=-1, silent=True, seed=0, nthread=-1, silent=True,
huber_delta=1.0, fair_c=1.0, huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
drop_rate=0.1, skip_drop=0.5, max_drop=50, drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False): uniform_drop=False, xgboost_dart_mode=False):
super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves, super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
...@@ -515,7 +520,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase): ...@@ -515,7 +520,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample_freq=subsample_freq, colsample_bytree=colsample_bytree, subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
reg_alpha=reg_alpha, reg_lambda=reg_lambda, reg_alpha=reg_alpha, reg_lambda=reg_lambda,
seed=seed, nthread=nthread, silent=silent, seed=seed, nthread=nthread, silent=silent,
huber_delta=huber_delta, fair_c=fair_c, huber_delta=huber_delta, gaussian_eta=gaussian_eta, fair_c=fair_c,
drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop, drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode) uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)
......
...@@ -221,6 +221,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa ...@@ -221,6 +221,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
GetDouble(params, "sigmoid", &sigmoid); GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta); GetDouble(params, "huber_delta", &huber_delta);
GetDouble(params, "fair_c", &fair_c); GetDouble(params, "fair_c", &fair_c);
GetDouble(params, "gaussian_eta", &gaussian_eta);
GetInt(params, "max_position", &max_position); GetInt(params, "max_position", &max_position);
CHECK(max_position > 0); CHECK(max_position > 0);
GetInt(params, "num_class", &num_class); GetInt(params, "num_class", &num_class);
......
...@@ -57,7 +57,9 @@ private: ...@@ -57,7 +57,9 @@ private:
*/ */
class RegressionL1loss: public ObjectiveFunction { class RegressionL1loss: public ObjectiveFunction {
public: public:
explicit RegressionL1loss(const ObjectiveConfig&) {} explicit RegressionL1loss(const ObjectiveConfig& config) {
eta_ = static_cast<score_t>(config.gaussian_eta);
}
~RegressionL1loss() {} ~RegressionL1loss() {}
...@@ -78,7 +80,7 @@ public: ...@@ -78,7 +80,7 @@ public:
} else { } else {
gradients[i] = -1.0f; gradients[i] = -1.0f;
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -89,7 +91,7 @@ public: ...@@ -89,7 +91,7 @@ public:
} else { } else {
gradients[i] = -weights_[i]; gradients[i] = -weights_[i];
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -105,6 +107,8 @@ private: ...@@ -105,6 +107,8 @@ private:
const float* label_; const float* label_;
/*! \brief Pointer of weights */ /*! \brief Pointer of weights */
const float* weights_; const float* weights_;
/*! \brief a parameter to control the width of Gaussian function to approximate hessian */
score_t eta_;
}; };
/*! /*!
...@@ -114,6 +118,7 @@ class RegressionHuberLoss: public ObjectiveFunction { ...@@ -114,6 +118,7 @@ class RegressionHuberLoss: public ObjectiveFunction {
public: public:
explicit RegressionHuberLoss(const ObjectiveConfig& config) { explicit RegressionHuberLoss(const ObjectiveConfig& config) {
delta_ = static_cast<score_t>(config.huber_delta); delta_ = static_cast<score_t>(config.huber_delta);
eta_ = static_cast<score_t>(config.gaussian_eta);
} }
~RegressionHuberLoss() { ~RegressionHuberLoss() {
...@@ -141,7 +146,7 @@ public: ...@@ -141,7 +146,7 @@ public:
} else { } else {
gradients[i] = -delta_; gradients[i] = -delta_;
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} }
} else { } else {
...@@ -158,7 +163,7 @@ public: ...@@ -158,7 +163,7 @@ public:
} else { } else {
gradients[i] = -delta_ * weights_[i]; gradients[i] = -delta_ * weights_[i];
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -177,6 +182,8 @@ private: ...@@ -177,6 +182,8 @@ private:
const float* weights_; const float* weights_;
/*! \brief delta for Huber loss */ /*! \brief delta for Huber loss */
score_t delta_; score_t delta_;
/*! \brief a parameter to control the width of Gaussian function to approximate hessian */
score_t eta_;
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment