Commit 26027d35 authored by Tsukasa OMOTO's avatar Tsukasa OMOTO Committed by Guolin Ke
Browse files

Add a parameter to control width of Gaussian function to approximate hessian (#182)

parent 4a75f140
...@@ -631,6 +631,9 @@ The methods of each Class is in alphabetical order. ...@@ -631,6 +631,9 @@ The methods of each Class is in alphabetical order.
Only used in binary classification and lambdarank. Parameter for sigmoid function. Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float huber_delta : float
Only used in regression. Parameter for Huber loss function. Only used in regression. Parameter for Huber loss function.
gaussian_eta : float
        Only used in regression. Parameter for the L1 and Huber loss functions.
It is used to control the width of Gaussian function to approximate hessian.
fair_c : float fair_c : float
Only used in regression. Parameter for Fair loss function. Only used in regression. Parameter for Fair loss function.
max_position : int max_position : int
......
...@@ -139,6 +139,8 @@ public: ...@@ -139,6 +139,8 @@ public:
double sigmoid = 1.0f; double sigmoid = 1.0f;
double huber_delta = 1.0f; double huber_delta = 1.0f;
double fair_c = 1.0f; double fair_c = 1.0f;
// for ApproximateHessianWithGaussian
double gaussian_eta = 1.0f;
// for lambdarank // for lambdarank
std::vector<double> label_gain; std::vector<double> label_gain;
// for lambdarank // for lambdarank
......
...@@ -407,15 +407,17 @@ inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t s ...@@ -407,15 +407,17 @@ inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t s
* y is a prediction. * y is a prediction.
* t means true target. * t means true target.
* g means gradient. * g means gradient.
* eta is a parameter to control the width of Gaussian function.
* w means weights. * w means weights.
*/ */
/*!
* \brief Approximate the hessian of a loss with a discontinuous first derivative
*        (L1 / Huber) by a Gaussian bump centered at the discontinuity.
* \param y   prediction (score)
* \param t   true target (label)
* \param g   gradient at this point; |g| scales the jump height of the first derivative
* \param eta parameter controlling the width of the Gaussian used to smooth the hessian
* \param w   sample weight (defaults to 1.0)
* \return approximated hessian value (always >= 0)
*/
inline static double ApproximateHessianWithGaussian(const double y, const double t, const double g,
                                                    const double eta, const double w = 1.0f) {
  const double diff = y - t;
  const double pi = 4.0 * std::atan(1.0);
  const double x = std::fabs(diff);
  // Jump of the first derivative across the kink: from -|g| to +|g| (times weight).
  const double a = 2.0 * std::fabs(g) * w;
  // The kink sits at |y - t| == 0.
  const double b = 0.0;
  // Width of the Gaussian; scaled by eta and floored to avoid division by zero
  // when both prediction and label are zero.
  const double c = std::max((std::fabs(y) + std::fabs(t)) * eta, 1.0e-10);
  // a * N(x; b, c): spread the derivative jump over a Gaussian of width c.
  return w * std::exp(-(x - b) * (x - b) / (2.0 * c * c)) * a / (c * std::sqrt(2 * pi));
}
......
...@@ -153,7 +153,8 @@ class LGBMModel(LGBMModelBase): ...@@ -153,7 +153,8 @@ class LGBMModel(LGBMModelBase):
subsample=1, subsample_freq=1, colsample_bytree=1, subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0, nthread=-1, silent=True, is_unbalance=False, seed=0, nthread=-1, silent=True,
sigmoid=1.0, huber_delta=1.0, fair_c=1.0, max_position=20, label_gain=None, sigmoid=1.0, huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
max_position=20, label_gain=None,
drop_rate=0.1, skip_drop=0.5, max_drop=50, drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False): uniform_drop=False, xgboost_dart_mode=False):
""" """
...@@ -210,6 +211,9 @@ class LGBMModel(LGBMModelBase): ...@@ -210,6 +211,9 @@ class LGBMModel(LGBMModelBase):
Only used in binary classification and lambdarank. Parameter for sigmoid function. Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float huber_delta : float
Only used in regression. Parameter for Huber loss function. Only used in regression. Parameter for Huber loss function.
gaussian_eta : float
Only used in regression. Parameter for L1 and Huber loss function.
It is used to control the width of Gaussian function to approximate hessian.
fair_c : float fair_c : float
Only used in regression. Parameter for Fair loss function. Only used in regression. Parameter for Fair loss function.
max_position : int max_position : int
...@@ -277,6 +281,7 @@ class LGBMModel(LGBMModelBase): ...@@ -277,6 +281,7 @@ class LGBMModel(LGBMModelBase):
self.silent = silent self.silent = silent
self.sigmoid = sigmoid self.sigmoid = sigmoid
self.huber_delta = huber_delta self.huber_delta = huber_delta
self.gaussian_eta = gaussian_eta
self.fair_c = fair_c self.fair_c = fair_c
self.max_position = max_position self.max_position = max_position
self.label_gain = label_gain self.label_gain = label_gain
...@@ -503,7 +508,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase): ...@@ -503,7 +508,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample=1, subsample_freq=1, colsample_bytree=1, subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, reg_alpha=0, reg_lambda=0,
seed=0, nthread=-1, silent=True, seed=0, nthread=-1, silent=True,
huber_delta=1.0, fair_c=1.0, huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
drop_rate=0.1, skip_drop=0.5, max_drop=50, drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False): uniform_drop=False, xgboost_dart_mode=False):
super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves, super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
...@@ -515,7 +520,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase): ...@@ -515,7 +520,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample_freq=subsample_freq, colsample_bytree=colsample_bytree, subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
reg_alpha=reg_alpha, reg_lambda=reg_lambda, reg_alpha=reg_alpha, reg_lambda=reg_lambda,
seed=seed, nthread=nthread, silent=silent, seed=seed, nthread=nthread, silent=silent,
huber_delta=huber_delta, fair_c=fair_c, huber_delta=huber_delta, gaussian_eta=gaussian_eta, fair_c=fair_c,
drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop, drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode) uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)
......
...@@ -221,6 +221,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa ...@@ -221,6 +221,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
GetDouble(params, "sigmoid", &sigmoid); GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta); GetDouble(params, "huber_delta", &huber_delta);
GetDouble(params, "fair_c", &fair_c); GetDouble(params, "fair_c", &fair_c);
GetDouble(params, "gaussian_eta", &gaussian_eta);
GetInt(params, "max_position", &max_position); GetInt(params, "max_position", &max_position);
CHECK(max_position > 0); CHECK(max_position > 0);
GetInt(params, "num_class", &num_class); GetInt(params, "num_class", &num_class);
......
...@@ -57,7 +57,9 @@ private: ...@@ -57,7 +57,9 @@ private:
*/ */
class RegressionL1loss: public ObjectiveFunction { class RegressionL1loss: public ObjectiveFunction {
public: public:
explicit RegressionL1loss(const ObjectiveConfig&) {} explicit RegressionL1loss(const ObjectiveConfig& config) {
eta_ = static_cast<score_t>(config.gaussian_eta);
}
~RegressionL1loss() {} ~RegressionL1loss() {}
...@@ -78,7 +80,7 @@ public: ...@@ -78,7 +80,7 @@ public:
} else { } else {
gradients[i] = -1.0f; gradients[i] = -1.0f;
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} else { } else {
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -89,7 +91,7 @@ public: ...@@ -89,7 +91,7 @@ public:
} else { } else {
gradients[i] = -weights_[i]; gradients[i] = -weights_[i];
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -105,6 +107,8 @@ private: ...@@ -105,6 +107,8 @@ private:
const float* label_; const float* label_;
/*! \brief Pointer of weights */ /*! \brief Pointer of weights */
const float* weights_; const float* weights_;
/*! \brief a parameter to control the width of Gaussian function to approximate hessian */
score_t eta_;
}; };
/*! /*!
...@@ -114,6 +118,7 @@ class RegressionHuberLoss: public ObjectiveFunction { ...@@ -114,6 +118,7 @@ class RegressionHuberLoss: public ObjectiveFunction {
public: public:
explicit RegressionHuberLoss(const ObjectiveConfig& config) { explicit RegressionHuberLoss(const ObjectiveConfig& config) {
delta_ = static_cast<score_t>(config.huber_delta); delta_ = static_cast<score_t>(config.huber_delta);
eta_ = static_cast<score_t>(config.gaussian_eta);
} }
~RegressionHuberLoss() { ~RegressionHuberLoss() {
...@@ -141,7 +146,7 @@ public: ...@@ -141,7 +146,7 @@ public:
} else { } else {
gradients[i] = -delta_; gradients[i] = -delta_;
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
} }
} }
} else { } else {
...@@ -158,7 +163,7 @@ public: ...@@ -158,7 +163,7 @@ public:
} else { } else {
gradients[i] = -delta_ * weights_[i]; gradients[i] = -delta_ * weights_[i];
} }
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i])); hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
} }
} }
} }
...@@ -177,6 +182,8 @@ private: ...@@ -177,6 +182,8 @@ private:
const float* weights_; const float* weights_;
/*! \brief delta for Huber loss */ /*! \brief delta for Huber loss */
score_t delta_; score_t delta_;
/*! \brief a parameter to control the width of Gaussian function to approximate hessian */
score_t eta_;
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment