"python-package/vscode:/vscode.git/clone" did not exist on "ec5492f8f915f9bab99e52417cff6808dbdb8ddd"
Commit 26027d35 authored by Tsukasa OMOTO, committed by Guolin Ke

Add a parameter to control the width of the Gaussian function used to approximate the hessian (#182)

parent 4a75f140
......@@ -631,6 +631,9 @@ The methods of each Class is in alphabetical order.
     Only used in binary classification and lambdarank. Parameter for sigmoid function.
 huber_delta : float
     Only used in regression. Parameter for Huber loss function.
+gaussian_eta : float
+    Only used in regression. Parameter for the L1 and Huber loss functions.
+    Controls the width of the Gaussian function used to approximate the hessian.
 fair_c : float
     Only used in regression. Parameter for Fair loss function.
 max_position : int
......
......@@ -139,6 +139,8 @@ public:
   double sigmoid = 1.0f;
   double huber_delta = 1.0f;
   double fair_c = 1.0f;
+  // for ApproximateHessianWithGaussian
+  double gaussian_eta = 1.0f;
   // for lambdarank
   std::vector<double> label_gain;
   // for lambdarank
......
......@@ -407,15 +407,17 @@ inline void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, size_t s
  * y is a prediction.
  * t means true target.
  * g means gradient.
+ * eta is a parameter to control the width of the Gaussian function.
  * w means weights.
  */
-inline static double ApproximateHessianWithGaussian(const double y, const double t, const double g, const double w=1.0f) {
+inline static double ApproximateHessianWithGaussian(const double y, const double t, const double g,
+                                                    const double eta, const double w=1.0f) {
   const double diff = y - t;
   const double pi = 4.0 * std::atan(1.0);
   const double x = std::fabs(diff);
   const double a = 2.0 * std::fabs(g) * w;  // difference of two first derivatives, (zero to inf) and (zero to -inf).
   const double b = 0.0;
-  const double c = std::max(std::fabs(y) + std::fabs(t), 1.0e-10);
+  const double c = std::max((std::fabs(y) + std::fabs(t)) * eta, 1.0e-10);
   return w * std::exp(-(x - b) * (x - b) / (2.0 * c * c)) * a / (c * std::sqrt(2 * pi));
 }
......
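For intuition, here is a minimal Python sketch of the computation above (the helper name `approximate_hessian_with_gaussian` is hypothetical; the real implementation is the C++ function in this hunk). The second derivative of the L1/Huber loss is zero almost everywhere, so the jump of `2*|g|*w` between the two one-sided first derivatives is smeared into a Gaussian bump centered at `y == t`, and `eta` scales the bump's width `c`:

```python
import math

def approximate_hessian_with_gaussian(y, t, g, eta, w=1.0):
    """Python sketch of Common::ApproximateHessianWithGaussian above."""
    x = abs(y - t)                             # distance from the true target
    a = 2.0 * abs(g) * w                       # total mass of the Gaussian bump
    c = max((abs(y) + abs(t)) * eta, 1.0e-10)  # width of the bump, scaled by eta
    # Gaussian density with mean b = 0 and standard deviation c, carrying mass a
    return w * math.exp(-x * x / (2.0 * c * c)) * a / (c * math.sqrt(2.0 * math.pi))
```

For example, with `y = 1.2`, `t = 1.0`, `g = -1`, `w = 1`, the default `eta = 1.0` yields a hessian of roughly 0.36, while `eta = 0.1` yields roughly 2.40: a smaller `eta` concentrates the approximate curvature near the target.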
......@@ -153,7 +153,8 @@ class LGBMModel(LGBMModelBase):
                  subsample=1, subsample_freq=1, colsample_bytree=1,
                  reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
                  is_unbalance=False, seed=0, nthread=-1, silent=True,
-                 sigmoid=1.0, huber_delta=1.0, fair_c=1.0, max_position=20, label_gain=None,
+                 sigmoid=1.0, huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
+                 max_position=20, label_gain=None,
                  drop_rate=0.1, skip_drop=0.5, max_drop=50,
                  uniform_drop=False, xgboost_dart_mode=False):
         """
......@@ -210,6 +211,9 @@ class LGBMModel(LGBMModelBase):
     Only used in binary classification and lambdarank. Parameter for sigmoid function.
 huber_delta : float
     Only used in regression. Parameter for Huber loss function.
+gaussian_eta : float
+    Only used in regression. Parameter for the L1 and Huber loss functions.
+    Controls the width of the Gaussian function used to approximate the hessian.
 fair_c : float
     Only used in regression. Parameter for Fair loss function.
 max_position : int
......@@ -277,6 +281,7 @@ class LGBMModel(LGBMModelBase):
         self.silent = silent
         self.sigmoid = sigmoid
         self.huber_delta = huber_delta
+        self.gaussian_eta = gaussian_eta
         self.fair_c = fair_c
         self.max_position = max_position
         self.label_gain = label_gain
......@@ -503,7 +508,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
                  subsample=1, subsample_freq=1, colsample_bytree=1,
                  reg_alpha=0, reg_lambda=0,
                  seed=0, nthread=-1, silent=True,
-                 huber_delta=1.0, fair_c=1.0,
+                 huber_delta=1.0, gaussian_eta=1.0, fair_c=1.0,
                  drop_rate=0.1, skip_drop=0.5, max_drop=50,
                  uniform_drop=False, xgboost_dart_mode=False):
         super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
......@@ -515,7 +520,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
                                             subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
                                             reg_alpha=reg_alpha, reg_lambda=reg_lambda,
                                             seed=seed, nthread=nthread, silent=silent,
-                                            huber_delta=huber_delta, fair_c=fair_c,
+                                            huber_delta=huber_delta, gaussian_eta=gaussian_eta, fair_c=fair_c,
                                             drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
                                             uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)
......
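A short usage sketch through the sklearn-style wrapper. This is hypothetical: the objective argument sits outside the hunks shown here (assume it accepts an L1 objective in the usual way), and `X_train`/`y_train` stand for any regression dataset; only `gaussian_eta` is taken from this commit.

```python
from lightgbm import LGBMRegressor

# gaussian_eta is read only by the L1 and Huber objectives; 1.0 keeps the
# previous behaviour, smaller values narrow the Gaussian hessian bump.
model = LGBMRegressor(objective='regression_l1', gaussian_eta=0.5)
model.fit(X_train, y_train)  # X_train / y_train assumed to exist
```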
......@@ -221,6 +221,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta);
GetDouble(params, "fair_c", &fair_c);
GetDouble(params, "gaussian_eta", &gaussian_eta);
GetInt(params, "max_position", &max_position);
CHECK(max_position > 0);
GetInt(params, "num_class", &num_class);
......
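Because the key parsed here is the string "gaussian_eta", the parameter can equally be passed through the plain params dict of the core training API. A sketch, again assuming `X_train`/`y_train` exist and that `'regression_l1'` selects the `RegressionL1loss` objective below:

```python
import lightgbm as lgb

params = {
    'objective': 'regression_l1',  # one of the two objectives that read gaussian_eta
    'gaussian_eta': 0.5,           # forwarded to ObjectiveConfig::Set above
}
train_set = lgb.Dataset(X_train, label=y_train)
booster = lgb.train(params, train_set, num_boost_round=100)
```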
......@@ -57,7 +57,9 @@ private:
  */
 class RegressionL1loss: public ObjectiveFunction {
 public:
-  explicit RegressionL1loss(const ObjectiveConfig&) {}
+  explicit RegressionL1loss(const ObjectiveConfig& config) {
+    eta_ = static_cast<score_t>(config.gaussian_eta);
+  }
   ~RegressionL1loss() {}
......@@ -78,7 +80,7 @@ public:
         } else {
           gradients[i] = -1.0f;
         }
-        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i]));
+        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
       }
     } else {
       #pragma omp parallel for schedule(static)
......@@ -89,7 +91,7 @@ public:
         } else {
           gradients[i] = -weights_[i];
         }
-        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i]));
+        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
       }
     }
   }
......@@ -105,6 +107,8 @@ private:
   const float* label_;
   /*! \brief Pointer of weights */
   const float* weights_;
+  /*! \brief parameter to control the width of the Gaussian function used to approximate the hessian */
+  score_t eta_;
 };
 /*!
......@@ -114,6 +118,7 @@ class RegressionHuberLoss: public ObjectiveFunction {
 public:
   explicit RegressionHuberLoss(const ObjectiveConfig& config) {
     delta_ = static_cast<score_t>(config.huber_delta);
+    eta_ = static_cast<score_t>(config.gaussian_eta);
   }
   ~RegressionHuberLoss() {
......@@ -141,7 +146,7 @@ public:
         } else {
           gradients[i] = -delta_;
         }
-        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i]));
+        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
       }
     }
   } else {
......@@ -158,7 +163,7 @@ public:
         } else {
           gradients[i] = -delta_ * weights_[i];
         }
-        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], weights_[i]));
+        hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
       }
     }
   }
......@@ -177,6 +182,8 @@ private:
   const float* weights_;
   /*! \brief delta for Huber loss */
   score_t delta_;
+  /*! \brief parameter to control the width of the Gaussian function used to approximate the hessian */
+  score_t eta_;
 };
......
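Putting the pieces together, here is a per-sample Python sketch of what `RegressionHuberLoss::GetGradients` now computes (hypothetical helper; the hessian of `w` in the quadratic region is assumed from the standard weighted Huber objective, since that branch is not shown in these hunks, and the linear region reuses the Gaussian sketch from earlier):

```python
def huber_grad_hess(y, t, delta, eta, w=1.0):
    """One (gradient, hessian) pair for the weighted Huber loss at prediction y."""
    diff = y - t
    if abs(diff) <= delta:
        # quadratic region: exact first and second derivatives
        return diff * w, w
    # linear region: the gradient saturates at +/- delta and the true second
    # derivative is zero, so substitute the eta-scaled Gaussian bump
    grad = (delta if diff >= 0 else -delta) * w
    return grad, approximate_hessian_with_gaussian(y, t, grad, eta, w)
```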