Unverified Commit 5392c9ea authored by Guolin Ke, committed by GitHub

Fix objective functions with zero hessian (#1199)

parent d90369a0
......@@ -205,10 +205,24 @@ Supports the following metrics:
- NDCG
- MAP
- Multi class log loss
- Multi class error rate
- Fair
- Huber
- Poisson
- Quantile
- MAPE
- Kullback-Leibler
For more details, please refer to `Parameters <./Parameters.rst#metric-parameters>`__.
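For illustration, a minimal sketch of requesting the new MAPE objective and metric through the Python package (parameter names as in the tables above; the synthetic data and round count are placeholders):

import lightgbm as lgb
import numpy as np

# toy regression data; MAPE treats |label| < 1 as 1, so keep labels away from zero
X = np.random.rand(500, 5)
y = 1.0 + 10.0 * np.random.rand(500)

params = {"objective": "mape", "metric": ["mape", "l1"], "verbose": -1}
booster = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=20)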
Other Features
......@@ -269,7 +283,7 @@ References
.. _LightGBM\: A Highly Efficient Gradient Boosting Decision Tree: https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree.pdf
.. _On Grouping for Maximum Homogeneity: http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1958.10501479
.. _On Grouping for Maximum Homogeneity: http://www.csiss.org/SPACE/workshops/2004/SAC/files/fisher.pdf
.. _Optimization of collective communication operations in MPICH: http://wwwi10.lrr.in.tum.de/~gerndt/home/Teaching/HPCSeminar/mpich_multi_coll.pdf
......
......@@ -54,7 +54,7 @@ Core Parameters
- **Note**: can only be used in the CLI version.
- ``application``, default=\ ``regression``, type=enum,
options=\ ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``quantile_l2``,
options=\ ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``,
``binary``, ``multiclass``, ``multiclassova``, ``xentropy``, ``xentlambda``, ``lambdarank``,
alias=\ ``objective``, ``app``
......@@ -72,7 +72,7 @@ Core Parameters
- ``quantile``, `Quantile regression`_
- ``quantile_l2``, like the ``quantile``, but L2 loss is used instead
- ``mape``, `MAPE loss`_
- ``binary``, binary `log loss`_ classification application
......@@ -513,10 +513,6 @@ Objective Parameters
- parameter for `Fair loss`_. Will be used in ``regression`` task
- ``gaussian_eta``, default=\ ``1.0``, type=double
- parameter to control the width of Gaussian function. Will be used in ``regression_l1`` and ``huber`` losses
- ``poisson_max_delta_step``, default=\ ``0.7``, type=double
- parameter for `Poisson regression`_ to safeguard optimization
......@@ -573,6 +569,8 @@ Metric Parameters
- ``l2_root``, root square loss, alias=\ ``root_mean_squared_error``, ``rmse``
- ``quantile``, `Quantile regression`_
- ``mape``, `MAPE loss`_
- ``huber``, `Huber loss`_
......@@ -744,6 +742,8 @@ You can specify query/group id in data file now. Please refer to parameter ``gr
.. _Quantile regression: https://en.wikipedia.org/wiki/Quantile_regression
.. _MAPE loss: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
.. _Fair loss: https://www.kaggle.com/c/allstate-claims-severity/discussion/24520
.. _Poisson regression: https://en.wikipedia.org/wiki/Poisson_regression
......
......@@ -68,7 +68,7 @@ Some important parameters:
- ``convert_model``, for converting model file into if-else format, see more information in `Convert model parameters <./Parameters.rst#convert-model-parameters>`__
- ``application``, default=\ ``regression``, type=enum,
options=\ ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``quantile_l2``,
options=\ ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``,
``binary``, ``multiclass``, ``multiclassova``, ``xentropy``, ``xentlambda``, ``lambdarank``,
alias=\ ``objective``, ``app``
......@@ -86,7 +86,7 @@ Some important parameters:
- ``quantile``, `Quantile regression`_
- ``quantile_l2``, like the ``quantile``, but L2 loss is used instead
- ``mape``, `MAPE loss`_
- ``binary``, binary `log loss`_ classification application
......@@ -234,6 +234,8 @@ Examples
.. _Quantile regression: https://en.wikipedia.org/wiki/Quantile_regression
.. _MAPE loss: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
.. _log loss: https://en.wikipedia.org/wiki/Cross_entropy
.. _softmax: https://en.wikipedia.org/wiki/Softmax_function
......
......@@ -164,8 +164,6 @@ public:
virtual ~ObjectiveConfig() {}
double sigmoid = 1.0f;
double fair_c = 1.0f;
// for Approximate Hessian With Gaussian
double gaussian_eta = 1.0f;
double poisson_max_delta_step = 0.7f;
// for lambdarank
std::vector<double> label_gain;
......@@ -473,7 +471,7 @@ struct ParameterAlias {
"convert_model", "convert_model_language",
"feature_fraction_seed", "enable_bundle", "data_filename", "valid_data_filenames",
"snapshot_freq", "verbosity", "sparse_threshold", "enable_load_from_binary_file",
"max_conflict_rate", "poisson_max_delta_step", "gaussian_eta",
"max_conflict_rate", "poisson_max_delta_step",
"histogram_pool_size", "is_provide_training_metric", "machine_list_filename", "machines",
"zero_as_missing", "init_score_file", "valid_init_score_file", "is_predict_contrib",
"max_cat_threshold", "cat_smooth", "min_data_per_group", "cat_l2", "max_cat_to_onehot",
......
......@@ -210,6 +210,52 @@ public:
return global;
}
template<class T>
static T GlobalSyncUpByMean(T& local) {
T global = (T)0;
Allreduce(reinterpret_cast<char*>(&local),
sizeof(local), sizeof(local),
reinterpret_cast<char*>(&global),
[](const char* src, char* dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const T *p1;
T *p2;
while (used_size < len) {
p1 = reinterpret_cast<const T *>(src);
p2 = reinterpret_cast<T *>(dst);
*p2 += *p1;
src += type_size;
dst += type_size;
used_size += type_size;
}
});
return static_cast<T>(global / num_machines_);
}
template<class T>
static void GlobalSum(std::vector<T>& local) {
std::vector<T> global(local.size(), (T)0);
Allreduce(reinterpret_cast<char*>(local.data()),
static_cast<comm_size_t>(sizeof(T) * local.size()), sizeof(T),
reinterpret_cast<char*>(global.data()),
[](const char* src, char* dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const T *p1;
T *p2;
while (used_size < len) {
p1 = reinterpret_cast<const T *>(src);
p2 = reinterpret_cast<T *>(dst);
*p2 += *p1;
src += type_size;
dst += type_size;
used_size += type_size;
}
});
for (size_t i = 0; i < local.size(); ++i) {
local[i] = global[i];
}
}
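As a minimal single-process sketch of what these helpers compute (the real versions run the element-wise sum through Allreduce across machines):

machines = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]        # per-machine local buffers
global_sum = [sum(vals) for vals in zip(*machines)]     # GlobalSum: element-wise sum
global_mean = [s / len(machines) for s in global_sum]   # GlobalSyncUpByMean: sum, then divide by num_machines_
print(global_sum, global_mean)                          # [9.0, 12.0] [3.0, 4.0]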
private:
static void AllgatherBruck(char* input, const comm_size_t* block_start, const comm_size_t* block_len, char* output, comm_size_t all_size);
......
......@@ -35,9 +35,14 @@ public:
virtual bool IsConstantHessian() const { return false; }
virtual bool BoostFromAverage() const { return false; }
virtual bool IsRenewTreeOutput() const { return false; }
virtual bool GetCustomAverage(double *) const { return false; }
virtual double RenewTreeOutput(double ori_output, const double*,
const data_size_t*,
const data_size_t*,
data_size_t) const { return ori_output; }
virtual double BoostFromScore() const { return 0.0f; }
virtual bool SkipEmptyClass() const { return false; }
......
......@@ -12,6 +12,7 @@ namespace LightGBM {
/*! \brief forward declaration */
class Tree;
class Dataset;
class ObjectiveFunction;
/*!
* \brief Interface for tree learner
......@@ -67,6 +68,9 @@ public:
*/
virtual void AddPredictionToScore(const Tree* tree, double* out_score) const = 0;
virtual void RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj, const double* prediction,
data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const = 0;
TreeLearner() = default;
/*! \brief Disable copy */
TreeLearner& operator=(const TreeLearner&) = delete;
......
......@@ -640,27 +640,6 @@ inline static void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, s
}
/*
* approximate hessians of absolute loss with Gaussian function
* cf. https://en.wikipedia.org/wiki/Gaussian_function
*
* y is a prediction.
* t means true target.
* g means gradient.
* eta is a parameter to control the width of Gaussian function.
* w means weights.
*/
inline static double ApproximateHessianWithGaussian(const double y, const double t, const double g,
const double eta, const double w=1.0f) {
const double diff = y - t;
const double pi = 4.0 * std::atan(1.0);
const double x = std::fabs(diff);
const double a = 2.0 * std::fabs(g) * w; // difference of two first derivatives, (zero to inf) and (zero to -inf).
const double b = 0.0;
const double c = std::max((std::fabs(y) + std::fabs(t)) * eta, 1.0e-10);
return w * std::exp(-(x - b) * (x - b) / (2.0 * c * c)) * a / (c * std::sqrt(2 * pi));
}
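A small Python port of the removed helper shows why it was dropped: because the Gaussian width c grows with |y| + |t|, the approximated hessian scales roughly like 1 / (|y| + |t|), so for large-magnitude targets it is nearly zero and Newton-style leaf values -sum(grad)/sum(hess) can explode. The replacement in this commit uses constant (weight-scaled) hessians plus median-based leaf renewal instead.

import math

def approx_hessian_with_gaussian(y, t, g, eta, w=1.0):
    # port of the removed C++ helper, for illustration only
    diff = abs(y - t)
    a = 2.0 * abs(g) * w                       # jump between the two one-sided first derivatives
    c = max((abs(y) + abs(t)) * eta, 1e-10)    # Gaussian width grows with the target magnitude
    return w * math.exp(-diff * diff / (2.0 * c * c)) * a / (c * math.sqrt(2.0 * math.pi))

print(approx_hessian_with_gaussian(y=1e6, t=0.0, g=1.0, eta=1.0))   # ~ 4.8e-07, effectively a zero hessian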
template <typename T>
inline static std::vector<T*> Vector2Ptr(std::vector<std::vector<T>>& data) {
std::vector<T*> ptr(data.size());
......@@ -882,6 +861,11 @@ inline static const char* SkipNewLine(const char* str) {
return str;
}
template <typename T>
static int Sign(T x) {
return (x > T(0)) - (x < T(0));
}
} // namespace Common
} // namespace LightGBM
......
......@@ -1956,7 +1956,7 @@ class Booster(object):
self.__name_inner_eval = \
[string_buffers[i].value.decode() for i in range_(self.__num_inner_eval)]
self.__higher_better_inner_eval = \
[name.startswith(('auc', 'ndcg', 'map')) for name in self.__name_inner_eval]
[name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval]
def attr(self, key):
"""Get attribute string from the Booster.
......
......@@ -295,42 +295,15 @@ void GBDT::Bagging(int iter) {
* (i) and (ii) could be selected as say "auto_init_score" = 0 or 1 etc..
*
*/
double ObtainAutomaticInitialScore(const ObjectiveFunction* fobj, const label_t* label, data_size_t num_data) {
double ObtainAutomaticInitialScore(const ObjectiveFunction* fobj) {
double init_score = 0.0f;
bool got_custom = false;
if (fobj != nullptr) {
got_custom = fobj->GetCustomAverage(&init_score);
}
if (!got_custom) {
double sum_label = 0.0f;
#pragma omp parallel for schedule(static) reduction(+:sum_label)
for (data_size_t i = 0; i < num_data; ++i) {
sum_label += label[i];
}
init_score = sum_label / num_data;
init_score = fobj->BoostFromScore();
}
if (Network::num_machines() > 1) {
double global_init_score = 0.0f;
Network::Allreduce(reinterpret_cast<char*>(&init_score),
sizeof(init_score), sizeof(init_score),
reinterpret_cast<char*>(&global_init_score),
[](const char* src, char* dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const double *p1;
double *p2;
while (used_size < len) {
p1 = reinterpret_cast<const double *>(src);
p2 = reinterpret_cast<double *>(dst);
*p2 += *p1;
src += type_size;
dst += type_size;
used_size += type_size;
}
});
return global_init_score / Network::num_machines();
} else {
return init_score;
init_score = Network::GlobalSyncUpByMean(init_score);
}
return init_score;
}
void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
......@@ -379,21 +352,23 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
double GBDT::BoostFromAverage() {
// boosting from average label; or customized "average" if implemented for the current objective
if (models_.empty()
&& gbdt_config_->boost_from_average
&& !train_score_updater_->has_init_score()
if (models_.empty() && !train_score_updater_->has_init_score()
&& num_class_ <= 1
&& objective_function_ != nullptr
&& objective_function_->BoostFromAverage()) {
auto label = train_data_->metadata().label();
double init_score = ObtainAutomaticInitialScore(objective_function_, label, num_data_);
if (std::fabs(init_score) > kEpsilon) {
train_score_updater_->AddScore(init_score, 0);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(init_score, 0);
&& objective_function_ != nullptr) {
if (gbdt_config_->boost_from_average) {
double init_score = ObtainAutomaticInitialScore(objective_function_);
if (std::fabs(init_score) > kEpsilon) {
train_score_updater_->AddScore(init_score, 0);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(init_score, 0);
}
Log::Info("Start training from score %lf", init_score);
return init_score;
}
return init_score;
} else if (std::string(objective_function_->GetName()) == std::string("regression_l1")
|| std::string(objective_function_->GetName()) == std::string("quantile")
|| std::string(objective_function_->GetName()) == std::string("mape")) {
Log::Warning("Disable boost_from_average in %s may cause the slow convergence.", objective_function_->GetName());
}
}
return 0.0f;
......@@ -434,10 +409,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
const size_t bias = static_cast<size_t>(cur_tree_id) * num_data_;
std::unique_ptr<Tree> new_tree(new Tree(2));
if (class_need_train_[cur_tree_id]) {
size_t bias = static_cast<size_t>(cur_tree_id)* num_data_;
auto grad = gradients + bias;
auto hess = hessians + bias;
......@@ -460,6 +434,8 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
if (new_tree->num_leaves() > 1) {
should_continue = true;
tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, train_score_updater_->score() + bias,
num_data_, bag_data_indices_.data(), bag_data_cnt_);
// shrinkage by learning rate
new_tree->Shrinkage(shrinkage_rate_);
// update score
......
......@@ -206,11 +206,6 @@ public:
}
}
/*!
* \brief Get Type name of this boosting object
*/
const char* SubModelName() const override { return "tree"; }
private:
std::vector<data_size_t> tmp_indice_right_;
};
......
......@@ -73,7 +73,8 @@ public:
* \param cur_tree_id Current tree for multiclass training
*/
inline void AddScore(const Tree* tree, int cur_tree_id) {
tree->AddPredictionToScore(data_, num_data_, score_.data() + cur_tree_id * num_data_);
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
tree->AddPredictionToScore(data_, num_data_, score_.data() + offset);
}
/*!
* \brief Adding prediction score, only used for training data.
......@@ -83,7 +84,8 @@ public:
* \param cur_tree_id Current tree for multiclass training
*/
inline void AddScore(const TreeLearner* tree_learner, const Tree* tree, int cur_tree_id) {
tree_learner->AddPredictionToScore(tree, score_.data() + cur_tree_id * num_data_);
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
tree_learner->AddPredictionToScore(tree, score_.data() + offset);
}
/*!
* \brief Using tree model to get prediction number, then adding to scores for parts of data
......@@ -95,10 +97,12 @@ public:
*/
inline void AddScore(const Tree* tree, const data_size_t* data_indices,
data_size_t data_cnt, int cur_tree_id) {
tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + cur_tree_id * num_data_);
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + offset);
}
/*! \brief Pointer of score */
inline const double* score() const { return score_.data(); }
inline data_size_t num_data() const { return num_data_; }
/*! \brief Disable copy */
......
......@@ -310,8 +310,6 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
CHECK(sigmoid > 0);
GetDouble(params, "fair_c", &fair_c);
CHECK(fair_c > 0);
GetDouble(params, "gaussian_eta", &gaussian_eta);
CHECK(gaussian_eta > 0);
GetDouble(params, "poisson_max_delta_step", &poisson_max_delta_step);
CHECK(poisson_max_delta_step > 0);
GetInt(params, "max_position", &max_position);
......
......@@ -43,6 +43,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return new CrossEntropyLambdaMetric(config);
} else if (type == std::string("kldiv") || type == std::string("kullback_leibler")) {
return new KullbackLeiblerDivergence(config);
} else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) {
return new MAPEMetric(config);
}
return nullptr;
}
......
......@@ -57,9 +57,11 @@ public:
sum_query_weights_ += query_weights_[i];
}
}
inverse_max_dcgs_.resize(num_queries_);
// cache the inverse max DCG for all queries, used to calculate NDCG
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_.emplace_back(eval_at_.size(), 0.0f);
inverse_max_dcgs_[i].resize(eval_at_.size(), 0.0f);
DCGCalculator::CalMaxDCG(eval_at_, label_ + query_boundaries_[i],
query_boundaries_[i + 1] - query_boundaries_[i],
&inverse_max_dcgs_[i]);
......
......@@ -227,5 +227,20 @@ public:
}
};
/*! \brief Mape regression loss for regression task */
class MAPEMetric : public RegressionMetric<MAPEMetric> {
public:
explicit MAPEMetric(const MetricConfig& config) :RegressionMetric<MAPEMetric>(config) {
}
inline static double LossOnPoint(label_t label, double score, const MetricConfig&) {
return std::fabs((label - score)) / std::max(1.0f, std::fabs(label));
}
inline static const char* Name() {
return "mape";
}
};
} // namespace LightGBM
#endif // LightGBM_METRIC_REGRESSION_METRIC_HPP_
......@@ -15,8 +15,6 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new RegressionL1loss(config);
} else if (type == std::string("quantile")) {
return new RegressionQuantileloss(config);
} else if (type == std::string("quantile_l2")) {
return new RegressionQuantileL2loss(config);
} else if (type == std::string("huber")) {
return new RegressionHuberLoss(config);
} else if (type == std::string("fair")) {
......@@ -35,6 +33,8 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new CrossEntropy(config);
} else if (type == std::string("xentlambda") || type == std::string("cross_entropy_lambda")) {
return new CrossEntropyLambda(config);
} else if (type == std::string("mean_absolute_percentage_error") || type == std::string("mape")) {
return new RegressionMAPELOSS(config);
}
return nullptr;
}
......@@ -48,8 +48,6 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new RegressionL1loss(strs);
} else if (type == std::string("quantile")) {
return new RegressionQuantileloss(strs);
} else if (type == std::string("quantile_l2")) {
return new RegressionQuantileL2loss(strs);
} else if (type == std::string("huber")) {
return new RegressionHuberLoss(strs);
} else if (type == std::string("fair")) {
......
......@@ -4,10 +4,60 @@
#include <LightGBM/meta.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/array_args.h>
namespace LightGBM {
#define PercentileFun(T, data_reader, cnt_data, alpha) {\
std::vector<T> ref_data(cnt_data);\
for (data_size_t i = 0; i < cnt_data; ++i) {\
ref_data[i] = data_reader(i);\
}\
const double float_pos = (1.0f - alpha) * cnt_data;\
const data_size_t pos = static_cast<data_size_t>(float_pos);\
if (pos < 1) {\
return ref_data[ArrayArgs<T>::ArgMax(ref_data)];\
} else if (pos >= cnt_data) {\
return ref_data[ArrayArgs<T>::ArgMin(ref_data)];\
} else {\
const double bias = float_pos - pos;\
if (pos > cnt_data / 2) {\
ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos - 1);\
T v1 = ref_data[pos - 1];\
T v2 = ref_data[pos + ArrayArgs<T>::ArgMax(ref_data.data() + pos, cnt_data - pos)];\
return static_cast<T>(v1 - (v1 - v2) * bias);\
} else {\
ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos);\
T v2 = ref_data[pos];\
T v1 = ref_data[ArrayArgs<T>::ArgMin(ref_data.data(), pos)];\
return static_cast<T>(v1 - (v1 - v2) * bias);\
}\
}\
}\
#define WeightedPercentileFun(T, data_reader, weight_reader, cnt_data, alpha) {\
std::vector<data_size_t> sorted_idx(cnt_data);\
for (data_size_t i = 0; i < cnt_data; ++i) {\
sorted_idx[i] = i;\
}\
std::sort(sorted_idx.begin(), sorted_idx.end(), [=](data_size_t a, data_size_t b) {return data_reader(a) < data_reader(b); });\
std::vector<double> weighted_cdf(cnt_data);\
weighted_cdf[0] = weight_reader(sorted_idx[0]);\
for (data_size_t i = 1; i < cnt_data; ++i) {\
weighted_cdf[i] = weighted_cdf[i - 1] + weight_reader(sorted_idx[i]);\
}\
double threshold = weighted_cdf[cnt_data - 1] * alpha;\
size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(), threshold) - weighted_cdf.begin();\
if (pos == 0) {\
return data_reader(sorted_idx[0]);\
}\
CHECK(threshold >= weighted_cdf[pos - 1]);\
CHECK(threshold < weighted_cdf[pos]);\
T v1 = data_reader(sorted_idx[pos - 1]);\
T v2 = data_reader(sorted_idx[pos]);\
return static_cast<T>((threshold - weighted_cdf[pos]) / (weighted_cdf[pos + 1] - weighted_cdf[pos]) * (v2 - v1) + v1);\
}\
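A plain Python sketch of what WeightedPercentileFun is intended to compute, i.e. the value at which the weighted CDF first crosses alpha (alpha = 0.5 gives the weighted median used as the L1/MAPE init score); interpolation details are simplified here:

import numpy as np

def weighted_percentile(values, weights, alpha):
    order = np.argsort(values)
    values = np.asarray(values, dtype=float)[order]
    weights = np.asarray(weights, dtype=float)[order]
    cdf = np.cumsum(weights)                              # weighted CDF over sorted values
    threshold = alpha * cdf[-1]
    pos = np.searchsorted(cdf, threshold, side="right")
    return values[min(pos, len(values) - 1)]

print(weighted_percentile([3.0, 1.0, 2.0, 10.0], [1.0, 1.0, 1.0, 5.0], 0.5))   # -> 10.0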
/*!
* \brief Objective function for regression
*/
......@@ -25,7 +75,7 @@ public:
}
}
}
~RegressionL2loss() {
}
......@@ -34,8 +84,9 @@ public:
label_ = metadata.label();
if (sqrt_) {
trans_label_.resize(num_data_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
trans_label_[i] = std::copysign(std::sqrt(std::fabs(label_[i])), label_[i]);
trans_label_[i] = Common::Sign(label_[i]) * std::sqrt(std::fabs(label_[i]));
}
label_ = trans_label_.data();
}
......@@ -65,7 +116,7 @@ public:
void ConvertOutput(const double* input, double* output) const override {
if (sqrt_) {
output[0] = std::copysign(input[0] * input[0], input[0]);
output[0] = Common::Sign(input[0]) * input[0] * input[0];
} else {
output[0] = input[0];
}
......@@ -88,12 +139,23 @@ public:
}
}
bool BoostFromAverage() const override {
if (sqrt_) {
return false;
double BoostFromScore() const override {
double suml = 0.0f;
double sumw = 0.0f;
if (weights_ != nullptr) {
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i];
sumw += weights_[i];
}
} else {
return true;
sumw = static_cast<double>(num_data_);
#pragma omp parallel for schedule(static) reduction(+:suml)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i];
}
}
return suml / sumw;
}
protected:
......@@ -113,11 +175,9 @@ protected:
class RegressionL1loss: public RegressionL2loss {
public:
explicit RegressionL1loss(const ObjectiveConfig& config): RegressionL2loss(config) {
eta_ = static_cast<double>(config.gaussian_eta);
}
explicit RegressionL1loss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
}
~RegressionL1loss() {}
......@@ -128,37 +188,71 @@ public:
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (diff >= 0.0f) {
gradients[i] = 1.0f;
} else {
gradients[i] = -1.0f;
}
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
gradients[i] = static_cast<score_t>(Common::Sign(diff));
hessians[i] = 1.0f;
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (diff >= 0.0f) {
gradients[i] = static_cast<score_t>(weights_[i]);
} else {
gradients[i] = static_cast<score_t>(-weights_[i]);
}
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
gradients[i] = static_cast<score_t>(Common::Sign(diff) * weights_[i]);
hessians[i] = weights_[i];
}
}
}
const char* GetName() const override {
return "regression_l1";
double BoostFromScore() const override {
const double alpha = 0.5;
if (weights_ != nullptr) {
#define data_reader(i) (label_[i])
#define weight_reader(i) (weights_[i])
WeightedPercentileFun(label_t, data_reader, weight_reader, num_data_, alpha);
#undef data_reader
#undef weight_reader
} else {
#define data_reader(i) (label_[i])
PercentileFun(label_t, data_reader, num_data_, alpha);
#undef data_reader
}
}
bool IsConstantHessian() const override {
return false;
bool IsRenewTreeOutput() const override { return true; }
double RenewTreeOutput(double, const double* pred,
const data_size_t* index_mapper,
const data_size_t* bagging_mapper,
data_size_t num_data_in_leaf) const override {
const double alpha = 0.5;
if (weights_ == nullptr) {
if (bagging_mapper == nullptr) {
#define data_reader(i) (label_[index_mapper[i]] - pred[index_mapper[i]])
PercentileFun(double, data_reader, num_data_in_leaf, alpha);
#undef data_reader
} else {
#define data_reader(i) (label_[bagging_mapper[index_mapper[i]]] - pred[bagging_mapper[index_mapper[i]]])
PercentileFun(double, data_reader, num_data_in_leaf, alpha);
#undef data_reader
}
} else {
if (bagging_mapper == nullptr) {
#define data_reader(i) (label_[index_mapper[i]] - pred[index_mapper[i]])
#define weight_reader(i) (weights_[index_mapper[i]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha);
#undef data_reader
#undef weight_reader
} else {
#define data_reader(i) (label_[bagging_mapper[index_mapper[i]]] - pred[bagging_mapper[index_mapper[i]]])
#define weight_reader(i) (weights_[bagging_mapper[index_mapper[i]]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha);
#undef data_reader
#undef weight_reader
}
}
}
private:
double eta_;
const char* GetName() const override {
return "regression_l1";
}
};
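The leaf renewal above matters because, with sign gradients and unit hessians, the gradient-based leaf value -sum(grad)/sum(hess) is not the L1-optimal constant; the median of the residuals in the leaf is. A tiny numeric sketch:

import numpy as np

residuals = np.array([-0.2, 0.1, 0.3, 9.0])                     # label - prediction for samples in one leaf
newton_value = -np.sum(np.sign(-residuals)) / len(residuals)    # from sign gradients and unit hessians: 0.5
median_value = np.median(residuals)                             # what RenewTreeOutput installs instead: 0.2
print(newton_value, median_value)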
/*!
......@@ -168,7 +262,6 @@ class RegressionHuberLoss: public RegressionL2loss {
public:
explicit RegressionHuberLoss(const ObjectiveConfig& config): RegressionL2loss(config) {
alpha_ = static_cast<double>(config.alpha);
eta_ = static_cast<double>(config.gaussian_eta);
}
explicit RegressionHuberLoss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
......@@ -184,35 +277,23 @@ public:
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (std::abs(diff) <= alpha_) {
gradients[i] = static_cast<score_t>(diff);
hessians[i] = 1.0f;
} else {
if (diff >= 0.0f) {
gradients[i] = static_cast<score_t>(alpha_);
} else {
gradients[i] = static_cast<score_t>(-alpha_);
}
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
gradients[i] = static_cast<score_t>(Common::Sign(diff) * alpha_);
}
hessians[i] = 1.0f;
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (std::abs(diff) <= alpha_) {
gradients[i] = static_cast<score_t>(diff * weights_[i]);
hessians[i] = static_cast<score_t>(weights_[i]);
} else {
if (diff >= 0.0f) {
gradients[i] = static_cast<score_t>(alpha_ * weights_[i]);
} else {
gradients[i] = static_cast<score_t>(-alpha_ * weights_[i]);
}
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
gradients[i] = static_cast<score_t>(Common::Sign(diff) * weights_[i] * alpha_);
}
hessians[i] = static_cast<score_t>(weights_[i]);
}
}
}
......@@ -228,8 +309,6 @@ public:
private:
/*! \brief delta for Huber loss */
double alpha_;
/*! \brief a parameter to control the width of Gaussian function to approximate hessian */
double eta_;
};
......@@ -286,6 +365,10 @@ class RegressionPoissonLoss: public RegressionL2loss {
public:
explicit RegressionPoissonLoss(const ObjectiveConfig& config): RegressionL2loss(config) {
max_delta_step_ = static_cast<double>(config.poisson_max_delta_step);
if (sqrt_) {
Log::Warning("cannot use sqrt transform in Poisson Regression, will auto disable it.");
sqrt_ = false;
}
}
explicit RegressionPoissonLoss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
......@@ -295,6 +378,10 @@ public:
~RegressionPoissonLoss() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
if (sqrt_) {
Log::Warning("cannot use sqrt transform in Poisson Regression, will auto disable it.");
sqrt_ = false;
}
RegressionL2loss::Init(metadata, num_data);
// Safety check of labels
label_t miny;
......@@ -322,22 +409,19 @@ public:
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double ef = std::exp(score[i]);
gradients[i] = static_cast<score_t>(ef - label_[i]);
hessians[i] = static_cast<score_t>(ef);
gradients[i] = static_cast<score_t>(std::exp(score[i]) - label_[i]);
hessians[i] = static_cast<score_t>(std::exp(score[i] + max_delta_step_));
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double ef = std::exp(score[i]);
gradients[i] = static_cast<score_t>((ef - label_[i]) * weights_[i]);
hessians[i] = static_cast<score_t>(ef * weights_[i]);
gradients[i] = static_cast<score_t>((std::exp(score[i]) - label_[i]) * weights_[i]);
hessians[i] = static_cast<score_t>(std::exp(score[i] + max_delta_step_) * weights_[i]);
}
}
}
void ConvertOutput(const double* input, double* output) const override {
RegressionL2loss::ConvertOutput(input, output);
output[0] = std::exp(input[0]);
}
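The gradient above follows from the Poisson deviance exp(score) - label * score, whose true hessian is exp(score); writing the hessian as exp(score + max_delta_step) inflates it by the constant factor exp(max_delta_step), which damps the per-leaf Newton step and safeguards against huge updates when exp(score) is far below the label. A short sketch of the effect:

import numpy as np

def poisson_grad_hess(score, label, max_delta_step=0.7):
    grad = np.exp(score) - label                   # d/ds [exp(s) - y*s]
    hess = np.exp(score + max_delta_step)          # true hessian exp(s), scaled by exp(max_delta_step)
    return grad, hess

g, h = poisson_grad_hess(score=np.array([-3.0]), label=np.array([5.0]))
print(-g / np.exp(-3.0), -g / h)   # raw Newton step ~ 99.4 vs. damped step ~ 49.4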
......@@ -345,25 +429,8 @@ public:
return "poisson";
}
bool GetCustomAverage(double *initscore) const override {
if (initscore == nullptr) return false;
double sumw = 0.0f;
double sumy = 0.0f;
if (weights_ == nullptr) {
for (data_size_t i = 0; i < num_data_; i++) {
sumy += label_[i];
}
sumw = static_cast<double>(num_data_);
} else {
for (data_size_t i = 0; i < num_data_; i++) {
sumy += weights_[i] * label_[i];
sumw += weights_[i];
}
}
const double yavg = sumy / sumw;
*initscore = std::log(yavg);
Log::Info("[%s:%s]: yavg=%f -> initscore=%f", GetName(), __func__, yavg, *initscore);
return true;
double BoostFromScore() const override {
return std::log(RegressionL2loss::BoostFromScore());
}
bool IsConstantHessian() const override {
......@@ -418,63 +485,159 @@ public:
return "quantile";
}
double BoostFromScore() const override {
if (weights_ != nullptr) {
#define data_reader(i) (label_[i])
#define weight_reader(i) (weights_[i])
WeightedPercentileFun(label_t, data_reader, weight_reader, num_data_, alpha_);
#undef data_reader
#undef weight_reader
} else {
#define data_reader(i) (label_[i])
PercentileFun(label_t, data_reader, num_data_, alpha_);
#undef data_reader
}
}
bool IsRenewTreeOutput() const override { return true; }
double RenewTreeOutput(double, const double* pred,
const data_size_t* index_mapper,
const data_size_t* bagging_mapper,
data_size_t num_data_in_leaf) const override {
if (weights_ == nullptr) {
if (bagging_mapper == nullptr) {
#define data_reader(i) (label_[index_mapper[i]] - pred[index_mapper[i]])
PercentileFun(double, data_reader, num_data_in_leaf, alpha_);
#undef data_reader
} else {
#define data_reader(i) (label_[bagging_mapper[index_mapper[i]]] - pred[bagging_mapper[index_mapper[i]]])
PercentileFun(double, data_reader, num_data_in_leaf, alpha_);
#undef data_reader
}
} else {
if (bagging_mapper == nullptr) {
#define data_reader(i) (label_[index_mapper[i]] - pred[index_mapper[i]])
#define weight_reader(i) (weights_[index_mapper[i]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha_);
#undef data_reader
#undef weight_reader
} else {
#define data_reader(i) (label_[bagging_mapper[index_mapper[i]]] - pred[bagging_mapper[index_mapper[i]]])
#define weight_reader(i) (weights_[bagging_mapper[index_mapper[i]]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha_);
#undef data_reader
#undef weight_reader
}
}
}
private:
score_t alpha_;
};
class RegressionQuantileL2loss : public RegressionL2loss {
/*!
* \brief Mape Regression Loss
*/
class RegressionMAPELOSS : public RegressionL1loss {
public:
explicit RegressionQuantileL2loss(const ObjectiveConfig& config) : RegressionL2loss(config) {
alpha_ = static_cast<score_t>(config.alpha);
explicit RegressionMAPELOSS(const ObjectiveConfig& config) : RegressionL1loss(config) {
}
explicit RegressionQuantileL2loss(const std::vector<std::string>& strs) : RegressionL2loss(strs) {
explicit RegressionMAPELOSS(const std::vector<std::string>& strs) : RegressionL1loss(strs) {
}
~RegressionQuantileL2loss() {}
~RegressionMAPELOSS() {}
void Init(const Metadata& metadata, data_size_t num_data) override {
RegressionL2loss::Init(metadata, num_data);
for (data_size_t i = 0; i < num_data_; ++i) {
if (std::fabs(label_[i]) < 1) {
Log::Warning("Met 'abs(label) < 1', will convert them to '1' in Mape objective and metric.");
break;
}
}
label_weight_.resize(num_data);
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
label_weight_[i] = 1.0f / std::max(1.0f, std::fabs(label_[i]));
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
label_weight_[i] = 1.0f / std::max(1.0f, std::fabs(label_[i])) * weights_[i];
}
}
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
score_t delta = static_cast<score_t>(score[i] - label_[i]);
if (delta > 0) {
gradients[i] = (1.0f - alpha_) * delta;
hessians[i] = (1.0f - alpha_);
} else {
gradients[i] = alpha_ * delta;
hessians[i] = alpha_;
}
const double diff = score[i] - label_[i];
gradients[i] = static_cast<score_t>(Common::Sign(diff) * label_weight_[i]);
hessians[i] = 1.0f;
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
score_t delta = static_cast<score_t>(score[i] - label_[i]);
if (delta > 0) {
gradients[i] = static_cast<score_t>((1.0f - alpha_) * delta * weights_[i]);
hessians[i] = static_cast<score_t>((1.0f - alpha_) * weights_[i]);
} else {
gradients[i] = static_cast<score_t>(alpha_ * delta * weights_[i]);
hessians[i] = static_cast<score_t>(alpha_ * weights_[i]);
}
const double diff = score[i] - label_[i];
gradients[i] = static_cast<score_t>(Common::Sign(diff) * label_weight_[i]);
hessians[i] = weights_[i];
}
}
}
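In effect the MAPE objective is handled as an L1 loss with per-sample weights 1 / max(1, |label|): that weight is precomputed into label_weight_ in Init, the gradient is sign(score - label) times that weight, and the hessian stays constant. A minimal sketch of the same computation:

import numpy as np

def mape_grad_hess(score, label, sample_weight=None):
    label_weight = 1.0 / np.maximum(1.0, np.abs(label))     # the label_weight_ array above
    if sample_weight is not None:
        label_weight = label_weight * sample_weight
    grad = np.sign(score - label) * label_weight
    hess = np.ones_like(score) if sample_weight is None else np.asarray(sample_weight, dtype=float)
    return grad, hess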
bool IsConstantHessian() const override {
return false;
double BoostFromScore() const override {
const double alpha = 0.5;
#define data_reader(i) (label_[i])
#define weight_reader(i) (label_weight_[i])
WeightedPercentileFun(label_t, data_reader, weight_reader, num_data_, alpha);
#undef data_reader
#undef weight_reader
}
bool IsRenewTreeOutput() const override { return true; }
double RenewTreeOutput(double, const double* pred,
const data_size_t* index_mapper,
const data_size_t* bagging_mapper,
data_size_t num_data_in_leaf) const override {
const double alpha = 0.5;
if (bagging_mapper == nullptr) {
#define data_reader(i) (label_[index_mapper[i]] - pred[index_mapper[i]])
#define weight_reader(i) (label_weight_[index_mapper[i]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha);
#undef data_reader
#undef weight_reader
} else {
#define data_reader(i) (label_[bagging_mapper[index_mapper[i]]] - pred[bagging_mapper[index_mapper[i]]])
#define weight_reader(i) (label_weight_[bagging_mapper[index_mapper[i]]])
WeightedPercentileFun(double, data_reader, weight_reader, num_data_in_leaf, alpha);
#undef data_reader
#undef weight_reader
}
}
const char* GetName() const override {
return "quantile_l2";
return "mape";
}
bool IsConstantHessian() const override {
return true;
}
private:
score_t alpha_;
std::vector<label_t> label_weight_;
};
#undef PercentileFun
#undef WeightedPercentileFun
} // namespace LightGBM
#endif // LightGBM_OBJECTIVE_REGRESSION_OBJECTIVE_HPP_
......@@ -104,29 +104,27 @@ public:
return str_buf.str();
}
// allow boost from average option
bool BoostFromAverage() const override { return true; }
// implement custom average to boost from (if enabled among options)
bool GetCustomAverage(double *initscore) const override {
if (initscore == nullptr) return false;
double BoostFromScore() const override {
double suml = 0.0f;
double sumw = 0.0f;
if (weights_ != nullptr) {
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i];
sumw += weights_[i];
}
} else {
sumw = static_cast<double>(num_data_);
#pragma omp parallel for schedule(static) reduction(+:suml)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i];
}
}
double pavg = suml / sumw;
*initscore = std::log(pavg / (1.0f - pavg));
Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, *initscore);
return true;
double initscore = std::log(pavg / (1.0f - pavg));
Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, initscore);
return initscore;
}
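A quick numeric check of the formula: the init score is the logit of the (weighted) average label, so applying the sigmoid to it recovers pavg.

import math

pavg = 0.25
init_score = math.log(pavg / (1.0 - pavg))
print(init_score, 1.0 / (1.0 + math.exp(-init_score)))   # ~ -1.0986, 0.25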
private:
......@@ -232,22 +230,26 @@ public:
return str_buf.str();
}
bool BoostFromAverage() const override { return true; }
bool GetCustomAverage(double *initscore) const override {
if (initscore == nullptr) return false;
double sumy = 0.0f;
for (data_size_t i = 0; i < num_data_; ++i) sumy += label_[i];
double BoostFromScore() const override {
double suml = 0.0f;
double sumw = 0.0f;
if (weights_ != nullptr) {
for (data_size_t i = 0; i < num_data_; ++i) sumw += weights_[i];
#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i];
sumw += weights_[i];
}
} else {
sumw = static_cast<double>(num_data_);
#pragma omp parallel for schedule(static) reduction(+:suml)
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i];
}
}
double havg = sumy / sumw;
*initscore = std::log(std::exp(havg) - 1.0f);
Log::Info("[%s:%s]: havg=%f -> initscore=%f", GetName(), __func__, havg, *initscore);
return true;
double havg = suml / sumw;
double initscore = std::log(std::exp(havg) - 1.0f);
Log::Info("[%s:%s]: havg=%f -> initscore=%f", GetName(), __func__, havg, initscore);
return initscore;
}
private:
......
......@@ -303,10 +303,8 @@ public:
* \return leaf output
*/
static double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians, double l1, double l2) {
double abs_sum_gradients = std::fabs(sum_gradients);
double reg_abs_sum_gradients = std::max(0.0, abs_sum_gradients - l1);
return -std::copysign(reg_abs_sum_gradients, sum_gradients)
/ (sum_hessians + l2);
const double reg_abs_sum_gradients = std::max(0.0, std::fabs(sum_gradients) - l1);
return -(Common::Sign(sum_gradients) * reg_abs_sum_gradients) / (sum_hessians + l2);
}
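The rewritten formula is the usual L1/L2-regularized Newton leaf value: soft-threshold the gradient sum by l1, keep its sign, and divide by the hessian sum plus l2. A worked example:

import math

def leaf_output(sum_gradients, sum_hessians, l1, l2):
    reg = max(0.0, abs(sum_gradients) - l1)                         # soft-thresholding by l1
    return -math.copysign(reg, sum_gradients) / (sum_hessians + l2)

print(leaf_output(-4.0, 10.0, l1=1.0, l2=0.0))   # -> 0.3
print(leaf_output(0.5, 10.0, l1=1.0, l2=0.0))    # -> -0.0, the leaf is pruned to zero by l1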
private:
......