"src/vscode:/vscode.git/clone" did not exist on "de341926c7b7e5c2a55e2ada2dde17c4a636847c"
Commit a3c63ff7 authored by olofer, committed by Guolin Ke

Added method for customizable "boost-from-average" (#731)

* Created objectives and metrics xentropy and xentropy1

* Some comment and code cleanup.

* Added Kullback-Leibler version of metric. Changed some warning messages.

* Fixed sign error in KL-divergence calc.

* Removed __PRETTY_FUNCTION__.

* Picked a better name for the alternative xentropy parameterization.
Documented details on the objectives / metrics in code comments.

* Common code for label interval checks. Cleanups.

* Use common utility for various weight property checks.

* Added code for customizable initial average to boost from.

* Fixed spelling error in aliases.
parent 1f711156
@@ -370,7 +370,7 @@ struct ParameterAlias {
   { "is_sparse", "is_enable_sparse" },
   { "enable_sparse", "is_enable_sparse" },
   { "pre_partition", "is_pre_partition" },
-  { "tranining_metric", "is_training_metric" },
+  { "training_metric", "is_training_metric" },
   { "train_metric", "is_training_metric" },
   { "ndcg_at", "ndcg_eval_at" },
   { "eval_at", "ndcg_eval_at" },
@@ -37,6 +37,8 @@ public:
virtual bool BoostFromAverage() const { return false; }
virtual bool GetCustomAverage(double *) const { return false; }
virtual bool SkipEmptyClass() const { return false; }
virtual int NumTreePerIteration() const { return 1; }
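The two hooks above define the opt-in contract: an objective signals that boosting may start from an average via BoostFromAverage(), and can additionally supply its own starting raw score through GetCustomAverage(). A minimal self-contained sketch of that pattern follows; ObjectiveHooks, CustomStartObjective, and the placeholder score are hypothetical names used only for illustration, not part of the patch.

#include <cstdio>

// Simplified stand-in for the two virtual hooks above (not the real ObjectiveFunction).
struct ObjectiveHooks {
  virtual ~ObjectiveHooks() = default;
  virtual bool BoostFromAverage() const { return false; }
  virtual bool GetCustomAverage(double*) const { return false; }
};

// Hypothetical objective that opts in and supplies its own starting raw score.
struct CustomStartObjective : ObjectiveHooks {
  bool BoostFromAverage() const override { return true; }
  bool GetCustomAverage(double* initscore) const override {
    if (initscore == nullptr) return false;
    *initscore = -1.0;  // placeholder; a real objective derives this from the labels
    return true;
  }
};

int main() {
  CustomStartObjective obj;
  double init_score = 0.0;
  if (obj.BoostFromAverage() && obj.GetCustomAverage(&init_score)) {
    std::printf("boosting starts from raw score %f\n", init_score);
  }
  return 0;
}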
@@ -433,8 +433,13 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
       && num_class_ <= 1
       && objective_function_ != nullptr
       && objective_function_->BoostFromAverage()) {
+    double init_score = 0.0f;
+    // First try to poll the optional custom average score calculation for the specific objective
+    if (!objective_function_->GetCustomAverage(&init_score)) {
+      // otherwise compute a standard label average
     auto label = train_data_->metadata().label();
-    double init_score = LabelAverage(label, num_data_);
+      init_score = LabelAverage(label, num_data_);
+    }
     std::unique_ptr<Tree> new_tree(new Tree(2));
     new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_score, init_score, 0, 0, -1, 0, 0, 0);
     train_score_updater_->AddScore(init_score, 0);
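The block above first polls the objective for a custom average and only falls back to LabelAverage otherwise; the resulting init_score then seeds a trivial two-leaf tree whose leaves both output init_score. A standalone sketch of that fallback follows, assuming LabelAverage is a plain unweighted mean; PlainLabelAverage and InitialScore are hypothetical names for illustration only.

#include <vector>

// Hypothetical stand-in for LabelAverage: a plain unweighted mean of the labels
// (assumption; the real helper may differ in detail).
static double PlainLabelAverage(const std::vector<float>& label) {
  if (label.empty()) return 0.0;
  double sum = 0.0;
  for (float y : label) sum += y;
  return sum / static_cast<double>(label.size());
}

// Mirrors the dispatch above: prefer the objective's custom average, else the label mean.
template <typename Objective>
double InitialScore(const Objective& objective, const std::vector<float>& label) {
  double init_score = 0.0;
  if (!objective.GetCustomAverage(&init_score)) {
    init_score = PlainLabelAverage(label);
  }
  return init_score;
}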
@@ -102,8 +102,31 @@ public:
return str_buf.str();
}
// allow boost from average option
bool BoostFromAverage() const override { return true; }
// implement custom average to boost from (if enabled among options)
bool GetCustomAverage(double *initscore) const override {
if (initscore == nullptr) return false;
double suml = 0.0f;
double sumw = 0.0f;
if (weights_ != nullptr) {
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i] * weights_[i];
sumw += weights_[i];
}
} else {
sumw = static_cast<double>(num_data_);
for (data_size_t i = 0; i < num_data_; ++i) {
suml += label_[i];
}
}
double pavg = suml / sumw;
*initscore = std::log(pavg / (1.0f - pavg));
Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, *initscore);
return true;
}
private:
/*! \brief Number of data points */
data_size_t num_data_;
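For this cross-entropy objective the custom average is the log-odds of the (weighted) mean label, so pushing the initial raw score back through the sigmoid reproduces that mean exactly, which is what makes it a sensible starting point. A quick self-contained check (not part of the patch; example numbers only):

#include <cmath>
#include <cstdio>

int main() {
  const double pavg = 0.25;  // example weighted label average
  const double initscore = std::log(pavg / (1.0 - pavg));       // log-odds, approx -1.0986
  const double recovered = 1.0 / (1.0 + std::exp(-initscore));  // sigmoid, approx 0.25
  std::printf("initscore=%f recovered pavg=%f\n", initscore, recovered);
  return 0;
}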
@@ -207,9 +230,24 @@ public:
return str_buf.str();
}
// might want to boost from a weighted average in general, if possible
bool BoostFromAverage() const override { return true; }
bool GetCustomAverage(double *initscore) const override {
if (initscore == nullptr) return false;
double sumy = 0.0f;
for (data_size_t i = 0; i < num_data_; ++i) sumy += label_[i];
double sumw = 0.0f;
if (weights_ != nullptr) {
for (data_size_t i = 0; i < num_data_; ++i) sumw += weights_[i];
} else {
sumw = static_cast<double>(num_data_);
}
double havg = sumy / sumw;
*initscore = std::log(std::exp(havg) - 1.0f);
Log::Info("[%s:%s]: havg=%f -> initscore=%f", GetName(), __func__, havg, *initscore);
return true;
}
private:
/*! \brief Number of data points */
data_size_t num_data_;
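For the alternative parameterization the starting score is log(exp(havg) - 1), i.e. the inverse of the softplus link log(1 + exp(x)) that this objective appears to use for predictions, so the initial prediction again matches the average label. A quick self-contained check (not part of the patch; example numbers only):

#include <cmath>
#include <cstdio>

int main() {
  const double havg = 0.7;  // example (weighted) label average, must be > 0
  const double initscore = std::log(std::exp(havg) - 1.0);
  const double recovered = std::log1p(std::exp(initscore));  // softplus(initscore), approx 0.7
  std::printf("initscore=%f recovered havg=%f\n", initscore, recovered);
  return 0;
}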