Unverified Commit f6d654b7 authored by shiyu1994's avatar shiyu1994 Committed by GitHub
Browse files

[fix] fix duplicated addition of initial scores for single-leaf trees (fixes #4708)



* fix duplicated addition of initial scores for single-leaf trees

* add test case

* Fix import in Python test

* commit python suggestions
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 3032b646
......@@ -419,20 +419,15 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
} else {
// only add default score one-time
if (models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
double output = 0.0;
if (!class_need_train_[cur_tree_id]) {
if (objective_function_ != nullptr) {
output = objective_function_->BoostFromScore(cur_tree_id);
}
} else {
output = init_scores[cur_tree_id];
}
new_tree->AsConstantTree(output);
if (objective_function_ != nullptr && !config_->boost_from_average && !train_score_updater_->has_init_score()) {
init_scores[cur_tree_id] = ObtainAutomaticInitialScore(objective_function_, cur_tree_id);
// updates scores
train_score_updater_->AddScore(output, cur_tree_id);
train_score_updater_->AddScore(init_scores[cur_tree_id], cur_tree_id);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(output, cur_tree_id);
score_updater->AddScore(init_scores[cur_tree_id], cur_tree_id);
}
}
new_tree->AsConstantTree(init_scores[cur_tree_id]);
}
}
// add model
......
......@@ -3424,3 +3424,31 @@ def test_pandas_nullable_dtypes():
# test equal predictions
np.testing.assert_allclose(preds, preds_nullable_dtypes)
def test_boost_from_average_with_single_leaf_trees():
    """Regression test for LightGBM issue #4708.

    Trains a small regression model with ``boost_from_average`` enabled on the
    six-row dataset from the bug report, then checks that the mean of the
    predictions on the training rows lies within the range of the labels.
    The associated fix addresses initial scores being added more than once
    for single-leaf trees; presumably that double-counting is what pushed
    the mean prediction outside the label range.
    """
    # Data copied from the bug report:
    # https://github.com/microsoft/LightGBM/issues/4708
    features = np.array(
        [
            [1021.0589, 1018.9578],
            [1023.85754, 1018.7854],
            [1024.5468, 1018.88513],
            [1019.02954, 1018.88513],
            [1016.79926, 1018.88513],
            [1007.6, 1018.88513],
        ],
        dtype=np.float32,
    )
    targets = np.array(
        [1023.8, 1024.6, 1024.4, 1023.8, 1022.0, 1014.4],
        dtype=np.float32,
    )
    train_params = {
        "extra_trees": True,
        "min_data_in_bin": 1,
        "extra_seed": 7,
        "objective": "regression",
        "verbose": -1,
        "boost_from_average": True,
        "min_data_in_leaf": 1,
    }

    dataset = lgb.Dataset(features, targets)
    booster = lgb.train(params=train_params, train_set=dataset, num_boost_round=10)

    # The average prediction must stay inside the span of the observed labels.
    average_prediction = np.mean(booster.predict(features))
    assert targets.min() <= average_prediction <= targets.max()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment