"...git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "501ce1cb63e39c67ceb93a063662f3d9867e044c"
Unverified Commit 9a4e7068 authored by José Morales's avatar José Morales Committed by GitHub
Browse files

[python-package] [R-package] propagate the best iteration of cvbooster into...

[python-package] [R-package] propagate the best iteration of cvbooster into the individual boosters (#5066)
parent 9307d538
...@@ -434,6 +434,10 @@ lgb.cv <- function(params = list() ...@@ -434,6 +434,10 @@ lgb.cv <- function(params = list()
) )
cv_booster$best_score <- cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]][[cv_booster$best_iter]] cv_booster$best_score <- cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]][[cv_booster$best_iter]]
} }
# Propagate the best_iter attribute from the cv_booster to the individual boosters
for (bst in cv_booster$boosters) {
bst$booster$best_iter <- cv_booster$best_iter
}
if (reset_data) { if (reset_data) {
lapply(cv_booster$boosters, function(fd) { lapply(cv_booster$boosters, function(fd) {
......
...@@ -2207,6 +2207,16 @@ test_that("early stopping works with lgb.cv()", { ...@@ -2207,6 +2207,16 @@ test_that("early stopping works with lgb.cv()", {
length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]]) length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
, early_stopping_rounds + 1L , early_stopping_rounds + 1L
) )
# every booster's predict method should use best_iter as num_iteration in predict
random_data <- as.matrix(rnorm(10L), ncol = 1L, drop = FALSE)
for (x in bst$boosters) {
expect_equal(x$booster$best_iter, bst$best_iter)
expect_gt(x$booster$current_iter(), bst$best_iter)
preds_iter <- predict(x$booster, random_data, num_iteration = bst$best_iter)
preds_no_iter <- predict(x$booster, random_data)
expect_equal(preds_iter, preds_no_iter)
}
}) })
test_that("lgb.cv() respects changes to logging verbosity", { test_that("lgb.cv() respects changes to logging verbosity", {
......
...@@ -581,6 +581,8 @@ def cv(params, train_set, num_boost_round=100, ...@@ -581,6 +581,8 @@ def cv(params, train_set, num_boost_round=100,
evaluation_result_list=res)) evaluation_result_list=res))
except callback.EarlyStopException as earlyStopException: except callback.EarlyStopException as earlyStopException:
cvfolds.best_iteration = earlyStopException.best_iteration + 1 cvfolds.best_iteration = earlyStopException.best_iteration + 1
for bst in cvfolds.boosters:
bst.best_iteration = cvfolds.best_iteration
for k in results: for k in results:
results[k] = results[k][:cvfolds.best_iteration] results[k] = results[k][:cvfolds.best_iteration]
break break
......
...@@ -1025,24 +1025,30 @@ def test_cvbooster(): ...@@ -1025,24 +1025,30 @@ def test_cvbooster():
'metric': 'binary_logloss', 'metric': 'binary_logloss',
'verbose': -1, 'verbose': -1,
} }
nfold = 3
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
# with early stopping # with early stopping
cv_res = lgb.cv(params, lgb_train, cv_res = lgb.cv(params, lgb_train,
num_boost_round=25, num_boost_round=25,
nfold=3, nfold=nfold,
callbacks=[lgb.early_stopping(stopping_rounds=5)], callbacks=[lgb.early_stopping(stopping_rounds=5)],
return_cvbooster=True) return_cvbooster=True)
assert 'cvbooster' in cv_res assert 'cvbooster' in cv_res
cvb = cv_res['cvbooster'] cvb = cv_res['cvbooster']
assert isinstance(cvb, lgb.CVBooster) assert isinstance(cvb, lgb.CVBooster)
assert isinstance(cvb.boosters, list) assert isinstance(cvb.boosters, list)
assert len(cvb.boosters) == 3 assert len(cvb.boosters) == nfold
assert all(isinstance(bst, lgb.Booster) for bst in cvb.boosters) assert all(isinstance(bst, lgb.Booster) for bst in cvb.boosters)
assert cvb.best_iteration > 0 assert cvb.best_iteration > 0
# predict by each fold booster # predict by each fold booster
preds = cvb.predict(X_test, num_iteration=cvb.best_iteration) preds = cvb.predict(X_test)
assert isinstance(preds, list) assert isinstance(preds, list)
assert len(preds) == 3 assert len(preds) == nfold
# check that each booster predicted using the best iteration
for fold_preds, bst in zip(preds, cvb.boosters):
assert bst.best_iteration == cvb.best_iteration
expected = bst.predict(X_test, num_iteration=cvb.best_iteration)
np.testing.assert_allclose(fold_preds, expected)
# fold averaging # fold averaging
avg_pred = np.mean(preds, axis=0) avg_pred = np.mean(preds, axis=0)
ret = log_loss(y_test, avg_pred) ret = log_loss(y_test, avg_pred)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment