Unverified Commit 1f3e72c4 authored by James Lamb, committed by GitHub

[R-package] fixed best_iter and best_score when training data is passed (fixes #2295, #2525) (#2961)

* [R-package] fixed best_iter and best_score when early_stopping is not used (fixes #2295)

* fixed tests
parent 2b9175aa
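To see the behaviour this change produces, here is a minimal sketch (the `dtrain` and `dvalid1` lgb.Dataset objects are assumed; full setups appear in the new tests below): without early stopping, best_iter and best_score are now taken from the first non-training validation set rather than from the training data.

library(lightgbm)
# sketch only: 'dtrain' and 'dvalid1' are assumed lgb.Dataset objects (see the tests below)
bst <- lgb.train(
  data = dtrain
  , nrounds = 10L
  , valids = list("train" = dtrain, "valid1" = dvalid1)
  , params = list(objective = "regression", metric = "rmse", learning_rate = 1.5)
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
bst$best_iter == which.min(rmse_scores)            # TRUE: best iteration comes from valid1, not train
bst$best_score == rmse_scores[[bst$best_iter]]     # TRUE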
# constants that control naming in lists
.EVAL_KEY <- function() {
return("eval")
}
.EVAL_ERR_KEY <- function() {
return("eval_err")
}
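For context, these keys address the nested record_evals list that the callback below fills in. A sketch of that layout, inferred from the access patterns in this diff (illustrative values only):

# sketch of the record_evals layout addressed by .EVAL_KEY() / .EVAL_ERR_KEY()
record_evals <- list(
  start_iter = 1L                      # iteration at which recording starts
  , valid1 = list(                     # one entry per evaluation dataset
    rmse = list(                       # one entry per metric
      eval = list(1.9, 1.2, 0.8)       # metric value at each round (.EVAL_KEY())
      , eval_err = list()              # standard errors when reported, e.g. by lgb.cv (.EVAL_ERR_KEY())
    )
  )
)
unlist(record_evals[["valid1"]][["rmse"]][["eval"]])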
#' @importFrom R6 R6Class
CB_ENV <- R6::R6Class(
  "lgb.cb_env",
@@ -216,8 +224,8 @@ cb.record.evaluation <- function() {
    # Create dummy lists
    env$model$record_evals[[data_name]][[name]] <- list()
-   env$model$record_evals[[data_name]][[name]]$eval <- list()
-   env$model$record_evals[[data_name]][[name]]$eval_err <- list()
+   env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- list()
+   env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- list()
  }
@@ -238,12 +246,12 @@ cb.record.evaluation <- function() {
    name <- eval_res$name
    # Store evaluation data
-   env$model$record_evals[[data_name]][[name]]$eval <- c(
-     env$model$record_evals[[data_name]][[name]]$eval
+   env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]] <- c(
+     env$model$record_evals[[data_name]][[name]][[.EVAL_KEY()]]
      , eval_res$value
    )
-   env$model$record_evals[[data_name]][[name]]$eval_err <- c(
-     env$model$record_evals[[data_name]][[name]]$eval_err
+   env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]] <- c(
+     env$model$record_evals[[data_name]][[name]][[.EVAL_ERR_KEY()]]
      , eval_err
    )
...
@@ -5,7 +5,7 @@ Booster <- R6::R6Class(
  public = list(
    best_iter = -1L,
-   best_score = NA,
+   best_score = NA_real_,
    record_evals = list(),
    # Finalize will free up the handles
@@ -989,11 +989,11 @@ lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_
  }
  # Create result
- result <- booster$record_evals[[data_name]][[eval_name]]$eval
+ result <- booster$record_evals[[data_name]][[eval_name]][[.EVAL_KEY()]]
  # Check if error is requested
  if (is_err) {
-   result <- booster$record_evals[[data_name]][[eval_name]]$eval_err
+   result <- booster$record_evals[[data_name]][[eval_name]][[.EVAL_ERR_KEY()]]
  }
  # Check if iteration is non existant
...
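Since lgb.get.eval.result() now reads through the same keys, the caller-facing behaviour is unchanged; a usage sketch (the trained booster `bst` and the names "valid1"/"rmse" are assumptions, matching the tests below):

# sketch: 'bst' assumed trained with a validation set named "valid1" and metric "rmse"
rmse_per_iter <- lgb.get.eval.result(
  booster = bst
  , data_name = "valid1"
  , eval_name = "rmse"
)
# pass is_err = TRUE to get the recorded standard errors instead of the metric values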
@@ -370,14 +370,22 @@ lgb.cv <- function(params = list()
  }
+ # When early stopping is not activated, we compute the best iteration / score ourselves
+ # based on the first metric
  if (record && is.na(env$best_score)) {
-   if (env$eval_list[[1L]]$higher_better[1L] == TRUE) {
-     cv_booster$best_iter <- unname(which.max(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]])))
-     cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]]
-   } else {
-     cv_booster$best_iter <- unname(which.min(unlist(cv_booster$record_evals[[2L]][[1L]][[1L]])))
-     cv_booster$best_score <- cv_booster$record_evals[[2L]][[1L]][[1L]][[cv_booster$best_iter]]
-   }
+   first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L]
+   .find_best <- which.min
+   if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
+     .find_best <- which.max
+   }
+   cv_booster$best_iter <- unname(
+     .find_best(
+       unlist(
+         cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]]
+       )
+     )
+   )
+   cv_booster$best_score <- cv_booster$record_evals[["valid"]][[first_metric]][[.EVAL_KEY()]][[cv_booster$best_iter]]
  }
  if (reset_data) {
...
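For lgb.cv() this means best_iter and best_score now line up with the recorded per-round means under the "valid" key; a sketch assuming a `dtrain` lgb.Dataset like the one built in the new test below:

# sketch: 'dtrain' is an assumed lgb.Dataset (see the new lgb.cv() test below)
cv_bst <- lgb.cv(
  data = dtrain
  , nfold = 3L
  , nrounds = 10L
  , params = list(objective = "regression", metric = "rmse")
)
rmse_means <- unlist(cv_bst$record_evals[["valid"]][["rmse"]][["eval"]])
cv_bst$best_iter == which.min(rmse_means)            # TRUE: round with lowest mean validation RMSE
cv_bst$best_score == rmse_means[[cv_bst$best_iter]]  # TRUE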
@@ -237,6 +237,7 @@ lgb.train <- function(params = list(),
  if (valid_contain_train) {
    booster$set_train_data_name(train_data_name)
  }
  for (key in names(reduced_valid_sets)) {
    booster$add_valid(reduced_valid_sets[[key]], key)
  }
@@ -290,16 +291,26 @@ lgb.train <- function(params = list(),
  }
+ # check if any valids were given other than the training data
+ non_train_valid_names <- names(valids)[!(names(valids) == train_data_name)]
+ first_valid_name <- non_train_valid_names[1L]
  # When early stopping is not activated, we compute the best iteration / score ourselves by
  # selecting the first metric and the first dataset
- if (record && length(valids) > 0L && is.na(env$best_score)) {
-   if (env$eval_list[[1L]]$higher_better[1L] == TRUE) {
-     booster$best_iter <- unname(which.max(unlist(booster$record_evals[[2L]][[1L]][[1L]])))
-     booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]]
-   } else {
-     booster$best_iter <- unname(which.min(unlist(booster$record_evals[[2L]][[1L]][[1L]])))
-     booster$best_score <- booster$record_evals[[2L]][[1L]][[1L]][[booster$best_iter]]
-   }
+ if (record && length(non_train_valid_names) > 0L && is.na(env$best_score)) {
+   first_metric <- booster$.__enclos_env__$private$eval_names[1L]
+   .find_best <- which.min
+   if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
+     .find_best <- which.max
+   }
+   booster$best_iter <- unname(
+     .find_best(
+       unlist(
+         booster$record_evals[[first_valid_name]][[first_metric]][[.EVAL_KEY()]]
+       )
+     )
+   )
+   booster$best_score <- booster$record_evals[[first_valid_name]][[first_metric]][[.EVAL_KEY()]][[booster$best_iter]]
  }
  # Check for booster model conversion to predictor model
...
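The filter above compares the valid names against train_data_name, which at this point holds whatever key the training data was detected under in valids (that is how "something-random-we-would-not-hardcode" in the tests below is excluded); a tiny sketch with hypothetical names:

# hypothetical names, mirroring the filtering added above
valid_names <- c("valid1", "train", "valid2")
train_data_name <- "train"
non_train_valid_names <- valid_names[!(valid_names == train_data_name)]
first_valid_name <- non_train_valid_names[1L]   # "valid1": best_iter / best_score come from this dataset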
@@ -250,6 +250,37 @@ test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset
  }
})
test_that("lightgbm.cv() gives the correct best_score and best_iter for a metric where higher values are better", {
set.seed(708L)
dtrain <- lgb.Dataset(
data = as.matrix(runif(n = 500L, min = 0.0, max = 15.0), drop = FALSE)
, label = rep(c(0L, 1L), 250L)
)
nrounds <- 10L
cv_bst <- lgb.cv(
data = dtrain
, nfold = 5L
, nrounds = nrounds
, num_leaves = 5L
, params = list(
objective = "binary"
, metric = "auc,binary_error"
, learning_rate = 1.5
)
)
expect_is(cv_bst, "lgb.CVBooster")
expect_named(
cv_bst$record_evals
, c("start_iter", "valid")
, ignore.order = FALSE
, ignore.case = FALSE
)
auc_scores <- unlist(cv_bst$record_evals[["valid"]][["auc"]][["eval"]])
expect_length(auc_scores, nrounds)
expect_identical(cv_bst$best_iter, which.max(auc_scores))
expect_identical(cv_bst$best_score, auc_scores[which.max(auc_scores)])
})
context("lgb.train()") context("lgb.train()")
test_that("lgb.train() works as expected with multiple eval metrics", { test_that("lgb.train() works as expected with multiple eval metrics", {
...@@ -595,3 +626,266 @@ test_that("lgb.train() supports non-ASCII feature names", { ...@@ -595,3 +626,266 @@ test_that("lgb.train() supports non-ASCII feature names", {
, feature_names , feature_names
) )
}) })
test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", {
set.seed(708L)
trainDF <- data.frame(
"feat1" = rep(c(10.0, 100.0), 500L)
, "target" = rep(c(-50.0, 50.0), 500L)
)
validDF <- data.frame(
"feat1" = rep(50.0, 4L)
, "target" = rep(50.0, 4L)
)
dtrain <- lgb.Dataset(
data = as.matrix(trainDF[["feat1"]], drop = FALSE)
, label = trainDF[["target"]]
)
dvalid1 <- lgb.Dataset(
data = as.matrix(validDF[["feat1"]], drop = FALSE)
, label = validDF[["target"]]
)
dvalid2 <- lgb.Dataset(
data = as.matrix(validDF[1L:10L, "feat1"], drop = FALSE)
, label = validDF[1L:10L, "target"]
)
nrounds <- 10L
train_params <- list(
objective = "regression"
, metric = "rmse"
, learning_rate = 1.5
)
# example 1: two valids, neither are the training data
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
)
, params = train_params
)
expect_named(
bst$record_evals
, c("start_iter", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
expect_length(rmse_scores, nrounds)
expect_identical(bst$best_iter, which.min(rmse_scores))
expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
# example 2: train first (called "train") and two valids
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
, "valid1" = dvalid1
, "valid2" = dvalid2
)
, params = train_params
)
expect_named(
bst$record_evals
, c("start_iter", "train", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
expect_length(rmse_scores, nrounds)
expect_identical(bst$best_iter, which.min(rmse_scores))
expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
# example 3: train second (called "train") and two valids
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "train" = dtrain
, "valid2" = dvalid2
)
, params = train_params
)
# note that "train" still ends up as the first one
expect_named(
bst$record_evals
, c("start_iter", "train", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
expect_length(rmse_scores, nrounds)
expect_identical(bst$best_iter, which.min(rmse_scores))
expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
# example 4: train third (called "train") and two valids
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
, "train" = dtrain
)
, params = train_params
)
# note that "train" still ends up as the first one
expect_named(
bst$record_evals
, c("start_iter", "train", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
expect_length(rmse_scores, nrounds)
expect_identical(bst$best_iter, which.min(rmse_scores))
expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
# example 5: train second (called "something-random-we-would-not-hardcode") and two valids
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
, "valid2" = dvalid2
)
, params = train_params
)
# note that "something-random-we-would-not-hardcode" was recognized as the training
# data even though it isn't named "train"
expect_named(
bst$record_evals
, c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
expect_length(rmse_scores, nrounds)
expect_identical(bst$best_iter, which.min(rmse_scores))
expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])
# example 6: the only valid supplied is the training data
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
)
, params = train_params
)
expect_identical(bst$best_iter, -1L)
expect_identical(bst$best_score, NA_real_)
})
test_that("lightgbm.train() gives the correct best_score and best_iter for a metric where higher values are better", {
set.seed(708L)
trainDF <- data.frame(
"feat1" = runif(n = 500L, min = 0.0, max = 15.0)
, "target" = rep(c(0L, 1L), 500L)
)
validDF <- data.frame(
"feat1" = runif(n = 50L, min = 0.0, max = 15.0)
, "target" = rep(c(0L, 1L), 50L)
)
dtrain <- lgb.Dataset(
data = as.matrix(trainDF[["feat1"]], drop = FALSE)
, label = trainDF[["target"]]
)
dvalid1 <- lgb.Dataset(
data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
, label = validDF[1L:25L, "target"]
)
nrounds <- 10L
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
)
, params = list(
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
)
)
# note that "something-random-we-would-not-hardcode" was recognized as the training
# data even though it isn't named "train"
expect_named(
bst$record_evals
, c("start_iter", "something-random-we-would-not-hardcode", "valid1")
, ignore.order = FALSE
, ignore.case = FALSE
)
auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]])
expect_length(auc_scores, nrounds)
expect_identical(bst$best_iter, which.max(auc_scores))
expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
})
test_that("using lightgbm() without early stopping, best_iter and best_score come from valids and not training data", {
set.seed(708L)
# example: train second (called "something-random-we-would-not-hardcode"), two valids,
# and a metric where higher values are better ("auc")
trainDF <- data.frame(
"feat1" = runif(n = 500L, min = 0.0, max = 15.0)
, "target" = rep(c(0L, 1L), 500L)
)
validDF <- data.frame(
"feat1" = runif(n = 50L, min = 0.0, max = 15.0)
, "target" = rep(c(0L, 1L), 50L)
)
dtrain <- lgb.Dataset(
data = as.matrix(trainDF[["feat1"]], drop = FALSE)
, label = trainDF[["target"]]
)
dvalid1 <- lgb.Dataset(
data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
, label = validDF[1L:25L, "target"]
)
dvalid2 <- lgb.Dataset(
data = as.matrix(validDF[26L:50L, "feat1"], drop = FALSE)
, label = validDF[26L:50L, "target"]
)
nrounds <- 10L
bst <- lightgbm(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
, "valid2" = dvalid2
)
, params = list(
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
)
, verbose = -7L
)
# when verbose <= 0 is passed to lightgbm(), 'valids' is passed through to lgb.train()
# untouched. If you set verbose to > 0, the training data will still be first but called "train"
expect_named(
bst$record_evals
, c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2")
, ignore.order = FALSE
, ignore.case = FALSE
)
auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]])
expect_length(auc_scores, nrounds)
expect_identical(bst$best_iter, which.max(auc_scores))
expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
})