Unverified Commit f0bca1a2 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] avoid unnecessary computation of std deviations in `lgb.cv()` (#4360)

* [R-package] avoid unnecessary computation of std deviations in lgb.cv()

* use expect_equal()
parent 4af4698b
......@@ -26,7 +26,9 @@ CVBooster <- R6::R6Class(
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
#' @param weight vector of response values. If not NULL, it will be set on the dataset
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
#' slight speedup by avoiding unnecessary computation.
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
......@@ -379,7 +381,10 @@ lgb.cv <- function(params = list()
})
# Prepare collection of evaluation results
merged_msg <- lgb.merge.cv.result(msg = msg)
merged_msg <- lgb.merge.cv.result(
msg = msg
, showsd = showsd
)
# Write evaluation result in environment
env$eval_list <- merged_msg$eval_list
......@@ -576,7 +581,7 @@ lgb.stratified.folds <- function(y, k) {
return(out)
}
lgb.merge.cv.result <- function(msg, showsd = TRUE) {
lgb.merge.cv.result <- function(msg, showsd) {
# Get CV message length
if (length(msg) == 0L) {
......
......@@ -88,7 +88,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation.
This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
slight speedup by avoiding unnecessary computation.}
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}
......
......@@ -389,6 +389,39 @@ test_that("lgb.cv() fit on linearly-relatead data improves when using linear lea
expect_true(cv_bst_linear$best_score < cv_bst$best_score)
})
# Checks how the `showsd` argument of lgb.cv() affects the recorded evaluation
# results: the "eval" entries must match between showsd = TRUE and
# showsd = FALSE, while "eval_err" must hold one entry per boosting round only
# when showsd = TRUE and be an empty list otherwise.
test_that("lgb.cv() respects showsd argument", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(objective = "regression", metric = "l2")
  nrounds <- 5L

  # Runs one cross-validation with a fixed seed (so both runs are seeded
  # identically) and returns the recorded "l2" results for the "valid" data.
  .cv_l2_record <- function(showsd) {
    set.seed(708L)
    cv_bst <- lgb.cv(
      params = params
      , data = dtrain
      , nrounds = nrounds
      , nfold = 3L
      , min_data = 1L
      , showsd = showsd
    )
    return(cv_bst$record_evals[["valid"]][["l2"]])
  }

  record_with_sd <- .cv_l2_record(showsd = TRUE)
  record_without_sd <- .cv_l2_record(showsd = FALSE)

  # the recorded metric values themselves should not depend on showsd
  expect_equal(
    record_with_sd[["eval"]]
    , record_without_sd[["eval"]]
  )

  # with showsd = TRUE there is one "eval_err" entry per iteration
  err_with_sd <- record_with_sd[["eval_err"]]
  expect_is(err_with_sd, "list")
  expect_equal(length(err_with_sd), nrounds)

  # with showsd = FALSE nothing is recorded in "eval_err"
  expect_identical(record_without_sd[["eval_err"]], list())
})
context("lgb.train()")
test_that("lgb.train() works as expected with multiple eval metrics", {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment