[R-package] avoid unnecessary computation of std deviations in `lgb.cv()` (#4360)

* [R-package] avoid unnecessary computation of std deviations in `lgb.cv()`
* use `expect_equal()`

[R-package] avoid unnecessary computation of std deviations in `lgb.cv()` (#4360)
* [R-package] avoid unnecessary computation of std deviations in `lgb.cv()`
* use `expect_equal()`
f0bca1a2 · James Lamb · GitHub · 4af4698b · f0bca1a2 · f0bca1a2
Unverified Commit f0bca1a2 authored Jun 12, 2021 by James Lamb Committed by GitHub Jun 12, 2021
Showing with 44 additions and 4 deletions

R-package/R/lgb.cv.R R-package/R/lgb.cv.R +8 -3

R-package/man/lgb.cv.Rd R-package/man/lgb.cv.Rd +3 -1

R-package/tests/testthat/test_basic.R R-package/tests/testthat/test_basic.R +33 -0

No files found.
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -26,7 +26,9 @@ CVBooster <- R6::R6Class(
 #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
 #' @param weight vector of response values. If not NULL, will set to dataset
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
-#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
+#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
+#'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
+#'               slight speedup by avoiding unnecessary computation.
 #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
 #'                   by the values of outcome labels.
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
@@ -379,7 +381,10 @@ lgb.cv <- function(params = list()
    })

    # Prepare collection of evaluation results
-    merged_msg <- lgb.merge.cv.result(msg = msg)
+    merged_msg <- lgb.merge.cv.result(
+      msg = msg
+      , showsd = showsd
+    )

    # Write evaluation result in environment
    env$eval_list <- merged_msg$eval_list
@@ -576,7 +581,7 @@ lgb.stratified.folds <- function(y, k) {
  return(out)
 }

-lgb.merge.cv.result <- function(msg, showsd = TRUE) {
+lgb.merge.cv.result <- function(msg, showsd) {

  # Get CV message length
  if (length(msg) == 0L) {

--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -88,7 +88,9 @@ may allow you to pass other types of data like \code{matrix} and then separately

 \item{eval_freq}{evaluation output frequency, only effect when verbose > 0}

-\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
+\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation.
+This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
+slight speedup by avoiding unnecessary computation.}

 \item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
 by the values of outcome labels.}

--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -389,6 +389,39 @@ test_that("lgb.cv() fit on linearly-relatead data improves when using linear lea
  expect_true(cv_bst_linear$best_score < cv_bst$best_score)
 })

+test_that("lgb.cv() respects showsd argument", {
+  dtrain <- lgb.Dataset(train$data, label = train$label)
+  params <- list(objective = "regression", metric = "l2")
+  nrounds <- 5L
+  set.seed(708L)
+  bst_showsd <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , min_data = 1L
+    , showsd = TRUE
+  )
+  evals_showsd <- bst_showsd$record_evals[["valid"]][["l2"]]
+  set.seed(708L)
+  bst_no_showsd <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , min_data = 1L
+    , showsd = FALSE
+  )
+  evals_no_showsd <- bst_no_showsd$record_evals[["valid"]][["l2"]]
+  expect_equal(
+    evals_showsd[["eval"]]
+    , evals_no_showsd[["eval"]]
+  )
+  expect_is(evals_showsd[["eval_err"]], "list")
+  expect_equal(length(evals_showsd[["eval_err"]]), nrounds)
+  expect_identical(evals_no_showsd[["eval_err"]], list())
+})
+
 context("lgb.train()")

 test_that("lgb.train() works as expected with multiple eval metrics", {