Unverified Commit 1617a63e authored by James Lamb, committed by GitHub
Browse files

[R-package] stop automatically calculating eval metrics on training data in lightgbm() (#5209)

* [R-package] stop automatically calculating eval metrics on training data in lightgbm()

* update docs

* update docs

* roxygen cares about line breaks
parent 7d89ab40
......@@ -43,7 +43,7 @@
#' These should follow the requirements from the descriptions above.
#' }
#' }
#' @param eval_freq evaluation output frequency, only effect when verbose > 0
#' @param eval_freq evaluation output frequency, only effective when verbose > 0 and \code{valids} has been provided
#' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model
#' @param nrounds number of training rounds
#' @param obj objective function, can be character or custom objective function. Examples include
......@@ -51,7 +51,8 @@
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param params a list of parameters. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
#' the "Parameters" section of the documentation} for a list of parameters and valid values.
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
#' @param verbose verbosity for output, if <= 0 and \code{valids} has been provided, also will disable the
#' printing of evaluation during training
#' @param serializable whether to make the resulting objects serializable through functions such as
#' \code{save} or \code{saveRDS} (see section "Model serialization").
#' @section Early Stopping:
......@@ -193,11 +194,6 @@ lightgbm <- function(data,
train_args[["valids"]] <- list()
}
# Set validation as oneself
if (params[["verbosity"]] > 0L) {
train_args[["valids"]][["train"]] <- dtrain
}
# Train a model using the regular way
bst <- do.call(
what = lgb.train
......
......@@ -83,11 +83,12 @@ may allow you to pass other types of data like \code{matrix} and then separately
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
\item{verbose}{verbosity for output, if <= 0 and \code{valids} has been provided, also will disable the
printing of evaluation during training}
\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{eval_freq}{evaluation output frequency, only effective when verbose > 0 and \code{valids} has been provided}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation.
This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
......
......@@ -73,11 +73,12 @@ may allow you to pass other types of data like \code{matrix} and then separately
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
\item{verbose}{verbosity for output, if <= 0 and \code{valids} has been provided, also will disable the
printing of evaluation during training}
\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{eval_freq}{evaluation output frequency, only effective when verbose > 0 and \code{valids} has been provided}
\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model}
......
......@@ -50,7 +50,7 @@ set to the iteration number of the best iteration.}
}
}}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{eval_freq}{evaluation output frequency, only effective when verbose > 0 and \code{valids} has been provided}
\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model}
......@@ -63,7 +63,8 @@ set to the iteration number of the best iteration.}
\item{params}{a list of parameters. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
the "Parameters" section of the documentation} for a list of parameters and valid values.}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
\item{verbose}{verbosity for output, if <= 0 and \code{valids} has been provided, also will disable the
printing of evaluation during training}
\item{serializable}{whether to make the resulting objects serializable through functions such as
\code{save} or \code{saveRDS} (see section "Model serialization").}
......
......@@ -37,9 +37,10 @@ the "Parameters" section of the documentation} for a list of parameters and vali
\item{nrounds}{number of training rounds}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
\item{verbose}{verbosity for output, if <= 0 and \code{valids} has been provided, also will disable the
printing of evaluation during training}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{eval_freq}{evaluation output frequency, only effective when verbose > 0 and \code{valids} has been provided}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
......
......@@ -80,6 +80,12 @@ test_that("train and predict binary classification", {
, metric = "binary_error"
)
, nrounds = nrounds
, valids = list(
"train" = lgb.Dataset(
data = train$data
, label = train$label
)
)
)
expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "binary_error")
......@@ -98,10 +104,11 @@ test_that("train and predict binary classification", {
test_that("train and predict softmax", {
set.seed(708L)
X_mat <- as.matrix(iris[, -5L])
lb <- as.numeric(iris$Species) - 1L
bst <- lightgbm(
data = as.matrix(iris[, -5L])
data = X_mat
, label = lb
, params = list(
num_leaves = 4L
......@@ -113,6 +120,12 @@ test_that("train and predict softmax", {
, num_class = 3L
)
, nrounds = 20L
, valids = list(
"train" = lgb.Dataset(
data = X_mat
, label = lb
)
)
)
expect_false(is.null(bst$record_evals))
......@@ -136,6 +149,12 @@ test_that("use of multiple eval metrics works", {
, metric = metrics
)
, nrounds = 10L
, valids = list(
"train" = lgb.Dataset(
data = train$data
, label = train$label
)
)
)
expect_false(is.null(bst$record_evals))
expect_named(
......@@ -284,6 +303,10 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
, "train" = lgb.Dataset(
data = train$data
, label = train$label
)
)
)
......@@ -3044,7 +3067,7 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
expect_equal(model$.__enclos_env__$private$train_set$get_field("weight"), w)
})
.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg, train_eval_msg) {
.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
expect_identical(
object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt))
, expected = lgb_info
......@@ -3065,13 +3088,9 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
object = any(grepl("valid's auc\\:[0-9]+", log_txt))
, expected = valid_eval_msg
)
expect_identical(
object = any(grepl("train's auc\\:[0-9]+", log_txt))
, expected = train_eval_msg
)
}
.assert_has_expected_record_evals <- function(fitted_model, valids_should_include_train_set) {
.assert_has_expected_record_evals <- function(fitted_model) {
record_evals <- fitted_model$record_evals
expect_equal(record_evals$start_iter, 1L)
if (inherits(fitted_model, "lgb.CVBooster")) {
......@@ -3084,17 +3103,7 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
, expected = expected_valid_auc
, tolerance = TOLERANCE
)
if (isTRUE(valids_should_include_train_set)) {
expect_named(record_evals, c("start_iter", "valid", "train"), ignore.order = TRUE, ignore.case = FALSE)
expect_equal(
object = unlist(record_evals[["train"]][["auc"]][["eval"]])
, expected = c(0.9817835, 0.9817835, 0.9929513, 0.9929513, 0.9947141)
, tolerance = TOLERANCE
)
expect_equal(record_evals[["train"]][["auc"]][["eval_err"]], list())
} else {
expect_named(record_evals, c("start_iter", "valid"), ignore.order = TRUE, ignore.case = FALSE)
}
expect_named(record_evals, c("start_iter", "valid"), ignore.order = TRUE, ignore.case = FALSE)
expect_equal(record_evals[["valid"]][["auc"]][["eval_err"]], list())
}
......@@ -3167,11 +3176,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs
......@@ -3186,11 +3193,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
......@@ -3205,11 +3210,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
}
......@@ -3228,11 +3231,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs
......@@ -3247,11 +3248,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
......@@ -3266,11 +3265,9 @@ test_that("lgb.train() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
})
......@@ -3292,11 +3289,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs, train should not be in valids
......@@ -3311,11 +3306,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages, and
......@@ -3331,11 +3324,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = TRUE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = TRUE
)
}
......@@ -3354,11 +3345,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs, train should not be in valids
......@@ -3373,11 +3362,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages, and
......@@ -3393,11 +3380,9 @@ test_that("lightgbm() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = TRUE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = TRUE
)
})
......@@ -3419,11 +3404,9 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs
......@@ -3438,11 +3421,9 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
......@@ -3457,11 +3438,9 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
}
......@@ -3480,11 +3459,9 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = FALSE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose = 0) should be only WARN-level LightGBM logs
......@@ -3499,11 +3476,9 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = FALSE
, valid_eval_msg = FALSE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
# (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
......@@ -3518,10 +3493,8 @@ test_that("lgb.cv() only prints eval metrics when expected to", {
, lgb_warn = TRUE
, early_stopping = TRUE
, valid_eval_msg = TRUE
, train_eval_msg = FALSE
)
.assert_has_expected_record_evals(
fitted_model = out[["booster"]]
, valids_should_include_train_set = FALSE
)
})
Markdown is supported
0% uploaded — or attach files by dragging & dropping.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment