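# verbosity used throughout these tests; defaults to -1 (silent) but can be
# overridden through the LIGHTGBM_TEST_VERBOSITY environment variable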
VERBOSITY <- as.integer(
  Sys.getenv("LIGHTGBM_TEST_VERBOSITY", "-1")
)

ON_WINDOWS <- .Platform$OS.type == "windows"

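# TRUE when every component of the LC_CTYPE locale reports a UTF-8 encoding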
UTF8_LOCALE <- all(endsWith(
  Sys.getlocale(category = "LC_CTYPE")
  , "UTF-8"
))

data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test

TOLERANCE <- 1e-6
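# fixed seed so the hard-coded expected metric values in the tests below are reproducible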
set.seed(708L)

# [description] Every time this function is called, it adds 0.1
#               to an accumulator then returns the current value.
#               This is used to mock the situation where an evaluation
#               metric increases every iteration
ACCUMULATOR_NAME <- "INCREASING_METRIC_ACCUMULATOR"
assign(x = ACCUMULATOR_NAME, value = 0.0, envir = .GlobalEnv)

.increasing_metric <- function(preds, dtrain) {
  if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) {
    assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv)
  }
  assign(
    x = ACCUMULATOR_NAME
    , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + 0.1
    , envir = .GlobalEnv
  )
  return(list(
    name = "increasing_metric"
    , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv)
    , higher_better = TRUE
  ))
}

# [description] Evaluation function that always returns the
#               same value
CONSTANT_METRIC_VALUE <- 0.2
.constant_metric <- function(preds, dtrain) {
  return(list(
    name = "constant_metric"
    , value = CONSTANT_METRIC_VALUE
    , higher_better = FALSE
  ))
}
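# [usage sketch] Custom metrics like the two above follow the signature
# function(preds, dtrain) -> list(name, value, higher_better) and can be passed
# to the `eval` argument of lgb.train() / lgb.cv(). Illustrative only, assuming
# `dtrain` and `dvalid` are lgb.Dataset objects:
#   lgb.train(
#     params = list(objective = "regression", metric = "None")
#     , data = dtrain
#     , nrounds = 5L
#     , valids = list("valid1" = dvalid)
#     , eval = list(.increasing_metric, .constant_metric)
#   )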

# sample datasets to test early stopping
DTRAIN_RANDOM_REGRESSION <- lgb.Dataset(
  data = as.matrix(rnorm(100L), ncol = 1L, drop = FALSE)
  , label = rnorm(100L)
  , params = list(num_threads = .LGB_MAX_THREADS)
)
DVALID_RANDOM_REGRESSION <- lgb.Dataset(
  data = as.matrix(rnorm(50L), ncol = 1L, drop = FALSE)
  , label = rnorm(50L)
  , params = list(num_threads = .LGB_MAX_THREADS)
)
DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset(
  data = as.matrix(rnorm(120L), ncol = 1L, drop = FALSE)
  , label = sample(c(0L, 1L), size = 120L, replace = TRUE)
  , params = list(num_threads = .LGB_MAX_THREADS)
)
DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset(
  data = as.matrix(rnorm(37L), ncol = 1L, drop = FALSE)
  , label = sample(c(0L, 1L), size = 37L, replace = TRUE)
  , params = list(num_threads = .LGB_MAX_THREADS)
)

test_that("train and predict binary classification", {
  nrounds <- 10L
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        num_leaves = 5L
        , objective = "binary"
        , metric = "binary_error"
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = nrounds
    , valids = list(
      "train" = lgb.Dataset(
        data = train$data
        , label = train$label
      )
    )
  )
  expect_false(is.null(bst$record_evals))
  record_results <- lgb.get.eval.result(bst, "train", "binary_error")
  expect_lt(min(record_results), 0.02)

  pred <- predict(bst, test$data)
  expect_equal(length(pred), 1611L)

  pred1 <- predict(bst, train$data, num_iteration = 1L)
  expect_equal(length(pred1), 6513L)
  err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
  err_log <- record_results[1L]
  expect_lt(abs(err_pred1 - err_log), TOLERANCE)
})


test_that("train and predict softmax", {
  set.seed(708L)
  X_mat <- as.matrix(iris[, -5L])
  lb <- as.numeric(iris$Species) - 1L

  bst <- lightgbm(
    data = X_mat
    , label = lb
    , params = list(
        num_leaves = 4L
        , learning_rate = 0.05
        , min_data = 20L
        , min_hessian = 10.0
        , objective = "multiclass"
        , metric = "multi_error"
        , num_class = 3L
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = 20L
    , valids = list(
      "train" = lgb.Dataset(
        data = X_mat
        , label = lb
      )
    )
  )

  expect_false(is.null(bst$record_evals))
  record_results <- lgb.get.eval.result(bst, "train", "multi_error")
  expect_lt(min(record_results), 0.06)

  pred <- predict(bst, as.matrix(iris[, -5L]))
  expect_equal(length(pred), nrow(iris) * 3L)
})


test_that("use of multiple eval metrics works", {
  metrics <- list("binary_error", "auc", "binary_logloss")
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        num_leaves = 4L
        , learning_rate = 1.0
        , objective = "binary"
        , metric = metrics
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = 10L
    , valids = list(
      "train" = lgb.Dataset(
        data = train$data
        , label = train$label
        , params = list(num_threads = .LGB_MAX_THREADS)
      )
    )
  )
  expect_false(is.null(bst$record_evals))
  expect_named(
    bst$record_evals[["train"]]
    , unlist(metrics)
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
})

test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expected for binary classification", {
  set.seed(708L)
  nrounds <- 10L
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        num_leaves = 5L
        , objective = "binary"
        , metric = "binary_error"
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = nrounds
  )
  expect_true(abs(bst$lower_bound() - -1.590853) < TOLERANCE)
  expect_true(abs(bst$upper_bound() - 1.871015) < TOLERANCE)
})

test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expected for regression", {
  set.seed(708L)
  nrounds <- 10L
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        num_leaves = 5L
        , objective = "regression"
        , metric = "l2"
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = nrounds
  )
  expect_true(abs(bst$lower_bound() - 0.1513859) < TOLERANCE)
  expect_true(abs(bst$upper_bound() - 0.9080349) < TOLERANCE)
})

test_that("lightgbm() rejects negative or 0 value passed to nrounds", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(objective = "regression", metric = "l2,l1", num_threads = .LGB_MAX_THREADS)
  for (nround_value in c(-10L, 0L)) {
    expect_error({
      bst <- lightgbm(
        data = dtrain
        , params = params
        , nrounds = nround_value
      )
    }, "nrounds should be greater than zero")
  }
})

test_that("lightgbm() accepts nrounds as either a top-level argument or parameter", {
  nrounds <- 15L

  set.seed(708L)
  top_level_bst <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = nrounds
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  set.seed(708L)
  param_bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , nrounds = nrounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  set.seed(708L)
  both_customized <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 20L
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , nrounds = nrounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
  params_l2 <- param_bst$eval_train()[[1L]][["value"]]
  both_l2 <- both_customized$eval_train()[[1L]][["value"]]

  # check type just to be sure the subsetting didn't return a NULL
  expect_true(is.numeric(top_level_l2))
  expect_true(is.numeric(params_l2))
  expect_true(is.numeric(both_l2))

  # check that model produces identical performance
  expect_identical(top_level_l2, params_l2)
  expect_identical(both_l2, params_l2)

  expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
  expect_identical(param_bst$current_iter(), both_customized$current_iter())
  expect_identical(param_bst$current_iter(), nrounds)

})

test_that("lightgbm() performs evaluation on validation sets if they are provided", {
  set.seed(708L)
  dvalid1 <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid2 <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        num_leaves = 5L
        , objective = "binary"
        , metric = c(
            "binary_error"
            , "auc"
        )
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "valid2" = dvalid2
      , "train" = lgb.Dataset(
        data = train$data
        , label = train$label
        , params = list(num_threads = .LGB_MAX_THREADS)
      )
    )
  )

  expect_named(
    bst$record_evals
    , c("train", "valid1", "valid2", "start_iter")
    , ignore.order = TRUE
    , ignore.case = FALSE
  )
  for (valid_name in c("train", "valid1", "valid2")) {
    eval_results <- bst$record_evals[[valid_name]][["binary_error"]]
    expect_length(eval_results[["eval"]], nrounds)
  }
  expect_true(abs(bst$record_evals[["train"]][["binary_error"]][["eval"]][[1L]] - 0.02226317) < TOLERANCE)
  expect_true(abs(bst$record_evals[["valid1"]][["binary_error"]][["eval"]][[1L]] - 0.02226317) < TOLERANCE)
  expect_true(abs(bst$record_evals[["valid2"]][["binary_error"]][["eval"]][[1L]] - 0.02226317) < TOLERANCE)
})

test_that("training continuation works", {
  dtrain <- lgb.Dataset(
    train$data
    , label = train$label
    , free_raw_data = FALSE
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  watchlist <- list(train = dtrain)
  param <- list(
    objective = "binary"
    , metric = "binary_logloss"
    , num_leaves = 5L
    , learning_rate = 1.0
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )

  # train for 10 consecutive iterations
  bst <- lgb.train(param, dtrain, nrounds = 10L, watchlist)
  err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10L)

  # train for 5 iterations, save, load, train for 5 more
  bst1 <- lgb.train(param, dtrain, nrounds = 5L, watchlist)
  model_file <- tempfile(fileext = ".model")
  lgb.save(bst1, model_file)
  bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = bst1)
  err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L)

  # evaluation metrics should be nearly identical for the model trained in 10 consecutive
  # iterations and the one trained in 5-then-5.
  expect_lt(abs(err_bst - err_bst2), 0.01)
})

test_that("cv works", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(
    objective = "regression"
    , metric = "l2,l1"
    , min_data = 1L
    , learning_rate = 1.0
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lgb.cv(
    params
    , dtrain
    , 10L
    , nfold = 5L
    , early_stopping_rounds = 10L
  )
  expect_false(is.null(bst$record_evals))
})

test_that("CVBooster$reset_parameter() works as expected", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  n_folds <- 2L
  cv_bst <- lgb.cv(
    params = list(
      objective = "regression"
      , min_data = 1L
      , num_leaves = 7L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = 3L
    , nfold = n_folds
  )
  expect_true(methods::is(cv_bst, "lgb.CVBooster"))
  expect_length(cv_bst$boosters, n_folds)
  for (bst in cv_bst$boosters) {
    expect_equal(bst[["booster"]]$params[["num_leaves"]], 7L)
  }
  cv_bst$reset_parameter(list(num_leaves = 11L))
  for (bst in cv_bst$boosters) {
    expect_equal(bst[["booster"]]$params[["num_leaves"]], 11L)
  }
})

test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = 2L))
  params <- list(
    objective = "regression"
    , metric = "l2,l1"
    , min_data = 1L
    , num_threads = .LGB_MAX_THREADS
  )
  for (nround_value in c(-10L, 0L)) {
    expect_error({
      bst <- lgb.cv(
        params
        , dtrain
        , nround_value
        , nfold = 5L
      )
    }, "nrounds should be greater than zero")
  }
})

test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", {
  bad_values <- list(
    4L
    , "hello"
    , list(a = TRUE, b = seq_len(10L))
    , data.frame(x = seq_len(5L), y = seq_len(5L))
    , data.table::data.table(x = seq_len(5L), y = seq_len(5L))
    , matrix(data = seq_len(10L), 2L, 5L)
  )
  for (val in bad_values) {
    expect_error({
      bst <- lgb.cv(
        params = list(
            objective = "regression"
            , metric = "l2,l1"
            , min_data = 1L
        )
        , data = val
        , 10L
        , nfold = 5L
      )
    }, regexp = "'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'", fixed = TRUE)
  }
})

test_that("lightgbm.cv() gives the correct best_score and best_iter for a metric where higher values are better", {
  set.seed(708L)
  dtrain <- lgb.Dataset(
    data = as.matrix(runif(n = 500L, min = 0.0, max = 15.0), drop = FALSE)
    , label = rep(c(0L, 1L), 250L)
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L
  cv_bst <- lgb.cv(
    data = dtrain
    , nfold = 5L
    , nrounds = nrounds
    , params = list(
      objective = "binary"
      , metric = "auc,binary_error"
      , learning_rate = 1.5
      , num_leaves = 5L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )
  expect_true(methods::is(cv_bst, "lgb.CVBooster"))
  expect_named(
    cv_bst$record_evals
    , c("start_iter", "valid")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  auc_scores <- unlist(cv_bst$record_evals[["valid"]][["auc"]][["eval"]])
  expect_length(auc_scores, nrounds)
  expect_identical(cv_bst$best_iter, which.max(auc_scores))
  expect_identical(cv_bst$best_score, auc_scores[which.max(auc_scores)])
})

test_that("lgb.cv() fit on linearly-relatead data improves when using linear learners", {
  set.seed(708L)
  .new_dataset <- function() {
    X <- matrix(rnorm(1000L), ncol = 1L)
    return(lgb.Dataset(
      data = X
      , label = 2L * X + runif(nrow(X), 0L, 0.1)
      , params = list(num_threads = .LGB_MAX_THREADS)
    ))
  }
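  # (each call to .new_dataset() draws a fresh random sample, so the two
  # lgb.cv() runs below are fit on independent draws from the same process)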

  params <- list(
    objective = "regression"
    , verbose = -1L
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  cv_bst <- lgb.cv(
    data = dtrain
    , nrounds = 10L
    , params = params
    , nfold = 5L
  )
  expect_true(methods::is(cv_bst, "lgb.CVBooster"))

  dtrain <- .new_dataset()
  cv_bst_linear <- lgb.cv(
    data = dtrain
    , nrounds = 10L
    , params = utils::modifyList(params, list(linear_tree = TRUE))
    , nfold = 5L
  )
  expect_true(methods::is(cv_bst_linear, "lgb.CVBooster"))

  expect_true(cv_bst_linear$best_score < cv_bst$best_score)
})

test_that("lgb.cv() respects showsd argument", {
  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
  params <- list(
    objective = "regression"
    , metric = "l2"
    , min_data = 1L
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  nrounds <- 5L
  set.seed(708L)
  bst_showsd <- lgb.cv(
    params = params
    , data = dtrain
    , nrounds = nrounds
    , nfold = 3L
    , showsd = TRUE
  )
  evals_showsd <- bst_showsd$record_evals[["valid"]][["l2"]]
  set.seed(708L)
  bst_no_showsd <- lgb.cv(
    params = params
    , data = dtrain
    , nrounds = nrounds
    , nfold = 3L
    , showsd = FALSE
  )
  evals_no_showsd <- bst_no_showsd$record_evals[["valid"]][["l2"]]
  expect_equal(
    evals_showsd[["eval"]]
    , evals_no_showsd[["eval"]]
  )
  expect_true(methods::is(evals_showsd[["eval_err"]], "list"))
  expect_equal(length(evals_showsd[["eval_err"]]), nrounds)
  expect_identical(evals_no_showsd[["eval_err"]], list())
})

test_that("lgb.cv() raises an informative error for unrecognized objectives", {
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  expect_error({
    capture.output({
      bst <- lgb.cv(
        data = dtrain
        , params = list(
          objective_type = "not_a_real_objective"
          , verbosity = VERBOSITY
          , num_threads = .LGB_MAX_THREADS
        )
      )
    }, type = "message")
  }, regexp = "Unknown objective type name: not_a_real_objective")
})

test_that("lgb.cv() respects parameter aliases for objective", {
  nrounds <- 3L
  nfold <- 4L
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  cv_bst <- lgb.cv(
    data = dtrain
    , params = list(
      num_leaves = 5L
      , application = "binary"
      , num_iterations = nrounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , nfold = nfold
  )
  expect_equal(cv_bst$best_iter, nrounds)
  expect_named(cv_bst$record_evals[["valid"]], "binary_logloss")
  expect_length(cv_bst$record_evals[["valid"]][["binary_logloss"]][["eval"]], nrounds)
  expect_length(cv_bst$boosters, nfold)
})

test_that("lgb.cv() prefers objective in params to keyword argument", {
  data("EuStockMarkets")
  cv_bst <- lgb.cv(
    data = lgb.Dataset(
      data = EuStockMarkets[, c("SMI", "CAC", "FTSE")]
      , label = EuStockMarkets[, "DAX"]
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , params = list(
      application = "regression_l1"
      , verbosity = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = 5L
    , obj = "regression_l2"
  )
  for (bst_list in cv_bst$boosters) {
    bst <- bst_list[["booster"]]
    expect_equal(bst$params$objective, "regression_l1")
    # NOTE: using save_model_to_string() since that is the simplest public API in the R package
    #       allowing access to the "objective" attribute of the Booster object on the C++ side
    model_txt_lines <- strsplit(
      x = bst$save_model_to_string()
      , split = "\n"
      , fixed = TRUE
    )[[1L]]
    expect_true(any(model_txt_lines == "objective=regression_l1"))
    expect_false(any(model_txt_lines == "objective=regression_l2"))
  }
})

test_that("lgb.cv() respects parameter aliases for metric", {
  nrounds <- 3L
  nfold <- 4L
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  cv_bst <- lgb.cv(
    data = dtrain
    , params = list(
      num_leaves = 5L
      , objective = "binary"
      , num_iterations = nrounds
      , metric_types = c("auc", "binary_logloss")
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , nfold = nfold
  )
  expect_equal(cv_bst$best_iter, nrounds)
  expect_named(cv_bst$record_evals[["valid"]], c("auc", "binary_logloss"))
  expect_length(cv_bst$record_evals[["valid"]][["binary_logloss"]][["eval"]], nrounds)
  expect_length(cv_bst$record_evals[["valid"]][["auc"]][["eval"]], nrounds)
  expect_length(cv_bst$boosters, nfold)
})

test_that("lgb.cv() respects eval_train_metric argument", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(
    objective = "regression"
    , metric = "l2"
    , min_data = 1L
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  nrounds <- 5L
  set.seed(708L)
  bst_train <- lgb.cv(
    params = params
    , data = dtrain
    , nrounds = nrounds
    , nfold = 3L
    , showsd = FALSE
    , eval_train_metric = TRUE
  )
  set.seed(708L)
  bst_no_train <- lgb.cv(
    params = params
    , data = dtrain
    , nrounds = nrounds
    , nfold = 3L
    , showsd = FALSE
    , eval_train_metric = FALSE
  )
  expect_equal(
    bst_train$record_evals[["valid"]][["l2"]]
    , bst_no_train$record_evals[["valid"]][["l2"]]
  )
  expect_true("train" %in% names(bst_train$record_evals))
  expect_false("train" %in% names(bst_no_train$record_evals))
  expect_true(methods::is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list"))
  expect_equal(
    length(bst_train$record_evals[["train"]][["l2"]][["eval"]])
    , nrounds
  )
})

test_that("lgb.train() works as expected with multiple eval metrics", {
  metrics <- c("binary_error", "auc", "binary_logloss")
  bst <- lgb.train(
    data = lgb.Dataset(
      train$data
      , label = train$label
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , nrounds = 10L
    , params = list(
      objective = "binary"
      , metric = metrics
      , learning_rate = 1.0
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , valids = list(
      "train" = lgb.Dataset(
        train$data
        , label = train$label
        , params = list(num_threads = .LGB_MAX_THREADS)
      )
    )
  )
  expect_false(is.null(bst$record_evals))
  expect_named(
    bst$record_evals[["train"]]
    , unlist(metrics)
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
})

test_that("lgb.train() raises an informative error for unrecognized objectives", {
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
  )
  expect_error({
    capture.output({
      bst <- lgb.train(
        data = dtrain
        , params = list(
          objective_type = "not_a_real_objective"
          , verbosity = VERBOSITY
        )
      )
    }, type = "message")
  }, regexp = "Unknown objective type name: not_a_real_objective")
})

test_that("lgb.train() respects parameter aliases for objective", {
  nrounds <- 3L
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  bst <- lgb.train(
    data = dtrain
    , params = list(
      num_leaves = 5L
      , application = "binary"
      , num_iterations = nrounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , valids = list(
      "the_training_data" = dtrain
    )
  )
  expect_named(bst$record_evals[["the_training_data"]], "binary_logloss")
  expect_length(bst$record_evals[["the_training_data"]][["binary_logloss"]][["eval"]], nrounds)
  expect_equal(bst$params[["objective"]], "binary")
})

test_that("lgb.train() prefers objective in params to keyword argument", {
  data("EuStockMarkets")
  bst <- lgb.train(
    data = lgb.Dataset(
      data = EuStockMarkets[, c("SMI", "CAC", "FTSE")]
      , label = EuStockMarkets[, "DAX"]
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , params = list(
        loss = "regression_l1"
        , verbosity = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    , nrounds = 5L
    , obj = "regression_l2"
  )
  expect_equal(bst$params$objective, "regression_l1")
  # NOTE: using save_model_to_string() since that is the simplest public API in the R package
  #       allowing access to the "objective" attribute of the Booster object on the C++ side
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
})

test_that("lgb.train() respects parameter aliases for metric", {
  nrounds <- 3L
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  bst <- lgb.train(
    data = dtrain
    , params = list(
      num_leaves = 5L
      , objective = "binary"
      , num_iterations = nrounds
      , metric_types = c("auc", "binary_logloss")
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , valids = list(
      "train" = dtrain
    )
  )
  record_results <- bst$record_evals[["train"]]
  expect_equal(sort(names(record_results)), c("auc", "binary_logloss"))
  expect_length(record_results[["auc"]][["eval"]], nrounds)
  expect_length(record_results[["binary_logloss"]][["eval"]], nrounds)
  expect_equal(bst$params[["metric"]], list("auc", "binary_logloss"))
})

test_that("lgb.train() rejects negative or 0 value passed to nrounds", {
  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
  params <- list(
    objective = "regression"
    , metric = "l2,l1"
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  for (nround_value in c(-10L, 0L)) {
    expect_error({
      bst <- lgb.train(
        params
        , dtrain
        , nround_value
      )
    }, "nrounds should be greater than zero")
  }
})


test_that("lgb.train() accepts nrounds as either a top-level argument or parameter", {
  nrounds <- 15L

  set.seed(708L)
  top_level_bst <- lgb.train(
    data = lgb.Dataset(
      train$data
      , label = train$label
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , nrounds = nrounds
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  set.seed(708L)
  param_bst <- lgb.train(
    data = lgb.Dataset(
      train$data
      , label = train$label
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , nrounds = nrounds
      , verbose = VERBOSITY
    )
  )

  set.seed(708L)
  both_customized <- lgb.train(
    data = lgb.Dataset(
      train$data
      , label = train$label
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    , nrounds = 20L
    , params = list(
      objective = "regression"
      , metric = "l2"
      , num_leaves = 5L
      , nrounds = nrounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
  params_l2 <- param_bst$eval_train()[[1L]][["value"]]
  both_l2 <- both_customized$eval_train()[[1L]][["value"]]

  # check type just to be sure the subsetting didn't return a NULL
  expect_true(is.numeric(top_level_l2))
  expect_true(is.numeric(params_l2))
  expect_true(is.numeric(both_l2))

  # check that model produces identical performance
  expect_identical(top_level_l2, params_l2)
  expect_identical(both_l2, params_l2)

  expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
  expect_identical(param_bst$current_iter(), both_customized$current_iter())
  expect_identical(param_bst$current_iter(), nrounds)

})


test_that("lgb.train() throws an informative error if 'data' is not an lgb.Dataset", {
  bad_values <- list(
    4L
    , "hello"
    , list(a = TRUE, b = seq_len(10L))
    , data.frame(x = seq_len(5L), y = seq_len(5L))
    , data.table::data.table(x = seq_len(5L), y = seq_len(5L))
    , matrix(data = seq_len(10L), 2L, 5L)
  )
  for (val in bad_values) {
    expect_error({
      bst <- lgb.train(
        params = list(
            objective = "regression"
            , metric = "l2,l1"
            , verbose = VERBOSITY
        )
        , data = val
        , 10L
      )
    }, regexp = "data must be an lgb.Dataset instance", fixed = TRUE)
  }
})

test_that("lgb.train() throws an informative error if 'valids' is not a list of lgb.Dataset objects", {
  valids <- list(
    "valid1" = data.frame(x = rnorm(5L), y = rnorm(5L))
    , "valid2" = data.frame(x = rnorm(5L), y = rnorm(5L))
  )
  expect_error({
    bst <- lgb.train(
      params = list(
        objective = "regression"
        , metric = "l2,l1"
        , verbose = VERBOSITY
      )
      , data = lgb.Dataset(train$data, label = train$label)
      , 10L
      , valids = valids
    )
  }, regexp = "valids must be a list of lgb.Dataset elements")
})

test_that("lgb.train() errors if 'valids' is a list of lgb.Dataset objects but some do not have names", {
  valids <- list(
    "valid1" = lgb.Dataset(matrix(rnorm(10L), 5L, 2L))
    , lgb.Dataset(matrix(rnorm(10L), 2L, 5L))
  )
  expect_error({
    bst <- lgb.train(
      params = list(
        objective = "regression"
        , metric = "l2,l1"
        , verbose = VERBOSITY
      )
      , data = lgb.Dataset(train$data, label = train$label)
      , 10L
      , valids = valids
    )
  }, regexp = "each element of valids must have a name")
})

test_that("lgb.train() throws an informative error if 'valids' contains lgb.Dataset objects but none have names", {
  valids <- list(
    lgb.Dataset(matrix(rnorm(10L), 5L, 2L))
    , lgb.Dataset(matrix(rnorm(10L), 2L, 5L))
  )
  expect_error({
    bst <- lgb.train(
      params = list(
        objective = "regression"
        , metric = "l2,l1"
        , verbose = VERBOSITY
      )
      , data = lgb.Dataset(train$data, label = train$label)
      , 10L
      , valids = valids
    )
  }, regexp = "each element of valids must have a name")
})

test_that("lgb.train() works with force_col_wise and force_row_wise", {
  set.seed(1234L)
  nrounds <- 10L
  dtrain <- lgb.Dataset(
    train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  params <- list(
    objective = "binary"
    , metric = "binary_error"
    , force_col_wise = TRUE
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst_col_wise <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = nrounds
  )

  params <- list(
    objective = "binary"
    , metric = "binary_error"
    , force_row_wise = TRUE
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst_row_wise <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = nrounds
  )

  expected_error <- 0.003070782
  expect_equal(bst_col_wise$eval_train()[[1L]][["value"]], expected_error)
  expect_equal(bst_row_wise$eval_train()[[1L]][["value"]], expected_error)

  # check some basic details of the boosters just to be sure force_col_wise
  # and force_row_wise are not causing any weird side effects
  for (bst in list(bst_row_wise, bst_col_wise)) {
    expect_equal(bst$current_iter(), nrounds)
    parsed_model <- jsonlite::fromJSON(bst$dump_model())
    expect_equal(parsed_model$objective, "binary sigmoid:1")
    expect_false(parsed_model$average_output)
  }
})

test_that("lgb.train() works as expected with sparse features", {
  set.seed(708L)
  num_obs <- 70000L
  trainDF <- data.frame(
    y = sample(c(0L, 1L), size = num_obs, replace = TRUE)
    , x = sample(c(1.0:10.0, rep(NA_real_, 50L)), size = num_obs, replace = TRUE)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["x"]], drop = FALSE)
    , label = trainDF[["y"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 1L
  bst <- lgb.train(
    params = list(
      objective = "binary"
      , min_data = 1L
      , min_data_in_bin = 1L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
  )

  expect_true(lgb.is.Booster(bst))
  expect_equal(bst$current_iter(), nrounds)
  parsed_model <- jsonlite::fromJSON(bst$dump_model())
  expect_equal(parsed_model$objective, "binary sigmoid:1")
  expect_false(parsed_model$average_output)
  expected_error <- 0.6931268
  expect_true(abs(bst$eval_train()[[1L]][["value"]] - expected_error) < TOLERANCE)
})

test_that("lgb.train() works with early stopping for classification", {
  trainDF <- data.frame(
    "feat1" = rep(c(5.0, 10.0), 500L)
    , "target" = rep(c(0L, 1L), 500L)
  )
  validDF <- data.frame(
    "feat1" = rep(c(5.0, 10.0), 50L)
    , "target" = rep(c(0L, 1L), 50L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid <- lgb.Dataset(
    data = as.matrix(validDF[["feat1"]], drop = FALSE)
    , label = validDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L

  ################################
  # train with no early stopping #
  ################################
  bst <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "binary_error"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # a perfect model should be trivial to obtain, but all 10 rounds
  # should happen
  expect_equal(bst$best_score, 0.0)
  expect_equal(bst$best_iter, 1L)
  expect_equal(length(bst$record_evals[["valid1"]][["binary_error"]][["eval"]]), nrounds)

  #############################
  # train with early stopping #
  #############################
  early_stopping_rounds <- 5L
  bst <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "binary_error"
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # a perfect model should be trivial to obtain, and only 6 rounds
  # should have happened (1 with improvement, 5 consecutive with no improvement)
  expect_equal(bst$best_score, 0.0)
  expect_equal(bst$best_iter, 1L)
  expect_equal(
    length(bst$record_evals[["valid1"]][["binary_error"]][["eval"]])
    , early_stopping_rounds + 1L
  )

})

test_that("lgb.train() treats early_stopping_rounds<=0 as disabling early stopping", {
  set.seed(708L)
  trainDF <- data.frame(
    "feat1" = rep(c(5.0, 10.0), 500L)
    , "target" = rep(c(0L, 1L), 500L)
  )
  validDF <- data.frame(
    "feat1" = rep(c(5.0, 10.0), 50L)
    , "target" = rep(c(0L, 1L), 50L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid <- lgb.Dataset(
    data = as.matrix(validDF[["feat1"]], drop = FALSE)
    , label = validDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 5L

  for (value in c(-5L, 0L)) {

    #----------------------------#
    # passed as keyword argument #
    #----------------------------#
    bst <- lgb.train(
      params = list(
        objective = "binary"
        , metric = "binary_error"
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
      )
      , data = dtrain
      , nrounds = nrounds
      , valids = list(
        "valid1" = dvalid
      )
      , early_stopping_rounds = value
    )

    # a perfect model should be trivial to obtain, but all 5 rounds
    # should happen
    expect_equal(bst$best_score, 0.0)
    expect_equal(bst$best_iter, 1L)
    expect_equal(length(bst$record_evals[["valid1"]][["binary_error"]][["eval"]]), nrounds)

    #---------------------------#
    # passed as parameter alias #
    #---------------------------#
    bst <- lgb.train(
      params = list(
        objective = "binary"
        , metric = "binary_error"
        , n_iter_no_change = value
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
      )
      , data = dtrain
      , nrounds = nrounds
      , valids = list(
        "valid1" = dvalid
      )
    )

    # a perfect model should be trivial to obtain, but all 5 rounds
    # should happen
    expect_equal(bst$best_score, 0.0)
    expect_equal(bst$best_iter, 1L)
    expect_equal(length(bst$record_evals[["valid1"]][["binary_error"]][["eval"]]), nrounds)
  }
})

test_that("lgb.train() works with early stopping for classification with a metric that should be maximized", {
  set.seed(708L)
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid <- lgb.Dataset(
    data = test$data
    , label = test$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L

  #############################
  # train with early stopping #
  #############################
  early_stopping_rounds <- 5L
  # the harsh max_depth guarantees that AUC improves over at least the first few iterations
  bst_auc <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "auc"
      , max_depth = 3L
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )
  bst_binary_error <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "binary_error"
      , max_depth = 3L
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # early stopping should have been hit for binary_error (higher_better = FALSE)
  eval_info <- bst_binary_error$.__enclos_env__$private$get_eval_info()
  expect_identical(eval_info, "binary_error")
  expect_identical(
    unname(bst_binary_error$.__enclos_env__$private$higher_better_inner_eval)
    , FALSE
  )
  expect_identical(bst_binary_error$best_iter, 1L)
  expect_identical(bst_binary_error$current_iter(), early_stopping_rounds + 1L)
  expect_true(abs(bst_binary_error$best_score - 0.01613904) < TOLERANCE)

  # early stopping should not have been hit for AUC (higher_better = TRUE)
  eval_info <- bst_auc$.__enclos_env__$private$get_eval_info()
  expect_identical(eval_info, "auc")
  expect_identical(
    unname(bst_auc$.__enclos_env__$private$higher_better_inner_eval)
    , TRUE
  )
  expect_identical(bst_auc$best_iter, 9L)
  expect_identical(bst_auc$current_iter(), nrounds)
  expect_true(abs(bst_auc$best_score - 0.9999969) < TOLERANCE)
})

test_that("lgb.train() works with early stopping for regression", {
  set.seed(708L)
  trainDF <- data.frame(
    "feat1" = rep(c(10.0, 100.0), 500L)
    , "target" = rep(c(-50.0, 50.0), 500L)
  )
  validDF <- data.frame(
    "feat1" = rep(50.0, 4L)
    , "target" = rep(50.0, 4L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid <- lgb.Dataset(
    data = as.matrix(validDF[["feat1"]], drop = FALSE)
    , label = validDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L

  ################################
  # train with no early stopping #
  ################################
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = "rmse"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # the best possible model should come from the first iteration, but
  # all 10 training iterations should happen
  expect_equal(bst$best_score, 55.0)
  expect_equal(bst$best_iter, 1L)
  expect_equal(length(bst$record_evals[["valid1"]][["rmse"]][["eval"]]), nrounds)

  #############################
  # train with early stopping #
  #############################
  early_stopping_rounds <- 5L
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = "rmse"
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # the best model should be from the first iteration, and only 6 rounds
  # should have happened (1 with improvement, 5 consecutive with no improvement)
  expect_equal(bst$best_score, 55.0)
  expect_equal(bst$best_iter, 1L)
  expect_equal(
    length(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
    , early_stopping_rounds + 1L
  )
})

test_that("lgb.train() does not stop early if early_stopping_rounds is not given", {
  set.seed(708L)

  increasing_metric_starting_value <- get(
    ACCUMULATOR_NAME
    , envir = .GlobalEnv
  )
  nrounds <- 10L
  metrics <- list(
    .constant_metric
    , .increasing_metric
  )
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = "None"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_REGRESSION
    , nrounds = nrounds
    , valids = list("valid1" = DVALID_RANDOM_REGRESSION)
    , eval = metrics
  )

  # Only the two functions provided to "eval" should have been evaluated
  expect_equal(length(bst$record_evals[["valid1"]]), 2L)

  # all 10 iterations should have happened, and the best_iter should be
  # the first one (based on constant_metric)
  best_iter <- 1L
  expect_equal(bst$best_iter, best_iter)

  # best_score should be taken from the first metric
  expect_equal(
    bst$best_score
    , bst$record_evals[["valid1"]][["constant_metric"]][["eval"]][[best_iter]]
  )

  # early stopping should not have happened. Even though constant_metric
  # had 9 consecutive iterations with no improvement, early stopping was
  # never triggered because early_stopping_rounds was not given
  expect_equal(
    length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
    , nrounds
  )
  expect_equal(
    length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
    , nrounds
  )
})

test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to stop early based on all metrics", {
  set.seed(708L)

  early_stopping_rounds <- 3L
  param_variations <- list(
    list(
      objective = "regression"
      , metric = "None"
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , list(
      objective = "regression"
      , metric = "None"
      , early_stopping_rounds = early_stopping_rounds
      , first_metric_only = FALSE
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )

  for (params in param_variations) {

    nrounds <- 10L
    bst <- lgb.train(
      params = params
      , data = DTRAIN_RANDOM_REGRESSION
      , nrounds = nrounds
      , valids = list(
        "valid1" = DVALID_RANDOM_REGRESSION
      )
      , eval = list(
        .increasing_metric
        , .constant_metric
      )
    )

    # Only the two functions provided to "eval" should have been evaluated
    expect_equal(length(bst$record_evals[["valid1"]]), 2L)

    # early stopping should have happened, and should have stopped early_stopping_rounds + 1 rounds in
    # because constant_metric never improves
    #
    # the best iteration should be the last one, because increasing_metric was first
    # and gets better every iteration
    best_iter <- early_stopping_rounds + 1L
    expect_equal(bst$best_iter, best_iter)

    # best_score should be taken from "increasing_metric" because it was first
    expect_equal(
      bst$best_score
      , bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]][[best_iter]]
    )

    # even though increasing_metric kept getting better every iteration, early
    # stopping should have happened because "constant_metric" did not improve
    expect_equal(
      length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
      , early_stopping_rounds + 1L
    )
    expect_equal(
      length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
      , early_stopping_rounds + 1L
    )
  }

})

test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based on only the first metric", {
  set.seed(708L)
  nrounds <- 10L
  early_stopping_rounds <- 3L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = "None"
      , early_stopping_rounds = early_stopping_rounds
      , first_metric_only = TRUE
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_REGRESSION
    , nrounds = nrounds
    , valids = list(
      "valid1" = DVALID_RANDOM_REGRESSION
    )
    , eval = list(
      .increasing_metric
      , .constant_metric
    )
  )

  # Only the two functions provided to "eval" should have been evaluated
  expect_equal(length(bst$record_evals[["valid1"]]), 2L)

  # all 10 iterations should happen, and the best_iter should be the final one
  expect_equal(bst$best_iter, nrounds)

  # best_score should be taken from "increasing_metric"
  expect_equal(
    bst$best_score
    , increasing_metric_starting_value + 0.1 * nrounds
  )

  # early stopping should not have happened. Even though constant_metric
  # had 9 consecutive iterations with no improvement, it is ignored because of
  # first_metric_only = TRUE
  expect_equal(
    length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
    , nrounds
  )
  expect_equal(
    length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
    , nrounds
  )
})

test_that("lgb.train() works when a mixture of functions and strings are passed to eval", {
  set.seed(708L)
  nrounds <- 10L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = "None"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_REGRESSION
    , nrounds = nrounds
    , valids = list(
      "valid1" = DVALID_RANDOM_REGRESSION
    )
    , eval = list(
      .increasing_metric
      , "rmse"
      , .constant_metric
      , "l2"
    )
  )

  # all 4 metrics should have been used
  expect_named(
    bst$record_evals[["valid1"]]
    , expected = c("rmse", "l2", "increasing_metric", "constant_metric")
    , ignore.order = TRUE
    , ignore.case = FALSE
  )

  # the different metrics shouldn't have been mixed up with each other
  results <- bst$record_evals[["valid1"]]
  expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 1.105012) < TOLERANCE)
  expect_true(abs(results[["l2"]][["eval"]][[1L]] - 1.221051) < TOLERANCE)
  expected_increasing_metric <- increasing_metric_starting_value + 0.1
  expect_true(
    abs(
      results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric
    ) < TOLERANCE
  )
  expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)

})

test_that("lgb.train() works when a list of strings or a character vector is passed to eval", {

  # testing list and character vector, as well as length-1 and length-2
  eval_variations <- list(
    c("binary_error", "binary_logloss")
    , "binary_logloss"
    , list("binary_error", "binary_logloss")
    , list("binary_logloss")
  )

  for (eval_variation in eval_variations) {

    set.seed(708L)
    nrounds <- 10L
    increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
    bst <- lgb.train(
      params = list(
        objective = "binary"
        , metric = "None"
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
      )
      , data = DTRAIN_RANDOM_CLASSIFICATION
      , nrounds = nrounds
      , valids = list(
        "valid1" = DVALID_RANDOM_CLASSIFICATION
      )
      , eval = eval_variation
    )

    # both metrics should have been used
    expect_named(
      bst$record_evals[["valid1"]]
      , expected = unlist(eval_variation)
      , ignore.order = TRUE
      , ignore.case = FALSE
    )

    # the different metrics shouldn't have been mixed up with each other
    results <- bst$record_evals[["valid1"]]
    if ("binary_error" %in% unlist(eval_variation)) {
      expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
    }
    if ("binary_logloss" %in% unlist(eval_variation)) {
      expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
    }
  }
})

test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", {
  set.seed(708L)
  nrounds <- 10L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "binary_error"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_CLASSIFICATION
    , nrounds = nrounds
    , valids = list(
      "valid1" = DVALID_RANDOM_CLASSIFICATION
    )
    , eval = "binary_logloss"
  )

  # both metrics should have been used
  expect_named(
    bst$record_evals[["valid1"]]
    , expected = c("binary_error", "binary_logloss")
    , ignore.order = TRUE
    , ignore.case = FALSE
  )

  # the different metrics shouldn't have been mixed up with each other
  results <- bst$record_evals[["valid1"]]
  expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
  expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
})

test_that("lgb.train() works when you give a function for eval", {
  set.seed(708L)
  nrounds <- 10L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.train(
    params = list(
      objective = "binary"
      , metric = "None"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_CLASSIFICATION
    , nrounds = nrounds
    , valids = list(
      "valid1" = DVALID_RANDOM_CLASSIFICATION
    )
    , eval = .constant_metric
  )

  # the different metrics shouldn't have been mixed up with each other
  results <- bst$record_evals[["valid1"]]
  expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
})

test_that("lgb.train() works with early stopping for regression with a metric that should be minimized", {
  set.seed(708L)
  trainDF <- data.frame(
    "feat1" = rep(c(10.0, 100.0), 500L)
    , "target" = rep(c(-50.0, 50.0), 500L)
  )
  validDF <- data.frame(
    "feat1" = rep(50.0, 4L)
    , "target" = rep(50.0, 4L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid <- lgb.Dataset(
    data = as.matrix(validDF[["feat1"]], drop = FALSE)
    , label = validDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L

  #############################
  # train with early stopping #
  #############################
  early_stopping_rounds <- 5L
  bst <- lgb.train(
    params = list(
      objective = "regression"
      , metric = c(
          "mape"
          , "rmse"
          , "mae"
      )
      , min_data_in_bin = 5L
      , early_stopping_rounds = early_stopping_rounds
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid
    )
  )

  # the best model should be from the first iteration, and only 6 rounds
  # should have happened (1 with improvement, 5 consecutive with no improvement)
  expect_equal(bst$best_score, 1.1)
  expect_equal(bst$best_iter, 1L)
  expect_equal(
    length(bst$record_evals[["valid1"]][["mape"]][["eval"]])
    , early_stopping_rounds + 1L
  )

  # Booster should understand that all three of these metrics should be minimized
  eval_info <- bst$.__enclos_env__$private$get_eval_info()
  expect_identical(eval_info, c("mape", "rmse", "l1"))
  expect_identical(
    unname(bst$.__enclos_env__$private$higher_better_inner_eval)
    , rep(FALSE, 3L)
  )
})


test_that("lgb.train() supports non-ASCII feature names", {
  dtrain <- lgb.Dataset(
    data = matrix(rnorm(400L), ncol = 4L)
    , label = rnorm(100L)
    , params = list(num_threads = .LGB_MAX_THREADS)
  )

  # content below is equivalent to
  #
  #  feature_names <- c("F_零", "F_一", "F_二", "F_三")
  #
  # but using rawToChar() to avoid weird issues when {testthat}
  # sources files and converts their encodings prior to evaluating the code
  feature_names <- c(
    rawToChar(as.raw(c(0x46, 0x5f, 0xe9, 0x9b, 0xb6)))
    , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x80)))
    , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
    , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
  )
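
  # for example, as.raw(c(0xe9, 0x9b, 0xb6)) holds the UTF-8 bytes of U+96F6
  # (the character for "zero"), so the first element decodes to "F_" followed
  # by that character in a UTF-8 locale
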
  bst <- lgb.train(
    data = dtrain
    , nrounds = 5L
    , obj = "regression"
    , params = list(
      metric = "rmse"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , colnames = feature_names
  )
  expect_true(lgb.is.Booster(bst))
  dumped_model <- jsonlite::fromJSON(bst$dump_model())

  # UTF-8 strings are not well-supported on Windows
  # * https://developer.r-project.org/Blog/public/2020/05/02/utf-8-support-on-windows/
  # * https://developer.r-project.org/Blog/public/2020/07/30/windows/utf-8-build-of-r-and-cran-packages/index.html
  if (UTF8_LOCALE && !ON_WINDOWS) {
    expect_identical(
      dumped_model[["feature_names"]]
      , feature_names
    )
  } else {
    expect_identical(
      dumped_model[["feature_names"]]
      , iconv(feature_names, to = "UTF-8")
    )
  }
})

test_that("lgb.train() works with integer, double, and numeric data", {
  data(mtcars)
  X <- as.matrix(mtcars[, -1L])
  y <- mtcars[, 1L, drop = TRUE]
  expected_mae <- 4.263667
  for (data_mode in c("numeric", "double", "integer")) {
    mode(X) <- data_mode
    nrounds <- 10L
    bst <- lightgbm(
      data = X
      , label = y
      , params = list(
        objective = "regression"
        , min_data_in_bin = 1L
        , min_data_in_leaf = 1L
        , learning_rate = 0.01
        , seed = 708L
        , verbose = VERBOSITY
      )
      , nrounds = nrounds
    )

    # should have trained for 10 iterations and found splits
    modelDT <- lgb.model.dt.tree(bst)
    expect_equal(modelDT[, max(tree_index)], nrounds - 1L)
    expect_gt(nrow(modelDT), nrounds * 3L)

    # should have achieved expected performance
    preds <- predict(bst, X)
    mae <- mean(abs(y - preds))
    expect_true(abs(mae - expected_mae) < TOLERANCE)
  }
})

test_that("lgb.train() updates params based on keyword arguments", {
  dtrain <- lgb.Dataset(
    data = matrix(rnorm(400L), ncol = 4L)
    , label = rnorm(100L)
    , params = list(num_threads = .LGB_MAX_THREADS)
  )

  # defaults from keyword arguments should be used if not specified in params
  invisible(
    capture.output({
      bst <- lgb.train(
        data = dtrain
        , obj = "regression"
        , params = list(num_threads = .LGB_MAX_THREADS)
      )
    })
  )
  expect_equal(bst$params[["verbosity"]], 1L)
  expect_equal(bst$params[["num_iterations"]], 100L)

  # main param names should be preferred to keyword arguments
  invisible(
    capture.output({
      bst <- lgb.train(
        data = dtrain
        , obj = "regression"
        , params = list(
          "verbosity" = 5L
          , "num_iterations" = 2L
          , num_threads = .LGB_MAX_THREADS
        )
      )
    })
  )
  expect_equal(bst$params[["verbosity"]], 5L)
  expect_equal(bst$params[["num_iterations"]], 2L)

  # aliases should be preferred to keyword arguments, and converted to main parameter name
  invisible(
    capture.output({
      bst <- lgb.train(
        data = dtrain
        , obj = "regression"
        , params = list(
          "verbose" = 5L
          , "num_boost_round" = 2L
          , num_threads = .LGB_MAX_THREADS
        )
      )
    })
  )
  expect_equal(bst$params[["verbosity"]], 5L)
  expect_false("verbose" %in% bst$params)
  expect_equal(bst$params[["num_iterations"]], 2L)
  expect_false("num_boost_round" %in% bst$params)
})

test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", {
  set.seed(708L)
  trainDF <- data.frame(
    "feat1" = rep(c(10.0, 100.0), 500L)
    , "target" = rep(c(-50.0, 50.0), 500L)
  )
  validDF <- data.frame(
    "feat1" = rep(50.0, 4L)
    , "target" = rep(50.0, 4L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid1 <- lgb.Dataset(
    data = as.matrix(validDF[["feat1"]], drop = FALSE)
    , label = validDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid2 <- lgb.Dataset(
    data = as.matrix(validDF[1L:10L, "feat1"], drop = FALSE)
    , label = validDF[1L:10L, "target"]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L
  train_params <- list(
    objective = "regression"
    , metric = "rmse"
    , learning_rate = 1.5
    , num_leaves = 5L
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )

  # example 1: two valids, neither are the training data
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "valid2" = dvalid2
    )
    , params = train_params
  )
  expect_named(
    bst$record_evals
    , c("start_iter", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
  expect_length(rmse_scores, nrounds)
  expect_identical(bst$best_iter, which.min(rmse_scores))
  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])

  # example 2: train first (called "train") and two valids
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "train" = dtrain
      , "valid1" = dvalid1
      , "valid2" = dvalid2
    )
    , params = train_params
  )
  expect_named(
    bst$record_evals
    , c("start_iter", "train", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
  expect_length(rmse_scores, nrounds)
  expect_identical(bst$best_iter, which.min(rmse_scores))
  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])

  # example 3: train second (called "train") and two valids
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "train" = dtrain
      , "valid2" = dvalid2
    )
    , params = train_params
  )
  # note that "train" still ends up as the first one
  expect_named(
    bst$record_evals
    , c("start_iter", "train", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
  expect_length(rmse_scores, nrounds)
  expect_identical(bst$best_iter, which.min(rmse_scores))
  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])

  # example 4: train third (called "train") and two valids
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "valid2" = dvalid2
      , "train" = dtrain
    )
    , params = train_params
  )
  # note that "train" still ends up as the first one
  expect_named(
    bst$record_evals
    , c("start_iter", "train", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
  expect_length(rmse_scores, nrounds)
  expect_identical(bst$best_iter, which.min(rmse_scores))
  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])

  # example 5: train second (called "something-random-we-would-not-hardcode") and two valids
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "something-random-we-would-not-hardcode" = dtrain
      , "valid2" = dvalid2
    )
    , params = train_params
  )
  # note that "something-random-we-would-not-hardcode" was recognized as the training
  # data even though it isn't named "train"
  expect_named(
    bst$record_evals
    , c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  rmse_scores <- unlist(bst$record_evals[["valid1"]][["rmse"]][["eval"]])
  expect_length(rmse_scores, nrounds)
  expect_identical(bst$best_iter, which.min(rmse_scores))
  expect_identical(bst$best_score, rmse_scores[which.min(rmse_scores)])

  # example 6: the only valid supplied is the training data
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "train" = dtrain
    )
    , params = train_params
  )
  expect_identical(bst$best_iter, -1L)
  expect_identical(bst$best_score, NA_real_)
})

test_that("lightgbm.train() gives the correct best_score and best_iter for a metric where higher values are better", {
  set.seed(708L)
  trainDF <- data.frame(
    "feat1" = runif(n = 500L, min = 0.0, max = 15.0)
    , "target" = rep(c(0L, 1L), 500L)
  )
  validDF <- data.frame(
    "feat1" = runif(n = 50L, min = 0.0, max = 15.0)
    , "target" = rep(c(0L, 1L), 50L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid1 <- lgb.Dataset(
    data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
    , label = validDF[1L:25L, "target"]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L
  bst <- lgb.train(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "something-random-we-would-not-hardcode" = dtrain
    )
    , params = list(
      objective = "binary"
      , metric = "auc"
      , learning_rate = 1.5
      , num_leaves = 5L
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
  )
  # note that "something-random-we-would-not-hardcode" was recognized as the training
  # data even though it isn't named "train"
  expect_named(
    bst$record_evals
    , c("start_iter", "something-random-we-would-not-hardcode", "valid1")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]])
  expect_length(auc_scores, nrounds)
  expect_identical(bst$best_iter, which.max(auc_scores))
  expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
})

test_that("using lightgbm() without early stopping, best_iter and best_score come from valids and not training data", {
  set.seed(708L)
  # example: train second (called "something-random-we-would-not-hardcode"), two valids,
  #          and a metric where higher values are better ("auc")
  trainDF <- data.frame(
    "feat1" = runif(n = 500L, min = 0.0, max = 15.0)
    , "target" = rep(c(0L, 1L), 500L)
  )
  validDF <- data.frame(
    "feat1" = runif(n = 50L, min = 0.0, max = 15.0)
    , "target" = rep(c(0L, 1L), 50L)
  )
  dtrain <- lgb.Dataset(
    data = as.matrix(trainDF[["feat1"]], drop = FALSE)
    , label = trainDF[["target"]]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid1 <- lgb.Dataset(
    data = as.matrix(validDF[1L:25L, "feat1"], drop = FALSE)
    , label = validDF[1L:25L, "target"]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dvalid2 <- lgb.Dataset(
    data = as.matrix(validDF[26L:50L, "feat1"], drop = FALSE)
    , label = validDF[26L:50L, "target"]
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  nrounds <- 10L
  bst <- lightgbm(
    data = dtrain
    , nrounds = nrounds
    , valids = list(
      "valid1" = dvalid1
      , "something-random-we-would-not-hardcode" = dtrain
      , "valid2" = dvalid2
    )
    , params = list(
      objective = "binary"
      , metric = "auc"
      , learning_rate = 1.5
      , num_leaves = 5L
      , num_threads = .LGB_MAX_THREADS
    )
    , verbose = -7L
  )
  # when verbose <= 0 is passed to lightgbm(), 'valids' is passed through to lgb.train()
  # untouched. If you set verbose to > 0, the training data will still be first but called "train"
  expect_named(
    bst$record_evals
    , c("start_iter", "something-random-we-would-not-hardcode", "valid1", "valid2")
    , ignore.order = FALSE
    , ignore.case = FALSE
  )
  auc_scores <- unlist(bst$record_evals[["valid1"]][["auc"]][["eval"]])
  expect_length(auc_scores, nrounds)
  expect_identical(bst$best_iter, which.max(auc_scores))
  expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
})

test_that("lgb.cv() works when you specify both 'metric' and 'eval' with strings", {
  set.seed(708L)
  nrounds <- 10L
  nfolds <- 4L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.cv(
    params = list(
      objective = "binary"
      , metric = "binary_error"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_CLASSIFICATION
    , nrounds = nrounds
    , nfold = nfolds
    , eval = "binary_logloss"
  )

  # both metrics should have been used
  expect_named(
    bst$record_evals[["valid"]]
    , expected = c("binary_error", "binary_logloss")
    , ignore.order = TRUE
    , ignore.case = FALSE
  )

  # the different metrics shouldn't have been mixed up with each other
  results <- bst$record_evals[["valid"]]
  expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5005654) < TOLERANCE)
  expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.7011232) < TOLERANCE)

  # all boosters should have been created
  expect_length(bst$boosters, nfolds)
})

test_that("lgb.cv() works when you give a function for eval", {
  set.seed(708L)
  nrounds <- 10L
  nfolds <- 3L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.cv(
    params = list(
      objective = "binary"
      , metric = "None"
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_CLASSIFICATION
    , nfold = nfolds
    , nrounds = nrounds
    , eval = .constant_metric
  )

  # the different metrics shouldn't have been mixed up with each other
  results <- bst$record_evals[["valid"]]
  expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
  expect_named(results, "constant_metric")
})

test_that("If first_metric_only is TRUE, lgb.cv() decides to stop early based on only the first metric", {
  set.seed(708L)
  nrounds <- 10L
  nfolds <- 5L
  early_stopping_rounds <- 3L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.cv(
    params = list(
      objective = "regression"
      , metric = "None"
      , early_stopping_rounds = early_stopping_rounds
      , first_metric_only = TRUE
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_REGRESSION
    , nfold = nfolds
    , nrounds = nrounds
    , eval = list(
      .increasing_metric
      , .constant_metric
    )
  )

  # Only the two functions provided to "eval" should have been evaluated
  expect_named(bst$record_evals[["valid"]], c("increasing_metric", "constant_metric"))

  # all 10 iterations should happen, and the best_iter should be the final one
  expect_equal(bst$best_iter, nrounds)

  # best_score should be taken from "increasing_metric"
  #
  # this expected value looks magical and confusing, but it's because
  # evaluation metrics are averaged over all folds.
  #
  # consider 5-fold CV with a metric that adds 0.1 to a global accumulator
  # each time it's called
  #
  # * iter 1: [0.1, 0.2, 0.3, 0.4, 0.5] (mean = 0.3)
  # * iter 2: [0.6, 0.7, 0.8, 0.9, 1.0] (mean = 0.8)
  # * iter 3: [1.1, 1.2, 1.3, 1.4, 1.5] (mean = 1.3)
  #
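  # the fold-averaged value for iteration i is therefore
  # starting_value + mean(seq_len(nfolds) / 10) + (i - 1L) * 0.1 * nfolds,
  # e.g. 0.3, 0.8, 1.3, ... when the accumulator starts at 0.0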
  cv_value <- increasing_metric_starting_value + mean(seq_len(nfolds) / 10.0) + (nrounds - 1L) * 0.1 * nfolds
  expect_equal(bst$best_score, cv_value)

  # early stopping should not have happened. Even though constant_metric
  # had 9 consecutive iterations with no improvement, it is ignored because of
  # first_metric_only = TRUE
  expect_equal(
    length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
    , nrounds
  )
  expect_equal(
    length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
    , nrounds
  )
})

test_that("early stopping works with lgb.cv()", {
  set.seed(708L)
  nrounds <- 10L
  nfolds <- 5L
  early_stopping_rounds <- 3L
  increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
  bst <- lgb.cv(
    params = list(
      objective = "regression"
      , metric = "None"
      , early_stopping_rounds = early_stopping_rounds
      , first_metric_only = TRUE
      , verbose = VERBOSITY
      , num_threads = .LGB_MAX_THREADS
    )
    , data = DTRAIN_RANDOM_REGRESSION
    , nfold = nfolds
    , nrounds = nrounds
    , eval = list(
      .constant_metric
      , .increasing_metric
    )
  )

  # only the two functions provided to "eval" should have been evaluated
  expect_named(bst$record_evals[["valid"]], c("constant_metric", "increasing_metric"))

  # best_iter should be based on the first metric. Since constant_metric
  # never changes, its first iteration was the best one
  expect_equal(bst$best_iter, 1L)

  # best_score should be taken from the first metric
  expect_equal(bst$best_score, 0.2)

  # early stopping should have happened, since constant_metric was the first
  # one passed to eval and it will not improve over consecutive iterations
  #
  # note that this test is identical to the previous one, but with the
  # order of the eval metrics switched
  expect_equal(
    length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
    , early_stopping_rounds + 1L
  )
  expect_equal(
    length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
    , early_stopping_rounds + 1L
  )

  # every booster's predict method should use best_iter as num_iteration in predict
  random_data <- as.matrix(rnorm(10L), ncol = 1L, drop = FALSE)
  for (x in bst$boosters) {
    expect_equal(x$booster$best_iter, bst$best_iter)
    expect_gt(x$booster$current_iter(), bst$best_iter)
    preds_iter <- predict(x$booster, random_data, num_iteration = bst$best_iter)
    preds_no_iter <- predict(x$booster, random_data)
    expect_equal(preds_iter, preds_no_iter)
  }
})

test_that("lgb.cv() respects changes to logging verbosity", {
  dtrain <- lgb.Dataset(
    data = train$data
    , label = train$label
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  # (verbose = 1) should be INFO and WARNING level logs
  lgb_cv_logs <- capture.output({
    cv_bst <- lgb.cv(
      params = list(num_threads = .LGB_MAX_THREADS)
      , nfold = 2L
      , nrounds = 5L
      , data = dtrain
      , obj = "binary"
      , verbose = 1L
    )
  })
  expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))

  # (verbose = 0) should be WARNING level logs only
  lgb_cv_logs <- capture.output({
    cv_bst <- lgb.cv(
      params = list(num_threads = .LGB_MAX_THREADS)
      , nfold = 2L
      , nrounds = 5L
      , data = dtrain
      , obj = "binary"
      , verbose = 0L
    )
  })
  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))

  # (verbose = -1) no logs
  lgb_cv_logs <- capture.output({
    cv_bst <- lgb.cv(
      params = list(num_threads = .LGB_MAX_THREADS)
      , nfold = 2L
      , nrounds = 5L
      , data = dtrain
      , obj = "binary"
      , verbose = -1L
    )
  })
  # NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
  #       dependencies might print other messages
  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
  expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
})

test_that("lgb.cv() updates params based on keyword arguments", {
  dtrain <- lgb.Dataset(
    data = matrix(rnorm(400L), ncol = 4L)
    , label = rnorm(100L)
    , params = list(num_threads = .LGB_MAX_THREADS)
  )

  # defaults from keyword arguments should be used if not specified in params
  invisible(
    capture.output({
      cv_bst <- lgb.cv(
        data = dtrain
        , obj = "regression"
        , params = list(num_threads = .LGB_MAX_THREADS)
        , nfold = 2L
      )
    })
  )

  for (bst in cv_bst$boosters) {
    bst_params <- bst[["booster"]]$params
    expect_equal(bst_params[["verbosity"]], 1L)
    expect_equal(bst_params[["num_iterations"]], 100L)
  }

  # main param names should be preferred to keyword arguments
  invisible(
    capture.output({
      cv_bst <- lgb.cv(
        data = dtrain
        , obj = "regression"
        , params = list(
          "verbosity" = 5L
          , "num_iterations" = 2L
          , num_threads = .LGB_MAX_THREADS
        )
        , nfold = 2L
      )
    })
  )
  for (bst in cv_bst$boosters) {
    bst_params <- bst[["booster"]]$params
    expect_equal(bst_params[["verbosity"]], 5L)
    expect_equal(bst_params[["num_iterations"]], 2L)
  }

  # aliases should be preferred to keyword arguments, and converted to main parameter name
  invisible(
    capture.output({
      cv_bst <- lgb.cv(
        data = dtrain
        , obj = "regression"
        , params = list(
          "verbose" = 5L
          , "num_boost_round" = 2L
          , num_threads = .LGB_MAX_THREADS
        )
        , nfold = 2L
      )
    })
  )
  for (bst in cv_bst$boosters) {
    bst_params <- bst[["booster"]]$params
    expect_equal(bst_params[["verbosity"]], 5L)
    expect_false("verbose" %in% bst_params)
    expect_equal(bst_params[["num_iterations"]], 2L)
    expect_false("num_boost_round" %in% bst_params)
  }

})

test_that("lgb.train() fit on linearly-relatead data improves when using linear learners", {
  set.seed(708L)
  .new_dataset <- function() {
    X <- matrix(rnorm(100L), ncol = 1L)
    return(lgb.Dataset(
      data = X
      , label = 2L * X + runif(nrow(X), 0L, 0.1)
      , params = list(num_threads = .LGB_MAX_THREADS)
    ))
  }

  params <- list(
    objective = "regression"
    , verbose = VERBOSITY
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  bst <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = params
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst))

  dtrain <- .new_dataset()
  bst_linear <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = utils::modifyList(params, list(linear_tree = TRUE))
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst_linear))

  bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  expect_true(bst_lin_last_mse < bst_last_mse)
})


test_that("lgb.train() with a linear learner fails on an already-constructed Dataset with linear=false", {
  set.seed(708L)
  params <- list(
    objective = "regression"
    , verbose = VERBOSITY
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- lgb.Dataset(
    data = matrix(rnorm(100L), ncol = 1L)
    , label = rnorm(100L)
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  dtrain$construct()
  expect_error({
    capture.output({
      bst_linear <- lgb.train(
        data = dtrain
        , nrounds = 10L
        , params = utils::modifyList(params, list(linear_tree = TRUE))
      )
    }, type = "message")
  }, regexp = "Cannot change linear_tree after constructed Dataset handle")
})

test_that("lgb.train() works with linear learners even if Dataset has missing values", {
  set.seed(708L)
  .new_dataset <- function() {
    values <- rnorm(100L)
    values[sample(seq_len(length(values)), size = 10L)] <- NA_real_
    X <- matrix(
      data = sample(values, size = 100L)
      , ncol = 1L
    )
    return(lgb.Dataset(
      data = X
      , label = 2L * X + runif(nrow(X), 0L, 0.1)
      , params = list(num_threads = .LGB_MAX_THREADS)
    ))
  }

  params <- list(
    objective = "regression"
    , verbose = VERBOSITY
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  bst <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = params
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst))

  dtrain <- .new_dataset()
  bst_linear <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = utils::modifyList(params, list(linear_tree = TRUE))
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst_linear))

  bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  expect_true(bst_lin_last_mse < bst_last_mse)
})

test_that("lgb.train() works with linear learners, bagging, and a Dataset that has missing values", {
  set.seed(708L)
  .new_dataset <- function() {
    values <- rnorm(100L)
    values[sample(seq_len(length(values)), size = 10L)] <- NA_real_
    X <- matrix(
      data = sample(values, size = 100L)
      , ncol = 1L
    )
    return(lgb.Dataset(
      data = X
      , label = 2L * X + runif(nrow(X), 0L, 0.1)
      , params = list(num_threads = .LGB_MAX_THREADS)
    ))
  }

  params <- list(
    objective = "regression"
    , verbose = VERBOSITY
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , bagging_freq = 1L
    , subsample = 0.8
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  bst <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = params
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst))

  dtrain <- .new_dataset()
  bst_linear <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = utils::modifyList(params, list(linear_tree = TRUE))
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst_linear))

  bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  expect_true(bst_lin_last_mse < bst_last_mse)
})

test_that("lgb.train() works with linear learners and data where a feature has only 1 non-NA value", {
  set.seed(708L)
  .new_dataset <- function() {
    values <- c(rnorm(100L), rep(NA_real_, 100L))
    values[118L] <- rnorm(1L)
    X <- matrix(
      data = values
      , ncol = 2L
    )
    return(lgb.Dataset(
      data = X
      , label = 2L * X[, 1L] + runif(nrow(X), 0L, 0.1)
      , params = list(
        feature_pre_filter = FALSE
        , num_threads = .LGB_MAX_THREADS
      )
    ))
  }

  params <- list(
    objective = "regression"
    , verbose = -1L
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  bst_linear <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = utils::modifyList(params, list(linear_tree = TRUE))
  )
  expect_true(lgb.is.Booster(bst_linear))
})

test_that("lgb.train() works with linear learners when Dataset has categorical features", {
  set.seed(708L)
  .new_dataset <- function() {
    X <- matrix(numeric(200L), nrow = 100L, ncol = 2L)
    X[, 1L] <- rnorm(100L)
    X[, 2L] <- sample(seq_len(4L), size = 100L, replace = TRUE)
    return(lgb.Dataset(
      data = X
      , label = 2L * X[, 1L] + runif(nrow(X), 0L, 0.1)
      , params = list(num_threads = .LGB_MAX_THREADS)
    ))
  }

  params <- list(
    objective = "regression"
    , verbose = -1L
    , metric = "mse"
    , seed = 0L
    , num_leaves = 2L
    , categorical_feature = 1L
    , num_threads = .LGB_MAX_THREADS
  )

  dtrain <- .new_dataset()
  bst <- lgb.train(
    data = dtrain
    , nrounds = 10L
    , params = params
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst))

  dtrain <- .new_dataset()
  bst_linear <- lgb.train(
    data = dtrain
    , nrounds = 10L
2753
    , params = utils::modifyList(params, list(linear_tree = TRUE))
2754
2755
2756
2757
2758
2759
2760
2761
2762
    , valids = list("train" = dtrain)
  )
  expect_true(lgb.is.Booster(bst_linear))

  bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]]
  expect_true(bst_lin_last_mse < bst_last_mse)
})

test_that("lgb.train() throws an informative error if interaction_constraints is not a list", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(objective = "regression", interaction_constraints = "[1,2],[3]")
  expect_error({
    bst <- lightgbm(
      data = dtrain
      , params = params
      , nrounds = 2L
    )
  }, "interaction_constraints must be a list")
})

test_that(paste0("lgb.train() throws an informative error if the members of interaction_constraints ",
                 "are not character or numeric vectors"), {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(objective = "regression", interaction_constraints = list(list(1L, 2L), list(3L)))
  expect_error({
    bst <- lightgbm(
      data = dtrain
      , params = params
      , nrounds = 2L
    )
  }, "every element in interaction_constraints must be a character vector or numeric vector")
})

test_that("lgb.train() throws an informative error if interaction_constraints contains a too large index", {
  dtrain <- lgb.Dataset(train$data, label = train$label)
  params <- list(objective = "regression",
                 interaction_constraints = list(c(1L, length(colnames(train$data)) + 1L), 3L))
  expect_error({
    bst <- lightgbm(
      data = dtrain
      , params = params
      , nrounds = 2L
    )
  }, "supplied a too large value in interaction_constraints")
})

test_that(paste0("lgb.train() gives same result when interaction_constraints is specified as a list of ",
                 "character vectors, numeric vectors, or a combination"), {
  set.seed(1L)
  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))

  params <- list(
    objective = "regression"
    , interaction_constraints = list(c(1L, 2L), 3L)
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lightgbm(
    data = dtrain
    , params = params
    , nrounds = 2L
  )
  pred1 <- bst$predict(test$data)

  cnames <- colnames(train$data)
  params <- list(
    objective = "regression"
    , interaction_constraints = list(c(cnames[[1L]], cnames[[2L]]), cnames[[3L]])
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lightgbm(
    data = dtrain
    , params = params
    , nrounds = 2L
  )
  pred2 <- bst$predict(test$data)

  params <- list(
    objective = "regression"
    , interaction_constraints = list(c(cnames[[1L]], cnames[[2L]]), 3L)
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lightgbm(
    data = dtrain
    , params = params
    , nrounds = 2L
  )
  pred3 <- bst$predict(test$data)

  expect_equal(pred1, pred2)
  expect_equal(pred2, pred3)

})

test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
  set.seed(1L)
  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))

  params <- list(
    objective = "regression"
    , interaction_constraints = list(c(1L, 2L), 3L)
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lightgbm(
    data = dtrain
    , params = params
    , nrounds = 2L
  )
  pred1 <- bst$predict(test$data)

  new_colnames <- paste0(colnames(train$data), "_x")
  params <- list(
    objective = "regression"
    , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
    , verbose = VERBOSITY
    , num_threads = .LGB_MAX_THREADS
  )
  bst <- lightgbm(
    data = dtrain
    , params = params
    , nrounds = 2L
    , colnames = new_colnames
  )
  pred2 <- bst$predict(test$data)

  expect_equal(pred1, pred2)

})

.generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) {
  n_samples <- 3000L
  x1_positively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)
  x2_negatively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)
  x3_negatively_correlated_with_y <- runif(n = n_samples, min = 0.0, max = 1.0)
  if (x3_to_categorical) {
    x3_negatively_correlated_with_y <- as.integer(x3_negatively_correlated_with_y / 0.01)
    categorical_features <- "feature_3"
  } else {
    categorical_features <- NULL
  }
  X <- matrix(
    data = c(
        x1_positively_correlated_with_y
        , x2_negatively_correlated_with_y
        , x3_negatively_correlated_with_y
    )
    , ncol = 3L
  )
  zs <- rnorm(n = n_samples, mean = 0.0, sd = 0.01)
  scales <- 10.0 * (runif(n = 6L, min = 0.0, max = 1.0) + 0.5)
  y <- (
    scales[1L] * x1_positively_correlated_with_y
    + sin(scales[2L] * pi * x1_positively_correlated_with_y)
    - scales[3L] * x2_negatively_correlated_with_y
    - cos(scales[4L] * pi * x2_negatively_correlated_with_y)
    - scales[5L] * x3_negatively_correlated_with_y
    - cos(scales[6L] * pi * x3_negatively_correlated_with_y)
    + zs
  )
  return(lgb.Dataset(
    data = X
    , label = y
    , categorical_feature = categorical_features
    , free_raw_data = FALSE
    , colnames = c("feature_1", "feature_2", "feature_3")
    , params = list(num_threads = .LGB_MAX_THREADS)
  ))
}

.is_increasing <- function(y) {
  return(all(diff(y) >= 0.0))
}

.is_decreasing <- function(y) {
  return(all(diff(y) <= 0.0))
}

.is_non_monotone <- function(y) {
  return(any(diff(y) < 0.0) & any(diff(y) > 0.0))
}
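
# e.g. .is_increasing(c(1.0, 2.0, 2.0)) and .is_decreasing(c(3.0, 1.0, 1.0)) are
# both TRUE (ties are allowed), while .is_non_monotone(c(1.0, 3.0, 2.0)) is TRUE
# because the sequence both rises and falls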

# R equivalent of numpy.linspace()
.linspace <- function(start_val, stop_val, num) {
  weights <- (seq_len(num) - 1L) / (num - 1L)
  return(start_val + weights * (stop_val - start_val))
}
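
# e.g. .linspace(0.0, 1.0, 5L) returns c(0.0, 0.25, 0.5, 0.75, 1.0)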

.is_correctly_constrained <- function(learner, x3_to_categorical) {
  iterations <- 10L
  n <- 1000L
  variable_x <- .linspace(0L, 1L, n)
  fixed_xs_values <- .linspace(0L, 1L, n)
  for (i in seq_len(iterations)) {
    fixed_x <- fixed_xs_values[i] * rep(1.0, n)
    monotonically_increasing_x <- matrix(
      data = c(variable_x, fixed_x, fixed_x)
      , ncol = 3L
    )
    monotonically_increasing_y <- predict(
      learner
      , monotonically_increasing_x
    )

    monotonically_decreasing_x <- matrix(
      data = c(fixed_x, variable_x, fixed_x)
      , ncol = 3L
    )
    monotonically_decreasing_y <- predict(
      learner
      , monotonically_decreasing_x
    )

    if (x3_to_categorical) {
      non_monotone_data <- c(
        fixed_x
        , fixed_x
        , as.integer(variable_x / 0.01)
      )
    } else {
      non_monotone_data <- c(fixed_x, fixed_x, variable_x)
    }
    non_monotone_x <- matrix(
      data = non_monotone_data
      , ncol = 3L
    )
    non_monotone_y <- predict(
      learner
      , non_monotone_x
    )
    if (!(.is_increasing(monotonically_increasing_y) &&
          .is_decreasing(monotonically_decreasing_y) &&
          .is_non_monotone(non_monotone_y)
    )) {
      return(FALSE)
    }
  }
  return(TRUE)
}

for (x3_to_categorical in c(TRUE, FALSE)) {
  set.seed(708L)
  dtrain <- .generate_trainset_for_monotone_constraints_tests(
    x3_to_categorical = x3_to_categorical
  )
  for (monotone_constraints_method in c("basic", "intermediate", "advanced")) {
    test_msg <- paste0(
      "lgb.train() supports monotone constraints ("
      , "categoricals="
      , x3_to_categorical
      , ", method="
      , monotone_constraints_method
      , ")"
    )
    test_that(test_msg, {
      params <- list(
        min_data = 20L
        , num_leaves = 20L
        , monotone_constraints = c(1L, -1L, 0L)
        , monotone_constraints_method = monotone_constraints_method
        , use_missing = FALSE
        , verbose = VERBOSITY
        , num_threads = .LGB_MAX_THREADS
      )
      constrained_model <- lgb.train(
        params = params
        , data = dtrain
        , obj = "regression_l2"
        , nrounds = 100L
      )
      expect_true({
        .is_correctly_constrained(
          learner = constrained_model
          , x3_to_categorical = x3_to_categorical
        )
      })
    })
  }
}

test_that("lightgbm() accepts objective as function argument and under params", {
  bst1 <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(objective = "regression_l1", num_threads = .LGB_MAX_THREADS)
    , nrounds = 5L
    , verbose = VERBOSITY
  )
  expect_equal(bst1$params$objective, "regression_l1")
  model_txt_lines <- strsplit(
    x = bst1$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))

  bst2 <- lightgbm(
    data = train$data
    , label = train$label
    , objective = "regression_l1"
    , nrounds = 5L
    , verbose = VERBOSITY
  )
  expect_equal(bst2$params$objective, "regression_l1")
  model_txt_lines <- strsplit(
    x = bst2$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
})

test_that("lightgbm() prioritizes objective under params over objective as function argument", {
  bst1 <- lightgbm(
    data = train$data
    , label = train$label
    , objective = "regression"
    , params = list(objective = "regression_l1", num_threads = .LGB_MAX_THREADS)
    , nrounds = 5L
    , verbose = VERBOSITY
  )
  expect_equal(bst1$params$objective, "regression_l1")
  model_txt_lines <- strsplit(
    x = bst1$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))

  bst2 <- lightgbm(
    data = train$data
    , label = train$label
    , objective = "regression"
    , params = list(loss = "regression_l1", num_threads = .LGB_MAX_THREADS)
    , nrounds = 5L
    , verbose = VERBOSITY
  )
  expect_equal(bst2$params$objective, "regression_l1")
  model_txt_lines <- strsplit(
    x = bst2$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression_l1"))
  expect_false(any(model_txt_lines == "objective=regression_l2"))
})

test_that("lightgbm() accepts init_score as function argument", {
  bst1 <- lightgbm(
    data = train$data
    , label = train$label
    , objective = "binary"
    , nrounds = 5L
    , verbose = VERBOSITY
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  pred1 <- predict(bst1, train$data, type = "raw")

  bst2 <- lightgbm(
    data = train$data
    , label = train$label
    , init_score = pred1
    , objective = "binary"
    , nrounds = 5L
    , verbose = VERBOSITY
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  pred2 <- predict(bst2, train$data, type = "raw")

  expect_true(any(pred1 != pred2))
})

test_that("lightgbm() defaults to 'regression' objective if objective not otherwise provided", {
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 5L
    , verbose = VERBOSITY
    , params = list(num_threads = .LGB_MAX_THREADS)
  )
  expect_equal(bst$params$objective, "regression")
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(model_txt_lines == "objective=regression"))
  expect_false(any(model_txt_lines == "objective=regression_l1"))
})

test_that("lightgbm() accepts 'num_threads' as either top-level argument or under params", {
  bst <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 5L
    , verbose = VERBOSITY
    , num_threads = 1L
  )
  expect_equal(bst$params$num_threads, 1L)
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))

  bst <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 5L
    , verbose = VERBOSITY
    , params = list(num_threads = 1L)
  )
  expect_equal(bst$params$num_threads, 1L)
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))

  bst <- lightgbm(
    data = train$data
    , label = train$label
    , nrounds = 5L
    , verbose = VERBOSITY
    , num_threads = 10L
    , params = list(num_threads = 1L)
  )
  expect_equal(bst$params$num_threads, 1L)
  model_txt_lines <- strsplit(
    x = bst$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
})

test_that("lightgbm() accepts 'weight' and 'weights'", {
  data(mtcars)
  X <- as.matrix(mtcars[, -1L])
  y <- as.numeric(mtcars[, 1L])
  w <- rep(1.0, nrow(X))
  model <- lightgbm(
    X
    , y
    , weights = w
    , obj = "regression"
    , nrounds = 5L
    , verbose = VERBOSITY
    , params = list(
      min_data_in_bin = 1L
      , min_data_in_leaf = 1L
      , num_threads = .LGB_MAX_THREADS
    )
  )
  expect_equal(model$.__enclos_env__$private$train_set$get_field("weight"), w)

  # Avoid a bad CRAN check due to partial argument matches
  lgb_args <- list(
    X
    , y
    , weight = w
    , obj = "regression"
    , nrounds = 5L
    , verbose = -1L
  )
  model <- do.call(lightgbm, lgb_args)
  expect_equal(model$.__enclos_env__$private$train_set$get_field("weight"), w)
})

.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
  expect_identical(
    object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
    , expected = lgb_info
  )
  expect_identical(
    object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
    , expected = lgb_warn
  )
  expect_identical(
    object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
    , expected = early_stopping
  )
  expect_identical(
    object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
    , expected = early_stopping
  )
  expect_identical(
    object = any(grepl("valid's auc\\:[0-9]+", log_txt))
    , expected = valid_eval_msg
  )
}

.assert_has_expected_record_evals <- function(fitted_model) {
  record_evals <- fitted_model$record_evals
  expect_equal(record_evals$start_iter, 1L)
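  # lgb.cv() records fold-averaged scores, so the expected AUCs for a CVBooster
  # differ from those for a single Booster trained on the full data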
  if (inherits(fitted_model, "lgb.CVBooster")) {
    expected_valid_auc <- c(0.979056, 0.9844697, 0.9900813, 0.9908026, 0.9935588)
  } else {
    expected_valid_auc <-  c(0.9805752, 0.9805752, 0.9934957, 0.9934957, 0.9949372)
  }
  expect_equal(
    object = unlist(record_evals[["valid"]][["auc"]][["eval"]])
    , expected = expected_valid_auc
    , tolerance = TOLERANCE
  )
  expect_named(record_evals, c("start_iter", "valid"), ignore.order = TRUE, ignore.case = FALSE)
  expect_equal(record_evals[["valid"]][["auc"]][["eval_err"]], list())
}

.train_for_verbosity_test <- function(train_function, verbose_kwarg, verbose_param) {
  set.seed(708L)
  nrounds <- 5L
  params <- list(
    num_leaves = 5L
    , objective = "binary"
    , metric = "auc"
    , early_stopping_round = nrounds
    , num_threads = .LGB_MAX_THREADS
  )
  if (!is.null(verbose_param)) {
    params[["verbose"]] <- verbose_param
  }
  train_kwargs <- list(
    params = params
    , nrounds = nrounds
  )
  if (!is.null(verbose_kwarg)) {
    train_kwargs[["verbose"]] <- verbose_kwarg
  }
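  # deparse(substitute()) recovers the name of the function object passed as
  # train_function, so this helper can build the right data arguments for
  # lgb.train(), lightgbm(), or lgb.cv()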
  function_name <- deparse(substitute(train_function))
  if (function_name == "lgb.train") {
    train_kwargs[["data"]] <- lgb.Dataset(
      data = train$data
      , label = train$label
      , params = list(num_threads = .LGB_MAX_THREADS)
    )
    train_kwargs[["valids"]] <- list(
      "valid" = lgb.Dataset(data = test$data, label = test$label)
    )
  } else if (function_name == "lightgbm") {
    train_kwargs[["data"]] <- train$data
    train_kwargs[["label"]] <- train$label
    train_kwargs[["valids"]] <- list(
      "valid" = lgb.Dataset(data = test$data, label = test$label)
    )
  } else if (function_name == "lgb.cv") {
    train_kwargs[["data"]] <- lgb.Dataset(
      data = train$data
      , label = train$label
    )
    train_kwargs[["nfold"]] <- 3L
    train_kwargs[["showsd"]] <- FALSE
  }
  log_txt <- capture.output({
    bst <- do.call(
      what = train_function
      , args = train_kwargs
    )
  })
  return(list(booster = bst, logs = log_txt))
}
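
# [usage sketch] A minimal, hypothetical example (not called by any test) of
# how the helper above is driven; argument names mirror its actual signature.
.example_verbosity_helper_usage <- function() {
  out <- .train_for_verbosity_test(
    train_function = lgb.train
    , verbose_kwarg = 1L    # 'verbose' keyword argument passed to lgb.train()
    , verbose_param = NULL  # leave 'verbose' out of params entirely
  )
  # out$booster is the fitted Booster, out$logs the captured console output
  list(n_log_lines = length(out[["logs"]]), booster = out[["booster"]])
}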

test_that("lgb.train() only prints eval metrics when expected to", {

  # regardless of value passed to keyword argument 'verbose', value in params
  # should take precedence
  for (verbose_keyword_arg in c(-5L, -1L, 0L, 1L, 5L)) {

    # (verbose = -1) should not be any logs, should be record evals
    out <- .train_for_verbosity_test(
      train_function = lgb.train
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = -1L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = FALSE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose = 0) should be only WARN-level LightGBM logs
    out <- .train_for_verbosity_test(
      train_function = lgb.train
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = 0L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = TRUE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
    out <- .train_for_verbosity_test(
      train_function = lgb.train
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = 1L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = TRUE
      , lgb_warn = TRUE
      , early_stopping = TRUE
      , valid_eval_msg = TRUE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )
  }

  # if verbosity isn't specified in `params`, changing keyword argument `verbose` should
  # alter what messages are printed

  # (verbose = -1) should not be any logs, should be record evals
  out <- .train_for_verbosity_test(
    train_function = lgb.train
    , verbose_kwarg = -1L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = FALSE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose = 0) should be only WARN-level LightGBM logs
  out <- .train_for_verbosity_test(
    train_function = lgb.train
    , verbose_kwarg = 0L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = TRUE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
  out <- .train_for_verbosity_test(
    train_function = lgb.train
    , verbose_kwarg = 1L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = TRUE
    , lgb_warn = TRUE
    , early_stopping = TRUE
    , valid_eval_msg = TRUE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )
})

test_that("lightgbm() only prints eval metrics when expected to", {

  # regardless of value passed to keyword argument 'verbose', value in params
  # should take precedence
  for (verbose_keyword_arg in c(-5L, -1L, 0L, 1L, 5L)) {

    # (verbose = -1) should not be any logs, train should not be in valids
    out <- .train_for_verbosity_test(
      train_function = lightgbm
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = -1L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = FALSE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose = 0) should be only WARN-level LightGBM logs, train should not be in valids
    out <- .train_for_verbosity_test(
      train_function = lightgbm
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = 0L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = TRUE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages, and
    #               train should be in valids
    out <- .train_for_verbosity_test(
      train_function = lightgbm
      , verbose_kwarg = verbose_keyword_arg
      , verbose_param = 1L
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = TRUE
      , lgb_warn = TRUE
      , early_stopping = TRUE
      , valid_eval_msg = TRUE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )
  }

  # if verbosity isn't specified in `params`, changing keyword argument `verbose` should
  # alter what messages are printed

  # (verbose = -1) should not be any logs, train should not be in valids
  out <- .train_for_verbosity_test(
    train_function = lightgbm
    , verbose_kwarg = -1L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = FALSE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose = 0) should be only WARN-level LightGBM logs, train should not be in valids
  out <- .train_for_verbosity_test(
    train_function = lightgbm
    , verbose_kwarg = 0L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = TRUE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages, and
  #               train should be in valids
  out <- .train_for_verbosity_test(
    train_function = lightgbm
    , verbose_kwarg = 1L
    , verbose_param = NULL
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = TRUE
    , lgb_warn = TRUE
    , early_stopping = TRUE
    , valid_eval_msg = TRUE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )
})

test_that("lgb.cv() only prints eval metrics when expected to", {

  # regardless of value passed to keyword argument 'verbose', value in params
  # should take precedence
  for (verbose_keyword_arg in c(-5L, -1L, 0L, 1L, 5L)) {

    # (verbose = -1) should not be any logs, should be record evals
    out <- .train_for_verbosity_test(
      verbose_kwarg = verbose_keyword_arg
      , verbose_param = -1L
      , train_function = lgb.cv
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = FALSE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose = 0) should be only WARN-level LightGBM logs
    out <- .train_for_verbosity_test(
      verbose_kwarg = verbose_keyword_arg
      , verbose_param = 0L
      , train_function = lgb.cv
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = FALSE
      , lgb_warn = TRUE
      , early_stopping = FALSE
      , valid_eval_msg = FALSE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )

    # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
    out <- .train_for_verbosity_test(
      verbose_kwarg = verbose_keyword_arg
      , verbose_param = 1L
      , train_function = lgb.cv
    )
    .assert_has_expected_logs(
      log_txt = out[["logs"]]
      , lgb_info = TRUE
      , lgb_warn = TRUE
      , early_stopping = TRUE
      , valid_eval_msg = TRUE
    )
    .assert_has_expected_record_evals(
      fitted_model = out[["booster"]]
    )
  }

  # if verbosity isn't specified in `params`, changing keyword argument `verbose` should
  # alter what messages are printed

  # (verbose = -1) should not be any logs, should be record evals
  out <- .train_for_verbosity_test(
    verbose_kwarg = -1L
    , verbose_param = NULL
    , train_function = lgb.cv
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = FALSE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose = 0) should be only WARN-level LightGBM logs
  out <- .train_for_verbosity_test(
    verbose_kwarg = 0L
    , verbose_param = NULL
    , train_function = lgb.cv
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = FALSE
    , lgb_warn = TRUE
    , early_stopping = FALSE
    , valid_eval_msg = FALSE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )

  # (verbose > 0) should be INFO- and WARN-level LightGBM logs, and record eval messages
  out <- .train_for_verbosity_test(
    verbose_kwarg = 1L
    , verbose_param = NULL
    , train_function = lgb.cv
  )
  .assert_has_expected_logs(
    log_txt = out[["logs"]]
    , lgb_info = TRUE
    , lgb_warn = TRUE
    , early_stopping = TRUE
    , valid_eval_msg = TRUE
  )
  .assert_has_expected_record_evals(
    fitted_model = out[["booster"]]
  )
})

test_that("lightgbm() changes objective='auto' appropriately", {
  # Regression
  data("mtcars")
  y <- mtcars$mpg
  x <- as.matrix(mtcars[, -1L])
  model <- lightgbm(x, y, objective = "auto", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)
  expect_equal(model$params$objective, "regression")
  model_txt_lines <- strsplit(
    x = model$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("objective=regression", model_txt_lines, fixed = TRUE)))
  expect_false(any(grepl("objective=regression_l1", model_txt_lines, fixed = TRUE)))

  # Binary classification
  x <- train$data
  y <- factor(train$label)
  model <- lightgbm(x, y, objective = "auto", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)
  expect_equal(model$params$objective, "binary")
  model_txt_lines <- strsplit(
    x = model$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("objective=binary", model_txt_lines, fixed = TRUE)))

  # Multi-class classification
  data("iris")
  y <- factor(iris$Species)
  x <- as.matrix(iris[, -5L])
  model <- lightgbm(x, y, objective = "auto", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)
  expect_equal(model$params$objective, "multiclass")
  expect_equal(model$params$num_class, 3L)
  model_txt_lines <- strsplit(
    x = model$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("objective=multiclass", model_txt_lines, fixed = TRUE)))
})
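
# [summary] The objective = "auto" dispatch exercised above, assuming the
# documented behavior: numeric labels -> "regression", a two-level factor ->
# "binary", a factor with more than two levels -> "multiclass" (with
# num_class inferred from nlevels(y)).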

test_that("lightgbm() determines number of classes for non-default multiclass objectives", {
  data("iris")
  y <- factor(iris$Species)
  x <- as.matrix(iris[, -5L])
  model <- lightgbm(
    x
    , y
    , objective = "multiclassova"
    , verbose = VERBOSITY
    , nrounds = 5L
    , num_threads = .LGB_MAX_THREADS
  )
  expect_equal(model$params$objective, "multiclassova")
  expect_equal(model$params$num_class, 3L)
  model_txt_lines <- strsplit(
    x = model$save_model_to_string()
    , split = "\n"
    , fixed = TRUE
  )[[1L]]
  expect_true(any(grepl("objective=multiclassova", model_txt_lines, fixed = TRUE)))
})

test_that("lightgbm() doesn't accept binary classification with non-binary factors", {
  data("iris")
  y <- factor(iris$Species)
  x <- as.matrix(iris[, -5L])
  expect_error({
    lightgbm(x, y, objective = "binary", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)
  }, regexp = "Factors with >2 levels as labels only allowed for multi-class objectives")
})

test_that("lightgbm() doesn't accept multi-class classification with binary factors", {
  data("iris")
  y <- as.character(iris$Species)
  y[y == "setosa"] <- "versicolor"
  y <- factor(y)
  x <- as.matrix(iris[, -5L])
  expect_error({
    lightgbm(x, y, objective = "multiclass", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)
  }, regexp = "Two-level factors as labels only allowed for objective='binary'")
})

test_that("lightgbm() model predictions retain factor levels for multiclass classification", {
  data("iris")
  y <- factor(iris$Species)
  x <- as.matrix(iris[, -5L])
  model <- lightgbm(x, y, objective = "auto", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)

  pred <- predict(model, x, type = "class")
  expect_true(is.factor(pred))
  expect_equal(levels(pred), levels(y))

  pred <- predict(model, x, type = "response")
  expect_equal(colnames(pred), levels(y))

  pred <- predict(model, x, type = "raw")
  expect_equal(colnames(pred), levels(y))
})

test_that("lightgbm() model predictions retain factor levels for binary classification", {
  data("iris")
  y <- as.character(iris$Species)
  y[y == "setosa"] <- "versicolor"
  y <- factor(y)
  x <- as.matrix(iris[, -5L])
  model <- lightgbm(x, y, objective = "auto", verbose = VERBOSITY, nrounds = 5L, num_threads = .LGB_MAX_THREADS)

  pred <- predict(model, x, type = "class")
  expect_true(is.factor(pred))
  expect_equal(levels(pred), levels(y))

  pred <- predict(model, x, type = "response")
  expect_true(is.vector(pred))
  expect_true(is.numeric(pred))
  expect_false(any(pred %in% y))

  pred <- predict(model, x, type = "raw")
  expect_true(is.vector(pred))
  expect_true(is.numeric(pred))
  expect_false(any(pred %in% y))
})
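
# [summary] predict() 'type' values exercised above: "raw" gives scores on the
# link scale, "response" gives probabilities (a matrix with one column per
# class for multiclass models, a numeric vector for binary ones), and "class"
# gives a factor carrying the original training levels.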