Unverified commit 00f87c52 authored by James Lamb, committed by GitHub

[R-package] remove support for '...' in lgb.train() (#4863)



* [R-package] remove support for '...' in lgb.train()

* Apply suggestions from code review
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 431556a0
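For reference, a minimal sketch of the migration this commit completes (the dataset setup below is illustrative, not taken from this diff): any keyword argument that was previously swallowed by `...` must now be supplied through the `params` list.

```r
library(lightgbm)

# illustrative data; any lgb.Dataset works here
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

# before: parameters passed through '...' (warned since v3.3.0,
# an "unused argument" error after this commit)
# bst <- lgb.train(data = dtrain, nrounds = 5L, objective = "binary")

# after: all training parameters go in 'params'
bst <- lgb.train(
    data = dtrain
    , nrounds = 5L
    , params = list(objective = "binary")
)
```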
@@ -12,19 +12,6 @@
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
#' original datasets
#' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
#' the "Parameters" section of the documentation} for more information. A few key parameters:
#' \itemize{
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfitting. Tree still grow by leaf-wise.}
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
#' not the number of threads (most CPU using hyper-threading to generate 2 threads
#' per CPU core).}
#' }
#' NOTE: As of v3.3.0, use of \code{...} is deprecated. Add parameters to \code{params} directly.
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained booster model \code{lgb.Booster}.
#'
@@ -67,8 +54,7 @@ lgb.train <- function(params = list(),
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
serializable = TRUE,
...) {
serializable = TRUE) {
# validate inputs early to avoid unnecessary computation
if (nrounds <= 0L) {
@@ -88,23 +74,12 @@
}
# Setup temporary variables
additional_params <- list(...)
params <- append(params, additional_params)
params$verbose <- verbose
params <- lgb.check.obj(params = params, obj = obj)
params <- lgb.check.eval(params = params, eval = eval)
fobj <- NULL
eval_functions <- list(NULL)
if (length(additional_params) > 0L) {
warning(paste0(
"lgb.train: Found the following passed through '...': "
, paste(names(additional_params), collapse = ", ")
, ". These will be used, but in future releases of lightgbm, this warning will become an error. "
, "Add these to 'params' instead. See ?lgb.train for documentation on how to call this function."
))
}
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
......
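As background for the hunk above, a standalone illustration (not code from this repo; `demo_train` is a hypothetical function) of the `list(...)` capture pattern that was deleted: extra named arguments were collected into `additional_params`, a deprecation warning was raised, and the entries were appended to `params`.

```r
# hypothetical function illustrating the removed '...' capture pattern
demo_train <- function(params = list(), ...) {
  additional_params <- list(...)
  if (length(additional_params) > 0L) {
    warning(
      "Pass these through 'params' instead: "
      , paste(names(additional_params), collapse = ", ")
    )
  }
  # entries supplied through '...' were appended after 'params'
  append(params, additional_params)
}

# both entries end up in one parameter list, with a warning for the second
str(demo_train(params = list(objective = "binary"), num_leaves = 5L))
```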
@@ -108,14 +108,6 @@ NULL
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.}
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
#' not the number of threads (most CPU using hyper-threading to generate 2 threads
#' per CPU core).}
#' }
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained \code{lgb.Booster}
......
@@ -20,8 +20,7 @@ lgb.train(
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
serializable = TRUE,
...
serializable = TRUE
)
}
\arguments{
@@ -102,20 +101,6 @@ original datasets}
\item{serializable}{whether to make the resulting objects serializable through functions such as
\code{save} or \code{saveRDS} (see section "Model serialization").}
\item{...}{other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
the "Parameters" section of the documentation} for more information. A few key parameters:
\itemize{
\item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
\item{\code{num_leaves}: Maximum number of leaves in one tree.}
\item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
overfitting. Tree still grow by leaf-wise.}
\item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
not the number of threads (most CPU using hyper-threading to generate 2 threads
per CPU core).}
}
NOTE: As of v3.3.0, use of \code{...} is deprecated. Add parameters to \code{params} directly.}
}
\value{
a trained booster model \code{lgb.Booster}.
......
@@ -68,14 +68,6 @@ If passing `NULL`, will not save the trained model to disk.}
say "the first and tenth columns").}
\item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}
\item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
\item{\code{num_leaves}: Maximum number of leaves in one tree.}
\item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
overfit when #data is small. Tree still grow by leaf-wise.}
\item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
not the number of threads (most CPU using hyper-threading to generate 2 threads
per CPU core).}
}}
}
\value{
......
@@ -6,7 +6,9 @@ test_that("Predictor$finalize() should not fail", {
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, params = list(
objective = "regression"
)
, verbose = -1L
, nrounds = 3L
)
@@ -32,7 +34,9 @@ test_that("predictions do not fail for integer input", {
dtrain <- lgb.Dataset(X, label = y)
fit <- lgb.train(
data = dtrain
, objective = "regression"
, params = list(
objective = "regression"
)
, verbose = -1L
, nrounds = 3L
)
@@ -62,10 +66,12 @@ test_that("start_iteration works correctly", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 0.6
, nrounds = 50L
, objective = "binary"
)
, nrounds = 50L
, valids = list("test" = dtest)
, early_stopping_rounds = 2L
)
......
@@ -72,10 +72,12 @@ test_that("train and predict binary classification", {
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, nrounds = nrounds
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = "binary_error"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_false(is.null(bst$record_evals))
@@ -100,14 +102,16 @@ test_that("train and predict softmax", {
bst <- lightgbm(
data = as.matrix(iris[, -5L])
, label = lb
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 0.05
, nrounds = 20L
, min_data = 20L
, min_hessian = 10.0
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3L
)
, nrounds = 20L
, save_name = tempfile(fileext = ".model")
)
@@ -125,11 +129,13 @@ test_that("use of multiple eval metrics works", {
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 10L
, objective = "binary"
, metric = metrics
)
, nrounds = 10L
, save_name = tempfile(fileext = ".model")
)
expect_false(is.null(bst$record_evals))
@@ -147,10 +153,12 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, nrounds = nrounds
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = "binary_error"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_true(abs(bst$lower_bound() - -1.590853) < TOLERANCE)
@@ -163,10 +171,12 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, nrounds = nrounds
, params = list(
num_leaves = 5L
, objective = "regression"
, metric = "l2"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_true(abs(bst$lower_bound() - 0.1513859) < TOLERANCE)
@@ -264,13 +274,15 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, nrounds = nrounds
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = c(
"binary_error"
, "auc"
)
)
, nrounds = nrounds
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -521,11 +533,11 @@ test_that("lgb.train() works as expected with multiple eval metrics", {
train$data
, label = train$label
)
, learning_rate = 1.0
, nrounds = 10L
, params = list(
objective = "binary"
, metric = metrics
, learning_rate = 1.0
)
, valids = list(
"train" = lgb.Dataset(
@@ -1499,13 +1511,13 @@ test_that("when early stopping is not activated, best_iter and best_score come f
objective = "regression"
, metric = "rmse"
, learning_rate = 1.5
, num_leaves = 5L
)
# example 1: two valids, neither are the training data
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -1527,7 +1539,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
, "valid1" = dvalid1
@@ -1550,7 +1561,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "train" = dtrain
@@ -1574,7 +1584,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -1598,7 +1607,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1623,7 +1631,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
)
@@ -1655,7 +1662,6 @@ test_that("lightgbm.train() gives the correct best_score and best_iter for a met
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1664,6 +1670,7 @@ test_that("lightgbm.train() gives the correct best_score and best_iter for a met
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
, num_leaves = 5L
)
)
# note that "something-random-we-would-not-hardcode" was recognized as the training
@@ -1708,7 +1715,6 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com
bst <- lightgbm(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1718,6 +1724,7 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
, num_leaves = 5L
)
, verbose = -7L
, save_name = tempfile(fileext = ".model")
......
@@ -9,7 +9,9 @@ test_that("Booster$finalize() should not fail", {
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, params = list(
objective = "regression"
)
, verbose = -1L
, nrounds = 3L
)
@@ -61,6 +63,8 @@ test_that("lgb.get.eval.result() should throw an informative error for incorrect
params = list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
, data = dtrain
, nrounds = 5L
@@ -71,8 +75,6 @@ test_that("lgb.get.eval.result() should throw an informative error for incorrect
, label = agaricus.test$label
)
)
, min_data = 1L
, learning_rate = 1.0
)
expect_error({
eval_results <- lgb.get.eval.result(
@@ -94,6 +96,8 @@ test_that("lgb.get.eval.result() should throw an informative error for incorrect
params = list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
, data = dtrain
, nrounds = 5L
@@ -104,8 +108,6 @@ test_that("lgb.get.eval.result() should throw an informative error for incorrect
, label = agaricus.test$label
)
)
, min_data = 1L
, learning_rate = 1.0
)
expect_error({
eval_results <- lgb.get.eval.result(
@@ -127,10 +129,12 @@ test_that("lgb.load() gives the expected error messages given different incorrec
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, params = list(
objective = "binary"
, num_leaves = 4L
, learning_rate = 1.0
)
, nrounds = 2L
, objective = "binary"
, save_name = tempfile(fileext = ".model")
)
@@ -171,10 +175,12 @@ test_that("Loading a Booster from a text file works", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)
, nrounds = 2L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
@@ -244,10 +250,12 @@ test_that("Loading a Booster from a string works", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)
, nrounds = 2L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
@@ -274,10 +282,12 @@ test_that("Saving a large model to string should work", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 100L
, params = list(
num_leaves = 100L
, learning_rate = 0.01
, nrounds = 500L
, objective = "binary"
)
, nrounds = 500L
, save_name = tempfile(fileext = ".model")
, verbose = -1L
)
@@ -316,10 +326,12 @@ test_that("Saving a large model to JSON should work", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 100L
, params = list(
num_leaves = 100L
, learning_rate = 0.01
, nrounds = 200L
, objective = "binary"
)
, nrounds = 200L
, save_name = tempfile(fileext = ".model")
, verbose = -1L
)
@@ -344,10 +356,12 @@ test_that("If a string and a file are both passed to lgb.load() the file is used
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)
, nrounds = 2L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
@@ -398,10 +412,12 @@ test_that("Creating a Booster from a Dataset with an existing predictor should w
bst <- lightgbm(
data = as.matrix(agaricus.train$data)
, label = agaricus.train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = nrounds
, objective = "binary"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
data(agaricus.test, package = "lightgbm")
@@ -485,10 +501,12 @@ test_that("Booster$rollback_one_iter() should work as expected", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = nrounds
, objective = "binary"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_equal(bst$current_iter(), nrounds)
@@ -517,10 +535,12 @@ test_that("Booster$update() passing a train_set works as expected", {
bst <- lightgbm(
data = as.matrix(agaricus.train$data)
, label = agaricus.train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = nrounds
, objective = "binary"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
@@ -538,10 +558,12 @@ test_that("Booster$update() passing a train_set works as expected", {
bst2 <- lightgbm(
data = as.matrix(agaricus.train$data)
, label = agaricus.train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = nrounds + 1L
, objective = "binary"
)
, nrounds = nrounds + 1L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst2))
@@ -561,10 +583,12 @@ test_that("Booster$update() throws an informative error if you provide a non-Dat
bst <- lightgbm(
data = as.matrix(agaricus.train$data)
, label = agaricus.train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = nrounds
, objective = "binary"
)
, nrounds = nrounds
, save_name = tempfile(fileext = ".model")
)
expect_error({
@@ -652,10 +676,12 @@ test_that("Saving a model with different feature importance types works", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)
, nrounds = 2L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
@@ -705,10 +731,12 @@ test_that("Saving a model with unknown importance type fails", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)
, nrounds = 2L
, save_name = tempfile(fileext = ".model")
)
expect_true(lgb.is.Booster(bst))
......
@@ -78,12 +78,12 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
, metric = "multi_logloss"
, num_class = 3L
, learning_rate = 0.00001
, min_data = 1L
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 3L
, min_data = 1L
)
num_trees <- 5L
tree_interpretation <- lgb.interprete(
......
@@ -76,12 +76,12 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat
, metric = "multi_logloss"
, num_class = 3L
, learning_rate = 0.00001
, min_data = 1L
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 3L
, min_data = 1L
)
num_trees <- 5L
tree_interpretation <- lgb.interprete(
......
@@ -8,11 +8,11 @@ test_that("lgb.unloader works as expected", {
params = list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
, data = dtrain
, nrounds = 1L
, min_data = 1L
, learning_rate = 1.0
)
expect_true(exists("bst"))
result <- lgb.unloader(restore = TRUE, wipe = TRUE, envir = environment())
@@ -28,21 +28,21 @@ test_that("lgb.unloader finds all boosters and removes them", {
params = list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
, data = dtrain
, nrounds = 1L
, min_data = 1L
, learning_rate = 1.0
)
bst2 <- lgb.train(
params = list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
, data = dtrain
, nrounds = 1L
, min_data = 1L
, learning_rate = 1.0
)
expect_true(exists("bst1"))
expect_true(exists("bst2"))
......
@@ -17,12 +17,14 @@ test_that("Feature penalties work properly", {
lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, params = list(
num_leaves = 5L
, learning_rate = 0.05
, nrounds = 5L
, objective = "binary"
, feature_penalty = paste0(feature_penalties, collapse = ",")
, metric = "binary_error"
)
, nrounds = 5L
, verbose = -1L
, save_name = tempfile(fileext = ".model")
)
@@ -64,20 +66,20 @@ test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where
test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
params <- list(
num_leaves = 5L
, learning_rate = 0.05
, objective = "binary"
, metric = "binary_error"
)
params[[boosting_param]] <- "dart"
expect_warning({
result <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, learning_rate = 0.05
, params = params
, nrounds = 5L
, objective = "binary"
, metric = "binary_error"
, verbose = -1L
, params = stats::setNames(
object = "dart"
, nm = boosting_param
)
, save_name = tempfile(fileext = ".model")
)
}, regexp = "Early stopping is not available in 'dart' mode")
......
@@ -62,10 +62,12 @@ X <- data.matrix(bank[, c("age", "balance")])
fit <- lightgbm(
data = X
, label = y
, num_leaves = 4L
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, nrounds = 10L
, objective = "binary"
)
, nrounds = 10L
, verbose = -1L
)
......
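For completeness, a hypothetical follow-up to the vignette hunk directly above, assuming its `fit` and `X` objects are in scope; this snippet is not part of the commit.

```r
# score the model trained in the vignette hunk above (hypothetical usage)
preds <- predict(fit, X)
head(preds)
```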