Unverified commit d24260fd, authored by James Lamb, committed by GitHub
Browse files

[R-package] require lgb.Dataset, remove support for passing 'colnames' and...

[R-package] require lgb.Dataset, remove support for passing 'colnames' and 'categorical_feature' for lgb.train() and lgb.cv() (#6714)
parent c6d90bc7
......@@ -67,7 +67,7 @@ CB_ENV <- R6::R6Class(
}
return(paste0(msg, collapse = " "))
return(paste(msg, collapse = " "))
}
......
......@@ -457,7 +457,7 @@ Dataset <- R6::R6Class(
if (!.is_null_handle(x = private$handle)) {
# Merge names with tab separation
merged_name <- paste0(as.list(private$colnames), collapse = "\t")
merged_name <- paste(as.list(private$colnames), collapse = "\t")
.Call(
LGBM_DatasetSetFeatureNames_R
, private$handle
......
......@@ -5,7 +5,7 @@
vapply(
X = df
, FUN = function(x) {
paste0(class(x), collapse = ",")
paste(class(x), collapse = ",")
}
, FUN.VALUE = character(1L)
)
......
......@@ -25,8 +25,6 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
......@@ -36,8 +34,6 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
......@@ -69,20 +65,12 @@ CVBooster <- R6::R6Class(
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv
#' @export
lgb.cv <- function(params = list()
, data
, nrounds = 100L
, nfold = 3L
, label = NULL
, weight = NULL
, obj = NULL
, eval = NULL
, verbose = 1L
......@@ -92,8 +80,6 @@ lgb.cv <- function(params = list()
, stratified = TRUE
, folds = NULL
, init_model = NULL
, colnames = NULL
, categorical_feature = NULL
, early_stopping_rounds = NULL
, callbacks = list()
, reset_data = FALSE
......@@ -104,33 +90,8 @@ lgb.cv <- function(params = list()
if (nrounds <= 0L) {
stop("nrounds should be greater than zero")
}
# If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
}
data <- lgb.Dataset(data = data, label = label)
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
stop("lgb.cv: data must be an lgb.Dataset instance")
}
# set some parameters, resolving the way they were passed in with other parameters
......@@ -214,37 +175,17 @@ lgb.cv <- function(params = list()
data$construct()
# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)
if (!is.null(weight)) {
data$set_field(field_name = "weight", data = weight)
}
# Update parameters with parsed parameters
data$update_params(params = params)
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor = predictor)
# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames = colnames)
}
# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature = categorical_feature)
}
if (!is.null(folds)) {
# Check for list of folds or for single value
......
......@@ -6,8 +6,6 @@
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
......@@ -42,12 +40,6 @@
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export
lgb.train <- function(params = list(),
data,
......@@ -59,8 +51,6 @@ lgb.train <- function(params = list(),
record = TRUE,
eval_freq = 1L,
init_model = NULL,
colnames = NULL,
categorical_feature = NULL,
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
......@@ -83,16 +73,6 @@ lgb.train <- function(params = list(),
}
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
......@@ -171,21 +151,12 @@ lgb.train <- function(params = list(),
# Construct datasets, if needed
data$update_params(params = params)
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
data$construct()
# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)
# Update parameters with parsed parameters
......@@ -194,11 +165,6 @@ lgb.train <- function(params = list(),
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor)
# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames)
}
valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
......
......@@ -34,7 +34,7 @@
# If a parameter has multiple values, join those values together with commas.
# trimws() is necessary because format() will pad to make strings the same width
val <- paste0(
val <- paste(
trimws(
format(
x = unname(params[[i]])
......@@ -46,7 +46,7 @@
if (nchar(val) <= 0L) next # Skip join
# Join key value
pair <- paste0(c(param_names[[i]], val), collapse = "=")
pair <- paste(c(param_names[[i]], val), collapse = "=")
ret <- c(ret, pair)
}
......@@ -55,7 +55,7 @@
return("")
}
return(paste0(ret, collapse = " "))
return(paste(ret, collapse = " "))
}
......@@ -115,7 +115,7 @@
# Turn indices 0-based and convert to string
for (j in seq_along(interaction_constraints)) {
interaction_constraints[[j]] <- paste0(
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
"[", paste(interaction_constraints[[j]] - 1L, collapse = ","), "]"
)
}
return(interaction_constraints)
......@@ -258,19 +258,3 @@
return(a == b)
}
}
# Raise a deprecation warning for a Dataset-related keyword argument that was
# passed to a training function instead of to lgb.Dataset().
#
# Call sites pass the argument name first and the calling function second,
# e.g. .emit_dataset_kwarg_warning("colnames", "lgb.train"), so the formals are
# declared in that order. With the previous order (calling_function, argname),
# those positional calls produced a message with the two names swapped.
# Calls that name both arguments behave the same as before.
#
# argname:          name of the deprecated argument (character scalar)
# calling_function: name of the function the argument was passed to,
#                   without trailing parentheses (they are added in the message)
#
# Returns NULL invisibly; called only for the side effect of warning().
#
# ref: https://github.com/microsoft/LightGBM/issues/6435
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
  msg <- sprintf(
    paste0(
      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
      , "Set '%s' with lgb.Dataset() instead. "
      , "See https://github.com/microsoft/LightGBM/issues/6435."
    )
    , argname
    , calling_function
    , argname
  )
  warning(msg)
  return(invisible(NULL))
}
......@@ -9,8 +9,6 @@ lgb.cv(
data,
nrounds = 100L,
nfold = 3L,
label = NULL,
weight = NULL,
obj = NULL,
eval = NULL,
verbose = 1L,
......@@ -20,8 +18,6 @@ lgb.cv(
stratified = TRUE,
folds = NULL,
init_model = NULL,
colnames = NULL,
categorical_feature = NULL,
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
......@@ -41,10 +37,6 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{Deprecated. See "Deprecated Arguments" section below.}
\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
......@@ -103,10 +95,6 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
fails to improve for \code{early_stopping_rounds} consecutive boosting rounds.
......@@ -131,14 +119,6 @@ a trained model \code{lgb.CVBooster}.
\description{
Cross validation logic used by LightGBM
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
to argument \code{'data'}. It will also remove support for passing arguments
\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
}
\section{Early Stopping}{
......
......@@ -15,8 +15,6 @@ lgb.train(
record = TRUE,
eval_freq = 1L,
init_model = NULL,
colnames = NULL,
categorical_feature = NULL,
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
......@@ -82,10 +80,6 @@ printing of evaluation during training}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
fails to improve for \code{early_stopping_rounds} consecutive boosting rounds.
......@@ -109,14 +103,6 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
this function is focused on performance (e.g. speed, memory efficiency). It is also
less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will remove support for passing arguments
\code{'categorical_feature'} and \code{'colnames'}. Pass those things to
\code{lgb.Dataset} instead.
}
\section{Early Stopping}{
......
......@@ -51,7 +51,7 @@ inst_dir <- file.path(R_PACKAGE_SOURCE, "inst", fsep = "/")
, "make this faster."
))
}
cmd <- paste0(cmd, " ", paste0(args, collapse = " "))
cmd <- paste0(cmd, " ", paste(args, collapse = " "))
exit_code <- system(cmd)
}
......
......@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
}
})
test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset", {
bad_values <- list(
4L
, "hello"
......@@ -454,7 +454,7 @@ test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset
, 10L
, nfold = 5L
)
}, regexp = "'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'", fixed = TRUE)
}, regexp = "lgb.cv: data must be an lgb.Dataset instance", fixed = TRUE)
}
})
......
......@@ -888,7 +888,7 @@ test_that("Saving a model with different feature importance types works", {
.feat_importance_from_string <- function(model_string) {
file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
start_indx <- which(file_lines == "feature_importances:") + 1L
blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
importances <- file_lines[start_indx: end_indx]
......@@ -955,7 +955,7 @@ test_that("Saving a model with unknown importance type fails", {
.params_from_model_string <- function(model_str) {
file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
start_indx <- which(file_lines == "parameters:") + 1L
blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
params <- file_lines[start_indx: end_indx]
......@@ -1532,7 +1532,7 @@ test_that("Booster's print, show, and summary work correctly", {
}
# Assert that printed model output contains the lines expected for a model
# whose Booster handle is no longer valid.
#
# printed_txt: character vector of printed lines
#              (presumably from capture.output(); confirm against the caller)
.has_expected_content_for_finalized_model <- function(printed_txt) {
  # One element must be exactly the header line. The anchored-regex check for
  # the same literal ("^LightGBM Model$") was redundant with this comparison.
  expect_true(any(printed_txt == "LightGBM Model"))
  # Substring match, no regex interpretation.
  expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
}
......
......@@ -18,7 +18,7 @@ test_that("Feature penalties work properly", {
num_leaves = 5L
, learning_rate = 0.05
, objective = "binary"
, feature_penalty = paste0(feature_penalties, collapse = ",")
, feature_penalty = paste(feature_penalties, collapse = ",")
, metric = "binary_error"
, num_threads = .LGB_MAX_THREADS
)
......
......@@ -121,7 +121,7 @@ if (length(parsed_args[["make_args"]]) > 0L) {
pattern = "make_args_from_build_script <- character(0L)"
, replacement = paste0(
"make_args_from_build_script <- c(\""
, paste0(parsed_args[["make_args"]], collapse = "\", \"")
, paste(parsed_args[["make_args"]], collapse = "\", \"")
, "\")"
)
, x = install_libs_content
......@@ -167,7 +167,7 @@ if (length(parsed_args[["make_args"]]) > 0L) {
, "make this faster."
))
}
cmd <- paste0(cmd, " ", paste0(args, collapse = " "))
cmd <- paste0(cmd, " ", paste(args, collapse = " "))
exit_code <- system(cmd)
}
......@@ -426,6 +426,6 @@ install_args <- c("CMD", "INSTALL", "--no-multiarch", "--with-keep.source", tarb
if (INSTALL_AFTER_BUILD) {
.run_shell_command(install_cmd, install_args)
} else {
cmd <- paste0(install_cmd, " ", paste0(install_args, collapse = " "))
cmd <- paste0(install_cmd, " ", paste(install_args, collapse = " "))
print(sprintf("Skipping installation. Install the package with command '%s'", cmd))
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment