Unverified Commit a70e8327 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] [python-package] deprecate Dataset arguments to cv() and train() (#6446)

parent ae55f32b
......@@ -25,8 +25,8 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
#' @param weight vector of response values. If not NULL, will set to dataset
#' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
......@@ -36,10 +36,8 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
......@@ -70,6 +68,13 @@ CVBooster <- R6::R6Class(
#' , nfold = 3L
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv
#' @export
lgb.cv <- function(params = list()
......@@ -102,12 +107,32 @@ lgb.cv <- function(params = list()
# If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
}
data <- lgb.Dataset(data = data, label = label)
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
}
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
......
......@@ -6,10 +6,8 @@
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
......@@ -43,6 +41,13 @@
#' , early_stopping_rounds = 3L
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export
lgb.train <- function(params = list(),
data,
......@@ -78,6 +83,16 @@ lgb.train <- function(params = list(),
}
}
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}
# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
......
......@@ -144,6 +144,12 @@ NULL
#'
#' \emph{New in version 4.0.0}
#'
#' @param colnames Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#'
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{
#' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
......@@ -152,10 +158,6 @@ NULL
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
#' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' }
......@@ -176,6 +178,8 @@ lightgbm <- function(data,
objective = "auto",
init_score = NULL,
num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
...) {
# validate inputs early to avoid unnecessary computation
......@@ -221,7 +225,14 @@ lightgbm <- function(data,
# Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
if (!.is_Dataset(x = dtrain)) {
dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score)
dtrain <- lgb.Dataset(
data = data
, label = label
, weight = weights
, init_score = init_score
, categorical_feature = categorical_feature
, colnames = colnames
)
}
train_args <- list(
......
......@@ -260,3 +260,19 @@
return(a == b)
}
}
# Emit a deprecation warning for Dataset-related keyword arguments passed to
# lgb.cv() / lgb.train() instead of lgb.Dataset().
#
# NOTE: parameter order is (argname, calling_function) to match the positional
# call sites, e.g. .emit_dataset_kwarg_warning("categorical_feature", "lgb.cv").
# With the previous (calling_function, argname) order, the warning text came out
# with the two values swapped ("Argument 'lgb.cv' to categorical_feature()...").
#
# @param argname name of the deprecated argument, e.g. "colnames"
# @param calling_function name of the function the argument was passed to,
#        e.g. "lgb.cv"
# @return invisible(NULL), called for its side effect (a warning)
# ref: https://github.com/microsoft/LightGBM/issues/6435
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
  msg <- sprintf(
    paste0(
      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
      , "Set '%s' with lgb.Dataset() instead. "
      , "See https://github.com/microsoft/LightGBM/issues/6435."
    )
    , argname
    , calling_function
    , argname
  )
  warning(msg)
  return(invisible(NULL))
}
......@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
\item{label}{Deprecated. See "Deprecated Arguments" section below.}
\item{weight}{vector of response values. If not NULL, will set to dataset}
\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
......@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
......@@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}.
\description{
Cross validation logic used by LightGBM
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
to argument \code{'data'}. It will also remove support for passing arguments
\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
}
\section{Early Stopping}{
......@@ -171,4 +177,5 @@ model <- lgb.cv(
, nfold = 3L
)
}
}
......@@ -82,11 +82,9 @@ printing of evaluation during training}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
......@@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
this function is focused on performance (e.g. speed, memory efficiency). It is also
less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
}
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will remove support for passing arguments
\code{'categorical_feature'} and \code{'colnames'}. Pass those things to
\code{lgb.Dataset} instead.
}
\section{Early Stopping}{
......@@ -154,4 +160,5 @@ model <- lgb.train(
, early_stopping_rounds = 3L
)
}
}
......@@ -19,6 +19,8 @@ lightgbm(
objective = "auto",
init_score = NULL,
num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
...
)
}
......@@ -96,6 +98,13 @@ set to the iteration number of the best iteration.}
\emph{New in version 4.0.0}}
\item{colnames}{Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").
Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
\itemize{
\item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
......@@ -104,10 +113,6 @@ set to the iteration number of the best iteration.}
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
\item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
\item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
say "the first and tenth columns").}
\item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}
}}
......
......@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
}
})
test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", {
test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
bad_values <- list(
4L
, "hello"
......@@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
test_that("lgb.train() supports non-ASCII feature names", {
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
)
# content below is equivalent to
#
# feature_names <- c("F_零", "F_一", "F_二", "F_三")
......@@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", {
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
)
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
, colnames = feature_names
)
bst <- lgb.train(
data = dtrain
, nrounds = 5L
......@@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", {
, verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS
)
, colnames = feature_names
)
expect_true(.is_Booster(bst))
dumped_model <- jsonlite::fromJSON(bst$dump_model())
......@@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is
test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
set.seed(1L)
dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
dtrain <- lgb.Dataset(
train$data
, label = train$label
, params = list(num_threads = .LGB_MAX_THREADS)
)
params <- list(
objective = "regression"
......@@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
pred1 <- bst$predict(test$data)
new_colnames <- paste0(colnames(train$data), "_x")
dtrain$set_colnames(new_colnames)
params <- list(
objective = "regression"
, interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
......@@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
data = dtrain
, params = params
, nrounds = 2L
, colnames = new_colnames
)
pred2 <- bst$predict(test$data)
......
......@@ -25,9 +25,14 @@ X_test = df_test.drop(0, axis=1)
num_train, num_feature = X_train.shape
# generate feature names
feature_name = [f"feature_{col}" for col in range(num_feature)]
# create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False
lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False)
lgb_train = lgb.Dataset(
X_train, y_train, weight=W_train, feature_name=feature_name, categorical_feature=[21], free_raw_data=False
)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False)
# specify your configurations as a dict
......@@ -43,9 +48,6 @@ params = {
"verbose": 0,
}
# generate feature names
feature_name = [f"feature_{col}" for col in range(num_feature)]
print("Starting training...")
# feature_name and categorical_feature
gbm = lgb.train(
......@@ -53,8 +55,6 @@ gbm = lgb.train(
lgb_train,
num_boost_round=10,
valid_sets=lgb_train, # eval training data
feature_name=feature_name,
categorical_feature=[21],
)
print("Finished first 10 rounds...")
......
......@@ -78,7 +78,12 @@
"metadata": {},
"outputs": [],
"source": [
"lgb_train = lgb.Dataset(X_train, y_train)\n",
"lgb_train = lgb.Dataset(\n",
" X_train,\n",
" y_train,\n",
" feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
" categorical_feature=[21],\n",
")\n",
"lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)"
]
},
......@@ -144,8 +149,6 @@
" lgb_train,\n",
" num_boost_round=100,\n",
" valid_sets=[lgb_train, lgb_test],\n",
" feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
" categorical_feature=[21],\n",
" callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n",
")"
]
......
......@@ -22,7 +22,12 @@ X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)
# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_train = lgb.Dataset(
X_train,
y_train,
feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
categorical_feature=[21],
)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict
......@@ -37,8 +42,6 @@ gbm = lgb.train(
lgb_train,
num_boost_round=100,
valid_sets=[lgb_train, lgb_test],
feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
categorical_feature=[21],
callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],
)
......
......@@ -2,6 +2,7 @@
"""Library with training routines of LightGBM."""
import copy
import json
import warnings
from collections import OrderedDict, defaultdict
from operator import attrgetter
from pathlib import Path
......@@ -13,6 +14,7 @@ from . import callback
from .basic import (
Booster,
Dataset,
LGBMDeprecationWarning,
LightGBMError,
_choose_param_value,
_ConfigAliases,
......@@ -51,6 +53,15 @@ _LGBM_PreprocFunction = Callable[
]
def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None:
    """Warn that a Dataset-related keyword argument is deprecated.

    Emits an ``LGBMDeprecationWarning`` telling users to set ``argname`` when
    constructing ``lightgbm.Dataset()`` rather than passing it to
    ``calling_function`` (e.g. ``train`` or ``cv``).

    ``stacklevel=2`` attributes the warning to the user's call site rather than
    to this helper.
    """
    warnings.warn(
        (
            f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in "
            f"a future release. Set '{argname}' when calling lightgbm.Dataset() instead. "
            "See https://github.com/microsoft/LightGBM/issues/6435."
        ),
        category=LGBMDeprecationWarning,
        stacklevel=2,
    )
def train(
params: Dict[str, Any],
train_set: Dataset,
......@@ -103,9 +114,11 @@ def train(
init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto', optional (default="auto")
**Deprecated.** Set ``feature_name`` on ``train_set`` instead.
Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto', optional (default="auto")
**Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
Categorical features.
If list of int, interpreted as indices.
If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
......@@ -166,6 +179,13 @@ def train(
f"Item {i} has type '{type(valid_item).__name__}'."
)
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
if categorical_feature != "auto":
_emit_dataset_kwarg_warning("train", "categorical_feature")
if feature_name != "auto":
_emit_dataset_kwarg_warning("train", "feature_name")
# create predictor first
params = copy.deepcopy(params)
params = _choose_param_value(
......@@ -625,9 +645,11 @@ def cv(
init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto', optional (default="auto")
**Deprecated.** Set ``feature_name`` on ``train_set`` instead.
Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto', optional (default="auto")
**Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
Categorical features.
If list of int, interpreted as indices.
If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
......@@ -693,6 +715,13 @@ def cv(
if num_boost_round <= 0:
raise ValueError(f"num_boost_round must be greater than 0. Got {num_boost_round}.")
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
if categorical_feature != "auto":
_emit_dataset_kwarg_warning("cv", "categorical_feature")
if feature_name != "auto":
_emit_dataset_kwarg_warning("cv", "feature_name")
params = copy.deepcopy(params)
params = _choose_param_value(
main_param_name="objective",
......
......@@ -862,6 +862,7 @@ class LGBMModel(_LGBMModelBase):
group=group,
init_score=init_score,
categorical_feature=categorical_feature,
feature_name=feature_name,
params=params,
)
......@@ -928,7 +929,6 @@ class LGBMModel(_LGBMModelBase):
valid_names=eval_names,
feval=eval_metrics_callable, # type: ignore[arg-type]
init_model=init_model,
feature_name=feature_name,
callbacks=callbacks,
)
......
......@@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer):
def test_feature_name():
X_train, y_train = make_synthetic_regression()
params = {"verbose": -1}
lgb_train = lgb.Dataset(X_train, y_train)
feature_names = [f"f_{i}" for i in range(X_train.shape[-1])]
gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
gbm = lgb.train(params, lgb_train, num_boost_round=5)
assert feature_names == gbm.feature_name()
# test feature_names with whitespaces
feature_names_with_space = [f"f {i}" for i in range(X_train.shape[-1])]
gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names_with_space)
lgb_train.set_feature_name(feature_names_with_space)
gbm = lgb.train(params, lgb_train, num_boost_round=5)
assert feature_names == gbm.feature_name()
......@@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii():
# This has non-ascii strings.
feature_names = ["F_零", "F_一", "F_二", "F_三"]
params = {"verbose": -1}
lgb_train = lgb.Dataset(X_train, y_train)
lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
gbm = lgb.train(params, lgb_train, num_boost_round=5)
assert feature_names == gbm.feature_name()
gbm.save_model("lgb.model")
......
......@@ -25,8 +25,8 @@ def test_register_logger(tmp_path):
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0])
lgb_train = lgb.Dataset(X, y)
lgb_valid = lgb.Dataset(X, y) # different object for early-stopping
lgb_train = lgb.Dataset(X, y, categorical_feature=[1])
lgb_valid = lgb.Dataset(X, y, categorical_feature=[1]) # different object for early-stopping
eval_records = {}
callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)]
......@@ -36,7 +36,6 @@ def test_register_logger(tmp_path):
num_boost_round=10,
feval=dummy_metric,
valid_sets=[lgb_valid],
categorical_feature=[1],
callbacks=callbacks,
)
......@@ -151,12 +150,11 @@ def test_register_custom_logger():
logged_messages = []
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0])
lgb_data = lgb.Dataset(X, y)
lgb_data = lgb.Dataset(X, y, categorical_feature=[1])
lgb.train(
{"objective": "binary", "metric": "auc"},
lgb_data,
num_boost_round=10,
valid_sets=[lgb_data],
categorical_feature=[1],
)
assert logged_messages, "custom logger was not called"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment