Unverified Commit a70e8327 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] [python-package] deprecate Dataset arguments to cv() and train() (#6446)

parent ae55f32b
...@@ -25,8 +25,8 @@ CVBooster <- R6::R6Class( ...@@ -25,8 +25,8 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM #' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight vector of response values. If not NULL, will set to dataset #' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation. #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a #' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
...@@ -36,10 +36,8 @@ CVBooster <- R6::R6Class( ...@@ -36,10 +36,8 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied, #' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored. #' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature categorical features. This can either be a character vector of feature #' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param callbacks List of callback functions that are applied at each iteration. #' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets #' into a predictor model which frees up memory and the original datasets
...@@ -70,6 +68,13 @@ CVBooster <- R6::R6Class( ...@@ -70,6 +68,13 @@ CVBooster <- R6::R6Class(
#' , nfold = 3L #' , nfold = 3L
#' ) #' )
#' } #' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv #' @importFrom data.table data.table setorderv
#' @export #' @export
lgb.cv <- function(params = list() lgb.cv <- function(params = list()
...@@ -102,12 +107,32 @@ lgb.cv <- function(params = list() ...@@ -102,12 +107,32 @@ lgb.cv <- function(params = list()
# If 'data' is not an lgb.Dataset, try to construct one using 'label' # If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) { if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) { if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'") stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
} }
data <- lgb.Dataset(data = data, label = label) data <- lgb.Dataset(data = data, label = label)
} }
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
}
# set some parameters, resolving the way they were passed in with other parameters # set some parameters, resolving the way they were passed in with other parameters
# in `params`. # in `params`.
# this ensures that the model stored with Booster$save() correctly represents # this ensures that the model stored with Booster$save() correctly represents
......
...@@ -6,10 +6,8 @@ ...@@ -6,10 +6,8 @@
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation #' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature categorical features. This can either be a character vector of feature #' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param callbacks List of callback functions that are applied at each iteration. #' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the #' booster model into a predictor model which frees up memory and the
...@@ -43,6 +41,13 @@ ...@@ -43,6 +41,13 @@
#' , early_stopping_rounds = 3L #' , early_stopping_rounds = 3L
#' ) #' )
#' } #' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export #' @export
lgb.train <- function(params = list(), lgb.train <- function(params = list(),
data, data,
...@@ -78,6 +83,16 @@ lgb.train <- function(params = list(), ...@@ -78,6 +83,16 @@ lgb.train <- function(params = list(),
} }
} }
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}
# set some parameters, resolving the way they were passed in with other parameters # set some parameters, resolving the way they were passed in with other parameters
# in `params`. # in `params`.
# this ensures that the model stored with Booster$save() correctly represents # this ensures that the model stored with Booster$save() correctly represents
......
...@@ -144,6 +144,12 @@ NULL ...@@ -144,6 +144,12 @@ NULL
#' #'
#' \emph{New in version 4.0.0} #' \emph{New in version 4.0.0}
#' #'
#' @param colnames Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
#'
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{ #' \itemize{
#' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
...@@ -152,10 +158,6 @@ NULL ...@@ -152,10 +158,6 @@ NULL
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} #' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
#' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} #' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} #' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model #' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets} #' into a predictor model which frees up memory and the original datasets}
#' } #' }
...@@ -176,6 +178,8 @@ lightgbm <- function(data, ...@@ -176,6 +178,8 @@ lightgbm <- function(data,
objective = "auto", objective = "auto",
init_score = NULL, init_score = NULL,
num_threads = NULL, num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
...) { ...) {
# validate inputs early to avoid unnecessary computation # validate inputs early to avoid unnecessary computation
...@@ -221,7 +225,14 @@ lightgbm <- function(data, ...@@ -221,7 +225,14 @@ lightgbm <- function(data,
# Check whether data is lgb.Dataset, if not then create lgb.Dataset manually # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
if (!.is_Dataset(x = dtrain)) { if (!.is_Dataset(x = dtrain)) {
dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score) dtrain <- lgb.Dataset(
data = data
, label = label
, weight = weights
, init_score = init_score
, categorical_feature = categorical_feature
, colnames = colnames
)
} }
train_args <- list( train_args <- list(
......
...@@ -260,3 +260,19 @@ ...@@ -260,3 +260,19 @@
return(a == b) return(a == b)
} }
} }
# Emit a deprecation warning for Dataset-related keyword arguments that were
# passed directly to a training function (lgb.cv(), lgb.train()) instead of
# being set on the lgb.Dataset itself.
#
# NOTE: parameter order is (argname, calling_function) because call sites pass
# the argument name first, e.g. .emit_dataset_kwarg_warning("colnames", "lgb.cv").
# With the arguments reversed, the message would read
# "Argument 'lgb.cv' to colnames() is deprecated...".
#
# ref: https://github.com/microsoft/LightGBM/issues/6435
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
  msg <- sprintf(
    paste0(
      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
      , "Set '%s' with lgb.Dataset() instead. "
      , "See https://github.com/microsoft/LightGBM/issues/6435."
    )
    , argname
    , calling_function
    , argname
  )
  warning(msg)
  return(invisible(NULL))
}
...@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately ...@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.} \item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}} \item{label}{Deprecated. See "Deprecated Arguments" section below.}
\item{weight}{vector of response values. If not NULL, will set to dataset} \item{weight}{Deprecated. See "Deprecated Arguments" section below.}
\item{obj}{objective function, can be character or custom objective function. Examples include \item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber}, \code{regression}, \code{regression_l1}, \code{huber},
...@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.} ...@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model} \item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset} \item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature \item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null, \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set training will stop if the evaluation of any metric on any validation set
...@@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}. ...@@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}.
\description{ \description{
Cross validation logic used by LightGBM Cross validation logic used by LightGBM
} }
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
to argument \code{'data'}. It will also remove support for passing arguments
\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
}
\section{Early Stopping}{ \section{Early Stopping}{
...@@ -171,4 +177,5 @@ model <- lgb.cv( ...@@ -171,4 +177,5 @@ model <- lgb.cv(
, nfold = 3L , nfold = 3L
) )
} }
} }
...@@ -82,11 +82,9 @@ printing of evaluation during training} ...@@ -82,11 +82,9 @@ printing of evaluation during training}
\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model} \item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset} \item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature \item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null, \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set training will stop if the evaluation of any metric on any validation set
...@@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}}, ...@@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
this function is focused on performance (e.g. speed, memory efficiency). It is also this function is focused on performance (e.g. speed, memory efficiency). It is also
less likely to have breaking API changes in new releases than \code{\link{lightgbm}}. less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
} }
\section{Deprecated Arguments}{
A future release of \code{lightgbm} will remove support for passing arguments
\code{'categorical_feature'} and \code{'colnames'}. Pass those things to
\code{lgb.Dataset} instead.
}
\section{Early Stopping}{ \section{Early Stopping}{
...@@ -154,4 +160,5 @@ model <- lgb.train( ...@@ -154,4 +160,5 @@ model <- lgb.train(
, early_stopping_rounds = 3L , early_stopping_rounds = 3L
) )
} }
} }
...@@ -19,6 +19,8 @@ lightgbm( ...@@ -19,6 +19,8 @@ lightgbm(
objective = "auto", objective = "auto",
init_score = NULL, init_score = NULL,
num_threads = NULL, num_threads = NULL,
colnames = NULL,
categorical_feature = NULL,
... ...
) )
} }
...@@ -96,6 +98,13 @@ set to the iteration number of the best iteration.} ...@@ -96,6 +98,13 @@ set to the iteration number of the best iteration.}
\emph{New in version 4.0.0}} \emph{New in version 4.0.0}}
\item{colnames}{Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").
Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
\itemize{ \itemize{
\item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
...@@ -104,10 +113,6 @@ set to the iteration number of the best iteration.} ...@@ -104,10 +113,6 @@ set to the iteration number of the best iteration.}
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{\code{eval}: evaluation function, can be (a list of) character or custom eval function} \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
\item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}} \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
\item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
say "the first and tenth columns").}
\item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets} into a predictor model which frees up memory and the original datasets}
}} }}
......
...@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", { ...@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
} }
}) })
test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", { test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
bad_values <- list( bad_values <- list(
4L 4L
, "hello" , "hello"
...@@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th ...@@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
test_that("lgb.train() supports non-ASCII feature names", { test_that("lgb.train() supports non-ASCII feature names", {
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
)
# content below is equivalent to # content below is equivalent to
# #
# feature_names <- c("F_零", "F_一", "F_二", "F_三") # feature_names <- c("F_零", "F_一", "F_二", "F_三")
...@@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", { ...@@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", {
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c))) , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
, rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89))) , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
) )
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
, params = list(num_threads = .LGB_MAX_THREADS)
, colnames = feature_names
)
bst <- lgb.train( bst <- lgb.train(
data = dtrain data = dtrain
, nrounds = 5L , nrounds = 5L
...@@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", { ...@@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", {
, verbose = .LGB_VERBOSITY , verbose = .LGB_VERBOSITY
, num_threads = .LGB_MAX_THREADS , num_threads = .LGB_MAX_THREADS
) )
, colnames = feature_names
) )
expect_true(.is_Booster(bst)) expect_true(.is_Booster(bst))
dumped_model <- jsonlite::fromJSON(bst$dump_model()) dumped_model <- jsonlite::fromJSON(bst$dump_model())
...@@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is ...@@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is
test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), { test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
set.seed(1L) set.seed(1L)
dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS)) dtrain <- lgb.Dataset(
train$data
, label = train$label
, params = list(num_threads = .LGB_MAX_THREADS)
)
params <- list( params <- list(
objective = "regression" objective = "regression"
...@@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai ...@@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
pred1 <- bst$predict(test$data) pred1 <- bst$predict(test$data)
new_colnames <- paste0(colnames(train$data), "_x") new_colnames <- paste0(colnames(train$data), "_x")
dtrain$set_colnames(new_colnames)
params <- list( params <- list(
objective = "regression" objective = "regression"
, interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L]) , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
...@@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai ...@@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
data = dtrain data = dtrain
, params = params , params = params
, nrounds = 2L , nrounds = 2L
, colnames = new_colnames
) )
pred2 <- bst$predict(test$data) pred2 <- bst$predict(test$data)
......
...@@ -25,9 +25,14 @@ X_test = df_test.drop(0, axis=1) ...@@ -25,9 +25,14 @@ X_test = df_test.drop(0, axis=1)
num_train, num_feature = X_train.shape num_train, num_feature = X_train.shape
# generate feature names
feature_name = [f"feature_{col}" for col in range(num_feature)]
# create dataset for lightgbm # create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False # if you want to re-use data, remember to set free_raw_data=False
lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False) lgb_train = lgb.Dataset(
X_train, y_train, weight=W_train, feature_name=feature_name, categorical_feature=[21], free_raw_data=False
)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False) lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False)
# specify your configurations as a dict # specify your configurations as a dict
...@@ -43,9 +48,6 @@ params = { ...@@ -43,9 +48,6 @@ params = {
"verbose": 0, "verbose": 0,
} }
# generate feature names
feature_name = [f"feature_{col}" for col in range(num_feature)]
print("Starting training...") print("Starting training...")
# feature_name and categorical_feature # feature_name and categorical_feature
gbm = lgb.train( gbm = lgb.train(
...@@ -53,8 +55,6 @@ gbm = lgb.train( ...@@ -53,8 +55,6 @@ gbm = lgb.train(
lgb_train, lgb_train,
num_boost_round=10, num_boost_round=10,
valid_sets=lgb_train, # eval training data valid_sets=lgb_train, # eval training data
feature_name=feature_name,
categorical_feature=[21],
) )
print("Finished first 10 rounds...") print("Finished first 10 rounds...")
......
...@@ -78,7 +78,12 @@ ...@@ -78,7 +78,12 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"lgb_train = lgb.Dataset(X_train, y_train)\n", "lgb_train = lgb.Dataset(\n",
" X_train,\n",
" y_train,\n",
" feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
" categorical_feature=[21],\n",
")\n",
"lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)" "lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)"
] ]
}, },
...@@ -144,8 +149,6 @@ ...@@ -144,8 +149,6 @@
" lgb_train,\n", " lgb_train,\n",
" num_boost_round=100,\n", " num_boost_round=100,\n",
" valid_sets=[lgb_train, lgb_test],\n", " valid_sets=[lgb_train, lgb_test],\n",
" feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
" categorical_feature=[21],\n",
" callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n", " callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n",
")" ")"
] ]
......
...@@ -22,7 +22,12 @@ X_train = df_train.drop(0, axis=1) ...@@ -22,7 +22,12 @@ X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1) X_test = df_test.drop(0, axis=1)
# create dataset for lightgbm # create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(
X_train,
y_train,
feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
categorical_feature=[21],
)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train) lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict # specify your configurations as a dict
...@@ -37,8 +42,6 @@ gbm = lgb.train( ...@@ -37,8 +42,6 @@ gbm = lgb.train(
lgb_train, lgb_train,
num_boost_round=100, num_boost_round=100,
valid_sets=[lgb_train, lgb_test], valid_sets=[lgb_train, lgb_test],
feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
categorical_feature=[21],
callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)], callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],
) )
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
"""Library with training routines of LightGBM.""" """Library with training routines of LightGBM."""
import copy import copy
import json import json
import warnings
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from operator import attrgetter from operator import attrgetter
from pathlib import Path from pathlib import Path
...@@ -13,6 +14,7 @@ from . import callback ...@@ -13,6 +14,7 @@ from . import callback
from .basic import ( from .basic import (
Booster, Booster,
Dataset, Dataset,
LGBMDeprecationWarning,
LightGBMError, LightGBMError,
_choose_param_value, _choose_param_value,
_ConfigAliases, _ConfigAliases,
...@@ -51,6 +53,15 @@ _LGBM_PreprocFunction = Callable[ ...@@ -51,6 +53,15 @@ _LGBM_PreprocFunction = Callable[
] ]
def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None:
    """Warn that a Dataset-related keyword argument passed to a training entry point is deprecated.

    Parameters
    ----------
    calling_function : str
        Name of the public function the user called (e.g. ``"train"`` or ``"cv"``).
    argname : str
        Name of the deprecated keyword argument (e.g. ``"feature_name"``).
    """
    # Build the message from a template so both argument references stay in sync.
    template = (
        "Argument '{arg}' to {func}() is deprecated and will be removed in "
        "a future release. Set '{arg}' when calling lightgbm.Dataset() instead. "
        "See https://github.com/microsoft/LightGBM/issues/6435."
    )
    # stacklevel=2 attributes the warning to the user's call site, not this helper.
    warnings.warn(
        template.format(arg=argname, func=calling_function),
        category=LGBMDeprecationWarning,
        stacklevel=2,
    )
def train( def train(
params: Dict[str, Any], params: Dict[str, Any],
train_set: Dataset, train_set: Dataset,
...@@ -103,9 +114,11 @@ def train( ...@@ -103,9 +114,11 @@ def train(
init_model : str, pathlib.Path, Booster or None, optional (default=None) init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training. Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto', optional (default="auto") feature_name : list of str, or 'auto', optional (default="auto")
**Deprecated.** Set ``feature_name`` on ``train_set`` instead.
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto', optional (default="auto") categorical_feature : list of str or int, or 'auto', optional (default="auto")
**Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
...@@ -166,6 +179,13 @@ def train( ...@@ -166,6 +179,13 @@ def train(
f"Item {i} has type '{type(valid_item).__name__}'." f"Item {i} has type '{type(valid_item).__name__}'."
) )
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
if categorical_feature != "auto":
_emit_dataset_kwarg_warning("train", "categorical_feature")
if feature_name != "auto":
_emit_dataset_kwarg_warning("train", "feature_name")
# create predictor first # create predictor first
params = copy.deepcopy(params) params = copy.deepcopy(params)
params = _choose_param_value( params = _choose_param_value(
...@@ -625,9 +645,11 @@ def cv( ...@@ -625,9 +645,11 @@ def cv(
init_model : str, pathlib.Path, Booster or None, optional (default=None) init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training. Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto', optional (default="auto") feature_name : list of str, or 'auto', optional (default="auto")
**Deprecated.** Set ``feature_name`` on ``train_set`` instead.
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto', optional (default="auto") categorical_feature : list of str or int, or 'auto', optional (default="auto")
**Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
...@@ -693,6 +715,13 @@ def cv( ...@@ -693,6 +715,13 @@ def cv(
if num_boost_round <= 0: if num_boost_round <= 0:
raise ValueError(f"num_boost_round must be greater than 0. Got {num_boost_round}.") raise ValueError(f"num_boost_round must be greater than 0. Got {num_boost_round}.")
# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
if categorical_feature != "auto":
_emit_dataset_kwarg_warning("cv", "categorical_feature")
if feature_name != "auto":
_emit_dataset_kwarg_warning("cv", "feature_name")
params = copy.deepcopy(params) params = copy.deepcopy(params)
params = _choose_param_value( params = _choose_param_value(
main_param_name="objective", main_param_name="objective",
......
...@@ -862,6 +862,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -862,6 +862,7 @@ class LGBMModel(_LGBMModelBase):
group=group, group=group,
init_score=init_score, init_score=init_score,
categorical_feature=categorical_feature, categorical_feature=categorical_feature,
feature_name=feature_name,
params=params, params=params,
) )
...@@ -928,7 +929,6 @@ class LGBMModel(_LGBMModelBase): ...@@ -928,7 +929,6 @@ class LGBMModel(_LGBMModelBase):
valid_names=eval_names, valid_names=eval_names,
feval=eval_metrics_callable, # type: ignore[arg-type] feval=eval_metrics_callable, # type: ignore[arg-type]
init_model=init_model, init_model=init_model,
feature_name=feature_name,
callbacks=callbacks, callbacks=callbacks,
) )
......
...@@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer): ...@@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer):
def test_feature_name():
    """Feature names supplied via the Dataset are reflected by the trained Booster.

    Post-commit form of this test: ``feature_name`` is passed to ``lgb.Dataset``
    (or set with ``set_feature_name``) instead of the now-deprecated
    ``feature_name`` argument of ``lgb.train``.
    """
    X_train, y_train = make_synthetic_regression()
    params = {"verbose": -1}
    feature_names = [f"f_{i}" for i in range(X_train.shape[-1])]
    # feature_name now lives on the Dataset, not on train()
    lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
    gbm = lgb.train(params, lgb_train, num_boost_round=5)
    assert feature_names == gbm.feature_name()
    # test feature_names with whitespaces
    feature_names_with_space = [f"f {i}" for i in range(X_train.shape[-1])]
    lgb_train.set_feature_name(feature_names_with_space)
    gbm = lgb.train(params, lgb_train, num_boost_round=5)
    # NOTE(review): intentionally asserts the underscore names, not the
    # whitespace ones — presumably LightGBM normalizes whitespace in feature
    # names to underscores; confirm against LightGBM's Dataset handling.
    assert feature_names == gbm.feature_name()
...@@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii(): ...@@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii():
# This has non-ascii strings. # This has non-ascii strings.
feature_names = ["F_零", "F_一", "F_二", "F_三"] feature_names = ["F_零", "F_一", "F_二", "F_三"]
params = {"verbose": -1} params = {"verbose": -1}
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names) gbm = lgb.train(params, lgb_train, num_boost_round=5)
assert feature_names == gbm.feature_name() assert feature_names == gbm.feature_name()
gbm.save_model("lgb.model") gbm.save_model("lgb.model")
......
...@@ -25,8 +25,8 @@ def test_register_logger(tmp_path): ...@@ -25,8 +25,8 @@ def test_register_logger(tmp_path):
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0]) y = np.array([0, 1, 1, 0])
lgb_train = lgb.Dataset(X, y) lgb_train = lgb.Dataset(X, y, categorical_feature=[1])
lgb_valid = lgb.Dataset(X, y) # different object for early-stopping lgb_valid = lgb.Dataset(X, y, categorical_feature=[1]) # different object for early-stopping
eval_records = {} eval_records = {}
callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)] callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)]
...@@ -36,7 +36,6 @@ def test_register_logger(tmp_path): ...@@ -36,7 +36,6 @@ def test_register_logger(tmp_path):
num_boost_round=10, num_boost_round=10,
feval=dummy_metric, feval=dummy_metric,
valid_sets=[lgb_valid], valid_sets=[lgb_valid],
categorical_feature=[1],
callbacks=callbacks, callbacks=callbacks,
) )
...@@ -151,12 +150,11 @@ def test_register_custom_logger(): ...@@ -151,12 +150,11 @@ def test_register_custom_logger():
logged_messages = [] logged_messages = []
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0]) y = np.array([0, 1, 1, 0])
lgb_data = lgb.Dataset(X, y) lgb_data = lgb.Dataset(X, y, categorical_feature=[1])
lgb.train( lgb.train(
{"objective": "binary", "metric": "auc"}, {"objective": "binary", "metric": "auc"},
lgb_data, lgb_data,
num_boost_round=10, num_boost_round=10,
valid_sets=[lgb_data], valid_sets=[lgb_data],
categorical_feature=[1],
) )
assert logged_messages, "custom logger was not called" assert logged_messages, "custom logger was not called"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment