[R-package] deprecate lgb.prepare() and lgb.prepare2() (#3095)

* [R-package] deprecate lgb.prepare() and lgb.prepare2() * linting * renaming * updated docs * linting * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * one more comment fix * remove comment about int being dangerous * empty commit Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

[R-package] deprecate lgb.prepare() and lgb.prepare2() (#3095)
* [R-package] deprecate lgb.prepare() and lgb.prepare2() * linting * renaming * updated docs * linting * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * one more comment fix * remove comment about int being dangerous * empty commit Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
fecac8e7 · James Lamb · GitHub · fed57520 · fecac8e7 · fecac8e7
Unverified Commit fecac8e7 authored Aug 01, 2020 by James Lamb Committed by GitHub Aug 01, 2020
18 changed files
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -14,6 +14,8 @@ export(lgb.Dataset.create.valid)
 export(lgb.Dataset.save)
 export(lgb.Dataset.set.categorical)
 export(lgb.Dataset.set.reference)
+export(lgb.convert)
+export(lgb.convert_with_rules)
 export(lgb.cv)
 export(lgb.dump)
 export(lgb.get.eval.result)

--- a/R-package/R/lgb.prepare2.R
+++ b/R-package/R/lgb.prepare2.R
-#' @name lgb.prepare2
+#' @name lgb.convert
 #' @title Data preparator for LightGBM datasets (integer)
 #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-#'              Factors and characters are converted to numeric (specifically: integer).
-#'              Please use \code{\link{lgb.prepare_rules2}} if you want to apply this transformation to
+#'              Factors and characters are converted to integer.
+#'              Please use \code{\link{lgb.convert_with_rules}} if you want to apply this transformation to
 #'              other datasets. This is useful if you have a specific need for integer dataset instead
-#'              of numeric dataset. Note that there are programs which do not support integer-only
-#'              input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
+#'              of numeric dataset.
+#'
+#'              NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare2}.
 #' @param data A data.frame or data.table to prepare.
 #' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
 #'         for input in \code{lgb.Dataset}.
@@ -16,13 +17,13 @@
 #' str(iris)
 #'
 #' # Convert all factors/chars to integer
-#' str(lgb.prepare2(data = iris))
+#' str(lgb.convert(data = iris))
 #'
 #' \dontrun{
 #' # When lightgbm package is installed, and you do not want to load it
 #' # You can still use the function!
 #' lgb.unloader()
-#' str(lightgbm::lgb.prepare2(data = iris))
+#' str(lightgbm::lgb.convert(data = iris))
 #' # 'data.frame':	150 obs. of  5 variables:
 #' # $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 #' # $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
@@ -32,7 +33,7 @@
 #' }
 #'
 #' @export
-lgb.prepare2 <- function(data) {
+lgb.convert <- function(data) {

  # data.table not behaving like data.frame
  if (inherits(data, "data.table")) {
@@ -75,7 +76,7 @@ lgb.prepare2 <- function(data) {
    } else {

      stop(
-        "lgb.prepare2: you provided "
+        "lgb.convert: you provided "
        , paste(class(data), collapse = " & ")
        , " but data should have class data.frame or data.table"
      )

--- a/R-package/R/lgb.prepare_rules2.R
+++ b/R-package/R/lgb.prepare_rules2.R
-#' @name lgb.prepare_rules2
+#' @name lgb.convert_with_rules
 #' @title Data preparator for LightGBM datasets with rules (integer)
 #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-#'              Factors and characters are converted to numeric (specifically: integer).
+#'              Factors and characters are converted to integer.
 #'              In addition, keeps rules created so you can convert other datasets using this converter.
 #'              This is useful if you have a specific need for integer dataset instead of numeric dataset.
-#'              Note that there are programs which do not support integer-only input.
-#'              Consider this as a half memory technique which is dangerous, especially for LightGBM.
+#'
+#'              NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare_rules2}.
 #' @param data A data.frame or data.table to prepare.
 #' @param rules A set of rules from the data preparator, if already used.
 #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
@@ -17,7 +17,7 @@
 #'
 #' str(iris)
 #'
-#' new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
+#' new_iris <- lgb.convert_with_rules(data = iris) # Autoconverter
 #' str(new_iris$data)
 #'
 #' data(iris) # Erase iris dataset
@@ -25,7 +25,7 @@
 #'
 #' # Use conversion using known rules
 #' # Unknown factors become 0, excellent for sparse datasets
-#' newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
+#' newer_iris <- lgb.convert_with_rules(data = iris, rules = new_iris$rules)
 #'
 #' # Unknown factor is now zero, perfect for sparse datasets
 #' newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
@@ -46,12 +46,12 @@
 #'     , "virginica" = 1L
 #'   )
 #' )
-#' newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
+#' newest_iris <- lgb.convert_with_rules(data = iris, rules = personal_rules)
 #' str(newest_iris$data) # SUCCESS!
 #'
 #' @importFrom data.table set
 #' @export
-lgb.prepare_rules2 <- function(data, rules = NULL) {
+lgb.convert_with_rules <- function(data, rules = NULL) {

  # data.table not behaving like data.frame
  if (inherits(data, "data.table")) {
@@ -166,7 +166,7 @@ lgb.prepare_rules2 <- function(data, rules = NULL) {
      } else {

        stop(
-          "lgb.prepare_rules2: you provided "
+          "lgb.convert_with_rules: you provided "
          , paste(class(data), collapse = " & ")
          , " but data should have class data.frame"
        )

--- a/R-package/R/lgb.prepare.R
+++ b/R-package/R/lgb.prepare.R
-#' @name lgb.prepare
-#' @title Data preparator for LightGBM datasets (numeric)
-#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-#'              Factors and characters are converted to numeric without integers. Please use
-#'              \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets.
-#' @param data A data.frame or data.table to prepare.
-#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
-#'         for input in \code{lgb.Dataset}.
-#'
-#' @examples
-#' data(iris)
-#'
-#' str(iris)
-#'
-#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
-#'
-#' \dontrun{
-#' # When lightgbm package is installed, and you do not want to load it
-#' # You can still use the function!
-#' lgb.unloader()
-#' str(lightgbm::lgb.prepare(data = iris))
-#' # 'data.frame':	150 obs. of  5 variables:
-#' # $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
-#' # $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
-#' # $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
-#' # $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
-#' # $ Species     : num  1 1 1 1 1 1 1 1 1 1 ...
-#' }
-#'
-#' @export
-lgb.prepare <- function(data) {
-
-  # data.table not behaving like data.frame
-  if ("data.table" %in% class(data)) {
-
-    # Get data classes
-    list_classes <- sapply(data, class)
-
-    # Convert characters to factors only (we can change them to numeric after)
-    is_char <- which(list_classes == "character")
-    if (length(is_char) > 0L) {
-      data[, (is_char) := lapply(.SD, function(x) {as.numeric(as.factor(x))}), .SDcols = is_char]
-    }
-
-    # Convert factors to numeric (integer is more efficient actually)
-    is_fact <- c(which(list_classes == "factor"), is_char)
-    if (length(is_fact) > 0L) {
-      data[, (is_fact) := lapply(.SD, function(x) {as.numeric(x)}), .SDcols = is_fact]
-    }
-
-  } else {
-
-    # Default routine (data.frame)
-    if ("data.frame" %in% class(data)) {
-
-      # Get data classes
-      list_classes <- sapply(data, class)
-
-      # Convert characters to factors to numeric
-      is_char <- which(list_classes == "character")
-      if (length(is_char) > 0L) {
-        data[is_char] <- lapply(data[is_char], function(x) {as.numeric(as.factor(x))})
-      }
-
-      # Convert factors to numeric
-      is_fact <- which(list_classes == "factor")
-      if (length(is_fact) > 0L) {
-        data[is_fact] <- lapply(data[is_fact], function(x) {as.numeric(x)})
-      }
-
-    } else {
-
-      stop(
-        "lgb.prepare: you provided "
-        , paste(class(data), collapse = " & ")
-        , " but data should have class data.frame or data.table"
-      )
-
-    }
-
-  }
-
-  return(data)
-
-}
--- a/R-package/R/lgb.prepare_rules.R
+++ b/R-package/R/lgb.prepare_rules.R
-#' @name lgb.prepare_rules
-#' @title Data preparator for LightGBM datasets with rules (numeric)
-#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-#'              Factors and characters are converted to numeric. In addition, keeps rules created
-#'              so you can convert other datasets using this converter.
-#' @param data A data.frame or data.table to prepare.
-#' @param rules A set of rules from the data preparator, if already used.
-#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
-#'         The data must be converted to a matrix format (\code{as.matrix}) for input
-#'         in \code{lgb.Dataset}.
-#'
-#' @examples
-#' data(iris)
-#'
-#' str(iris)
-#'
-#' new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
-#' str(new_iris$data)
-#'
-#' data(iris) # Erase iris dataset
-#' iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
-#'
-#' # Use conversion using known rules
-#' # Unknown factors become 0, excellent for sparse datasets
-#' newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
-#'
-#' # Unknown factor is now zero, perfect for sparse datasets
-#' newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
-#'
-#' newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
-#'
-#' # Is the newly created dataset equal? YES!
-#' all.equal(new_iris$data, newer_iris$data)
-#'
-#' # Can we test our own rules?
-#' data(iris) # Erase iris dataset
-#'
-#' # We remapped values differently
-#' personal_rules <- list(
-#'     Species = c(
-#'         "setosa" = 3L
-#'         , "versicolor" = 2L
-#'         , "virginica" = 1L
-#'     )
-#' )
-#' newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
-#' str(newest_iris$data) # SUCCESS!
-#'
-#' @importFrom data.table set
-#' @export
-lgb.prepare_rules <- function(data, rules = NULL) {
-
-  # data.table not behaving like data.frame
-  if (inherits(data, "data.table")) {
-
-    # Must use existing rules
-    if (!is.null(rules)) {
-
-      # Loop through rules
-      for (i in names(rules)) {
-
-        data.table::set(data, j = i, value = unname(rules[[i]][data[[i]]]))
-        data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s
-
-      }
-
-    } else {
-
-      # Get data classes
-      list_classes <- vapply(data, class, character(1L))
-
-      # Map characters/factors
-      is_fix <- which(list_classes %in% c("character", "factor"))
-      rules <- list()
-
-      # Need to create rules?
-      if (length(is_fix) > 0L) {
-
-        # Go through all characters/factors
-        for (i in is_fix) {
-
-          # Store column elsewhere
-          mini_data <- data[[i]]
-
-          # Get unique values
-          if (is.factor(mini_data)) {
-            mini_unique <- levels(mini_data) # Factor
-            mini_numeric <- numeric(length(mini_unique))
-            mini_numeric[seq_along(mini_unique)] <- seq_along(mini_unique) # Respect ordinal if needed
-          } else {
-            mini_unique <- as.factor(unique(mini_data)) # Character
-            mini_numeric <- as.numeric(mini_unique) # No respect of ordinality
-          }
-
-          # Create rules
-          indexed <- colnames(data)[i] # Index value
-          rules[[indexed]] <- mini_numeric # Numeric content
-          names(rules[[indexed]]) <- mini_unique # Character equivalent
-
-          # Apply to real data column
-          data.table::set(data, j = i, value = unname(rules[[indexed]][mini_data]))
-
-        }
-
-      }
-
-    }
-
-  } else {
-
-    # Must use existing rules
-    if (!is.null(rules)) {
-
-      # Loop through rules
-      for (i in names(rules)) {
-
-        data[[i]] <- unname(rules[[i]][data[[i]]])
-        data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s
-
-      }
-
-    } else {
-
-      # Default routine (data.frame)
-      if (inherits(data, "data.frame")) {
-
-        # Get data classes
-        list_classes <- vapply(data, class, character(1L))
-
-        # Map characters/factors
-        is_fix <- which(list_classes %in% c("character", "factor"))
-        rules <- list()
-
-        # Need to create rules?
-        if (length(is_fix) > 0L) {
-
-          # Go through all characters/factors
-          for (i in is_fix) {
-
-            # Store column elsewhere
-            mini_data <- data[[i]]
-
-            # Get unique values
-            if (is.factor(mini_data)) {
-              mini_unique <- levels(mini_data) # Factor
-              mini_numeric <- numeric(length(mini_unique))
-              mini_numeric[seq_along(mini_unique)] <- seq_along(mini_unique) # Respect ordinal if needed
-            } else {
-              mini_unique <- as.factor(unique(mini_data)) # Character
-              mini_numeric <- as.numeric(mini_unique) # No respect of ordinality
-            }
-
-            # Create rules
-            indexed <- colnames(data)[i] # Index value
-            rules[[indexed]] <- mini_numeric # Numeric content
-            names(rules[[indexed]]) <- mini_unique # Character equivalent
-
-            # Apply to real data column
-            data[[i]] <- unname(rules[[indexed]][mini_data])
-
-          }
-
-        }
-
-      } else {
-
-        stop(
-          "lgb.prepare_rules: you provided "
-          , paste(class(data), collapse = " & ")
-          , " but data should have class data.frame"
-        )
-
-      }
-
-    }
-
-  }
-
-  return(list(data = data, rules = rules))
-
-}
--- a/R-package/R/removed.R
+++ b/R-package/R/removed.R
+#' @title removed functions
+#' @name lgb.prepare
+#' @description removed functions
+#' @param ... catch-all to match old calls
+#' @export
+lgb.prepare <- function(...) {
+    stop("lgb.prepare() was removed in LightGBM 3.0.0. Please use lgb.convert()")
+}
+
+#' @title removed functions
+#' @name lgb.prepare2
+#' @description removed functions
+#' @param ... catch-all to match old calls
+#' @export
+lgb.prepare2 <- function(...) {
+    stop("lgb.prepare2() was removed in LightGBM 3.0.0. Please use lgb.convert()")
+}
+
+#' @title removed functions
+#' @name lgb.prepare_rules
+#' @description removed functions
+#' @param ... catch-all to match old calls
+#' @export
+lgb.prepare_rules <- function(...) {
+    stop("lgb.prepare_rules() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")
+}
+
+#' @title removed functions
+#' @name lgb.prepare_rules2
+#' @description removed functions
+#' @param ... catch-all to match old calls
+#' @export
+lgb.prepare_rules2 <- function(...) {
+    stop("lgb.prepare_rules2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()")
+}
--- a/R-package/demo/categorical_features_rules.R
+++ b/R-package/demo/categorical_features_rules.R
@@ -32,7 +32,7 @@ bank_train <- bank[1L:4000L, ]
 bank_test <- bank[4001L:4521L, ]

 # We must now transform the data to fit in LightGBM
-# For this task, we use lgb.prepare
+# For this task, we use lgb.convert_with_rules
 # The function transforms the data into a fittable data
 #
 # Classes 'data.table' and 'data.frame':	521 obs. of  17 variables:
@@ -53,9 +53,9 @@ bank_test <- bank[4001L:4521L, ]
 # $ previous : int  1 0 0 0 0 2 0 0 0 1 ...
 # $ poutcome : num  1 4 4 4 4 1 4 4 4 3 ...
 # $ y        : num  1 1 1 1 1 1 1 1 1 2 ...
-bank_rules <- lgb.prepare_rules(data = bank_train)
+bank_rules <- lgb.convert_with_rules(data = bank_train)
 bank_train <- bank_rules$data
-bank_test <- lgb.prepare_rules(data = bank_test, rules = bank_rules$rules)$data
+bank_test <- lgb.convert_with_rules(data = bank_test, rules = bank_rules$rules)$data
 str(bank_test)

 # Remove 1 to label because it must be between 0 and 1

--- a/R-package/man/lgb.convert.Rd
+++ b/R-package/man/lgb.convert.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.convert.R
+\name{lgb.convert}
+\alias{lgb.convert}
+\title{Data preparator for LightGBM datasets (integer)}
+\usage{
+lgb.convert(data)
+}
+\arguments{
+\item{data}{A data.frame or data.table to prepare.}
+}
+\value{
+The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
+        for input in \code{lgb.Dataset}.
+}
+\description{
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+             Factors and characters are converted to integer.
+             Please use \code{\link{lgb.convert_with_rules}} if you want to apply this transformation to
+             other datasets. This is useful if you have a specific need for integer dataset instead
+             of numeric dataset.
+
+             NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare2}.
+}
+\examples{
+data(iris)
+
+str(iris)
+
+# Convert all factors/chars to integer
+str(lgb.convert(data = iris))
+
+\dontrun{
+# When lightgbm package is installed, and you do not want to load it
+# You can still use the function!
+lgb.unloader()
+str(lightgbm::lgb.convert(data = iris))
+# 'data.frame':	150 obs. of  5 variables:
+# $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species     : int  1 1 1 1 1 1 1 1 1 1 ...
+}
+
+}
--- a/R-package/man/lgb.convert_with_rules.Rd
+++ b/R-package/man/lgb.convert_with_rules.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.convert_with_rules.R
+\name{lgb.convert_with_rules}
+\alias{lgb.convert_with_rules}
+\title{Data preparator for LightGBM datasets with rules (integer)}
+\usage{
+lgb.convert_with_rules(data, rules = NULL)
+}
+\arguments{
+\item{data}{A data.frame or data.table to prepare.}
+
+\item{rules}{A set of rules from the data preparator, if already used.}
+}
+\value{
+A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
+        The data must be converted to a matrix format (\code{as.matrix}) for input in
+        \code{lgb.Dataset}.
+}
+\description{
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+             Factors and characters are converted to integer.
+             In addition, keeps rules created so you can convert other datasets using this converter.
+             This is useful if you have a specific need for integer dataset instead of numeric dataset.
+
+             NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare_rules2}.
+}
+\examples{
+data(iris)
+
+str(iris)
+
+new_iris <- lgb.convert_with_rules(data = iris) # Autoconverter
+str(new_iris$data)
+
+data(iris) # Erase iris dataset
+iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
+
+# Use conversion using known rules
+# Unknown factors become 0, excellent for sparse datasets
+newer_iris <- lgb.convert_with_rules(data = iris, rules = new_iris$rules)
+
+# Unknown factor is now zero, perfect for sparse datasets
+newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
+
+newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
+
+# Is the newly created dataset equal? YES!
+all.equal(new_iris$data, newer_iris$data)
+
+# Can we test our own rules?
+data(iris) # Erase iris dataset
+
+# We remapped values differently
+personal_rules <- list(
+  Species = c(
+    "setosa" = 3L
+    , "versicolor" = 2L
+    , "virginica" = 1L
+  )
+)
+newest_iris <- lgb.convert_with_rules(data = iris, rules = personal_rules)
+str(newest_iris$data) # SUCCESS!
+
+}
--- a/R-package/man/lgb.prepare.Rd
+++ b/R-package/man/lgb.prepare.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lgb.prepare.R
+% Please edit documentation in R/removed.R
 \name{lgb.prepare}
 \alias{lgb.prepare}
-\title{Data preparator for LightGBM datasets (numeric)}
+\title{removed functions}
 \usage{
-lgb.prepare(data)
+lgb.prepare(...)
 }
 \arguments{
-\item{data}{A data.frame or data.table to prepare.}
-}
-\value{
-The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
-        for input in \code{lgb.Dataset}.
+\item{...}{catch-all to match old calls}
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-             Factors and characters are converted to numeric without integers. Please use
-             \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets.
-}
-\examples{
-data(iris)
-
-str(iris)
-
-str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
-
-\dontrun{
-# When lightgbm package is installed, and you do not want to load it
-# You can still use the function!
-lgb.unloader()
-str(lightgbm::lgb.prepare(data = iris))
-# 'data.frame':	150 obs. of  5 variables:
-# $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
-# $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
-# $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
-# $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
-# $ Species     : num  1 1 1 1 1 1 1 1 1 1 ...
-}
-
+removed functions
 }
--- a/R-package/man/lgb.prepare2.Rd
+++ b/R-package/man/lgb.prepare2.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lgb.prepare2.R
+% Please edit documentation in R/removed.R
 \name{lgb.prepare2}
 \alias{lgb.prepare2}
-\title{Data preparator for LightGBM datasets (integer)}
+\title{removed functions}
 \usage{
-lgb.prepare2(data)
+lgb.prepare2(...)
 }
 \arguments{
-\item{data}{A data.frame or data.table to prepare.}
-}
-\value{
-The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
-        for input in \code{lgb.Dataset}.
+\item{...}{catch-all to match old calls}
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-             Factors and characters are converted to numeric (specifically: integer).
-             Please use \code{\link{lgb.prepare_rules2}} if you want to apply this transformation to
-             other datasets. This is useful if you have a specific need for integer dataset instead
-             of numeric dataset. Note that there are programs which do not support integer-only
-             input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
-}
-\examples{
-data(iris)
-
-str(iris)
-
-# Convert all factors/chars to integer
-str(lgb.prepare2(data = iris))
-
-\dontrun{
-# When lightgbm package is installed, and you do not want to load it
-# You can still use the function!
-lgb.unloader()
-str(lightgbm::lgb.prepare2(data = iris))
-# 'data.frame':	150 obs. of  5 variables:
-# $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
-# $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
-# $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
-# $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
-# $ Species     : int  1 1 1 1 1 1 1 1 1 1 ...
-}
-
+removed functions
 }
--- a/R-package/man/lgb.prepare_rules.Rd
+++ b/R-package/man/lgb.prepare_rules.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lgb.prepare_rules.R
+% Please edit documentation in R/removed.R
 \name{lgb.prepare_rules}
 \alias{lgb.prepare_rules}
-\title{Data preparator for LightGBM datasets with rules (numeric)}
+\title{removed functions}
 \usage{
-lgb.prepare_rules(data, rules = NULL)
+lgb.prepare_rules(...)
 }
 \arguments{
-\item{data}{A data.frame or data.table to prepare.}
-
-\item{rules}{A set of rules from the data preparator, if already used.}
-}
-\value{
-A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
-        The data must be converted to a matrix format (\code{as.matrix}) for input
-        in \code{lgb.Dataset}.
+\item{...}{catch-all to match old calls}
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-             Factors and characters are converted to numeric. In addition, keeps rules created
-             so you can convert other datasets using this converter.
-}
-\examples{
-data(iris)
-
-str(iris)
-
-new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
-str(new_iris$data)
-
-data(iris) # Erase iris dataset
-iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
-
-# Use conversion using known rules
-# Unknown factors become 0, excellent for sparse datasets
-newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
-
-# Unknown factor is now zero, perfect for sparse datasets
-newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
-
-newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
-
-# Is the newly created dataset equal? YES!
-all.equal(new_iris$data, newer_iris$data)
-
-# Can we test our own rules?
-data(iris) # Erase iris dataset
-
-# We remapped values differently
-personal_rules <- list(
-    Species = c(
-        "setosa" = 3L
-        , "versicolor" = 2L
-        , "virginica" = 1L
-    )
-)
-newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
-str(newest_iris$data) # SUCCESS!
-
+removed functions
 }
--- a/R-package/man/lgb.prepare_rules2.Rd
+++ b/R-package/man/lgb.prepare_rules2.Rd
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lgb.prepare_rules2.R
+% Please edit documentation in R/removed.R
 \name{lgb.prepare_rules2}
 \alias{lgb.prepare_rules2}
-\title{Data preparator for LightGBM datasets with rules (integer)}
+\title{removed functions}
 \usage{
-lgb.prepare_rules2(data, rules = NULL)
+lgb.prepare_rules2(...)
 }
 \arguments{
-\item{data}{A data.frame or data.table to prepare.}
-
-\item{rules}{A set of rules from the data preparator, if already used.}
-}
-\value{
-A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
-        The data must be converted to a matrix format (\code{as.matrix}) for input in
-        \code{lgb.Dataset}.
+\item{...}{catch-all to match old calls}
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
-             Factors and characters are converted to numeric (specifically: integer).
-             In addition, keeps rules created so you can convert other datasets using this converter.
-             This is useful if you have a specific need for integer dataset instead of numeric dataset.
-             Note that there are programs which do not support integer-only input.
-             Consider this as a half memory technique which is dangerous, especially for LightGBM.
-}
-\examples{
-data(iris)
-
-str(iris)
-
-new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
-str(new_iris$data)
-
-data(iris) # Erase iris dataset
-iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
-
-# Use conversion using known rules
-# Unknown factors become 0, excellent for sparse datasets
-newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
-
-# Unknown factor is now zero, perfect for sparse datasets
-newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
-
-newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
-
-# Is the newly created dataset equal? YES!
-all.equal(new_iris$data, newer_iris$data)
-
-# Can we test our own rules?
-data(iris) # Erase iris dataset
-
-# We remapped values differently
-personal_rules <- list(
-  Species = c(
-    "setosa" = 3L
-    , "versicolor" = 2L
-    , "virginica" = 1L
-  )
-)
-newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
-str(newest_iris$data) # SUCCESS!
-
+removed functions
 }
--- a/R-package/pkgdown/_pkgdown.yml
+++ b/R-package/pkgdown/_pkgdown.yml
@@ -65,13 +65,11 @@ reference:
    - '`lgb.Dataset.save`'
    - '`lgb.Dataset.set.categorical`'
    - '`lgb.Dataset.set.reference`'
+    - '`lgb.convert`'
+    - '`lgb.convert_with_rules`'
  - title: Machine Learning
    desc: Train models with LightGBM
    contents:
-    - '`lgb.prepare`'
-    - '`lgb.prepare2`'
-    - '`lgb.prepare_rules`'
-    - '`lgb.prepare_rules2`'
    - '`lightgbm`'
    - '`lgb.train`'
    - '`lgb.cv`'

--- a/R-package/tests/testthat/test_lgb.prepare2.R
+++ b/R-package/tests/testthat/test_lgb.prepare2.R
-context("lgb.prepare2()")
+context("lgb.convert()")

-test_that("lgb.prepare2() rejects inputs that are not a data.table or data.frame", {
+test_that("lgb.convert() rejects inputs that are not a data.table or data.frame", {
    bad_inputs <- list(
        matrix(1.0:10.0, 2L, 5L)
        , TRUE
@@ -14,12 +14,12 @@ test_that("lgb.prepare2() rejects inputs that are not a data.table or data.frame
    )
    for (bad_input in bad_inputs) {
        expect_error({
-            converted_dataset <- lgb.prepare2(bad_input)
-        }, regexp = "lgb.prepare2: you provided", fixed = TRUE)
+            converted_dataset <- lgb.convert(bad_input)
+        }, regexp = "lgb.convert: you provided", fixed = TRUE)
    }
 })

-test_that("lgb.prepare2() should work correctly for a dataset with only character columns", {
+test_that("lgb.convert() should work correctly for a dataset with only character columns", {
    testDF <- data.frame(
        col1 = c("a", "b", "c")
        , col2 =  c("green", "green", "red")
@@ -27,7 +27,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with only characte
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare2(input_data)
+        converted_dataset <- lgb.convert(input_data)
        expect_identical(class(input_data), class(converted_dataset))
        expect_identical(class(converted_dataset[["col1"]]), "integer")
        expect_identical(class(converted_dataset[["col2"]]), "integer")
@@ -36,7 +36,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with only characte
    }
 })

-test_that("lgb.prepare2() should work correctly for a dataset with only factor columns", {
+test_that("lgb.convert() should work correctly for a dataset with only factor columns", {
    testDF <- data.frame(
        col1 = as.factor(c("a", "b", "c"))
        , col2 =  as.factor(c("green", "green", "red"))
@@ -44,7 +44,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with only factor c
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare2(input_data)
+        converted_dataset <- lgb.convert(input_data)
        expect_identical(class(input_data), class(converted_dataset))
        expect_identical(class(converted_dataset[["col1"]]), "integer")
        expect_identical(class(converted_dataset[["col2"]]), "integer")
@@ -53,7 +53,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with only factor c
    }
 })

-test_that("lgb.prepare2() should not change a dataset with only integer columns", {
+test_that("lgb.convert() should not change a dataset with only integer columns", {
    testDF <- data.frame(
        col1 = 11L:15L
        , col2 = 16L:20L
@@ -61,12 +61,12 @@ test_that("lgb.prepare2() should not change a dataset with only integer columns"
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare2(input_data)
+        converted_dataset <- lgb.convert(input_data)
        expect_identical(converted_dataset, input_data)
    }
 })

-test_that("lgb.prepare2() should work correctly for a dataset with numeric, factor, and character columns", {
+test_that("lgb.convert() should work correctly for a dataset with numeric, factor, and character columns", {
    testDF <- data.frame(
        character_col = c("a", "b", "c")
        , numeric_col = c(1.0, 9.0, 10.0)
@@ -75,20 +75,20 @@ test_that("lgb.prepare2() should work correctly for a dataset with numeric, fact
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare2(input_data)
+        converted_dataset <- lgb.convert(input_data)
        expect_identical(class(input_data), class(converted_dataset))
        expect_identical(class(converted_dataset[["character_col"]]), "integer")
        expect_identical(class(converted_dataset[["factor_col"]]), "integer")
        expect_identical(converted_dataset[["character_col"]], c(1L, 2L, 3L))
        expect_identical(converted_dataset[["factor_col"]], c(1L, 1L, 2L))

-        # today, lgb.prepare2() does  not convert numeric  columns
+        # today, lgb.convert() does not convert numeric columns
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0))
    }
 })

-test_that("lgb.prepare2() should work correctly for a dataset with missing values", {
+test_that("lgb.convert() should work correctly for a dataset with missing values", {
    testDF <- data.frame(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
@@ -102,7 +102,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with missing value
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare2(input_data)
+        converted_dataset <- lgb.convert(input_data)
        expect_identical(class(input_data), class(converted_dataset))

        expect_identical(class(converted_dataset[["character_col"]]), "integer")
@@ -120,10 +120,10 @@ test_that("lgb.prepare2() should work correctly for a dataset with missing value
            expect_identical(converted_dataset[[col]], rep(NA_integer_, nrow(converted_dataset)))
        }

-        # today, lgb.prepare2() does not convert logical columns
+        # today, lgb.convert() does not convert logical columns
        expect_identical(class(converted_dataset[["na_col"]]), "logical")

-        # today, lgb.prepare2() does not convert numeric columns to integer
+        # today, lgb.convert() does not convert numeric columns to integer
        expect_identical(class(converted_dataset[["na_real_col"]]), "numeric")
        expect_identical(converted_dataset[["na_real_col"]], rep(NA_real_, nrow(converted_dataset)))
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
@@ -131,7 +131,7 @@ test_that("lgb.prepare2() should work correctly for a dataset with missing value
    }
 })

-test_that("lgb.prepare2() should modify data.tables in-place", {
+test_that("lgb.convert() should modify data.tables in-place", {
    testDT <- data.table::data.table(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
@@ -142,6 +142,6 @@ test_that("lgb.prepare2() should modify data.tables in-place", {
        , factor_col = as.factor(c("n", "n", "y"))
        , integer_col = c(1L, 9L, NA_integer_)
    )
-    resultDT <- lgb.prepare2(testDT)
+    resultDT <- lgb.convert(testDT)
    expect_identical(resultDT, testDT)
 })
--- a/R-package/tests/testthat/test_lgb.prepare_rules2.R
+++ b/R-package/tests/testthat/test_lgb.prepare_rules2.R
-context("lgb.prepare_rules2()")
+context("lgb.convert_with_rules()")

-test_that("lgb.prepare_rules2() rejects inputs that are not a data.table or data.frame", {
+test_that("lgb.convert_with_rules() rejects inputs that are not a data.table or data.frame", {
    bad_inputs <- list(
        matrix(1.0:10.0, 2L, 5L)
        , TRUE
@@ -14,12 +14,12 @@ test_that("lgb.prepare_rules2() rejects inputs that are not a data.table or data
    )
    for (bad_input in bad_inputs) {
        expect_error({
-            conversion_result <- lgb.prepare_rules2(bad_input)
-        }, regexp = "lgb.prepare_rules2: you provided", fixed = TRUE)
+            conversion_result <- lgb.convert_with_rules(bad_input)
+        }, regexp = "lgb.convert_with_rules: you provided", fixed = TRUE)
    }
 })

-test_that("lgb.prepare_rules2() should work correctly for a dataset with only character columns", {
+test_that("lgb.convert_with_rules() should work correctly for a dataset with only character columns", {
    testDF <- data.frame(
        col1 = c("a", "b", "c")
        , col2 =  c("green", "green", "red")
@@ -27,7 +27,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with only ch
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules2(input_data)
+        conversion_result <- lgb.convert_with_rules(input_data)
        # dataset should have been converted to integer
        converted_dataset <- conversion_result[["data"]]
        expect_identical(class(input_data), class(converted_dataset))
@@ -44,7 +44,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with only ch
    }
 })

-test_that("lgb.prepare_rules2() should work correctly for a dataset with only factor columns", {
+test_that("lgb.convert_with_rules() should work correctly for a dataset with only factor columns", {
    testDF <- data.frame(
        col1 = as.factor(c("a", "b", "c"))
        , col2 =  as.factor(c("green", "green", "red"))
@@ -52,7 +52,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with only fa
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules2(input_data)
+        conversion_result <- lgb.convert_with_rules(input_data)
        # dataset should have been converted to integer
        converted_dataset <- conversion_result[["data"]]
        expect_identical(class(input_data), class(converted_dataset))
@@ -69,7 +69,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with only fa
    }
 })

-test_that("lgb.prepare_rules2() should not change a dataset with only integer columns", {
+test_that("lgb.convert_with_rules() should not change a dataset with only integer columns", {
    testDF <- data.frame(
        col1 = 11L:15L
        , col2 = 16L:20L
@@ -77,7 +77,7 @@ test_that("lgb.prepare_rules2() should not change a dataset with only integer co
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules2(input_data)
+        conversion_result <- lgb.convert_with_rules(input_data)
        # dataset should have been converted to integer
        converted_dataset <- conversion_result[["data"]]
        expect_identical(converted_dataset, input_data)
@@ -87,7 +87,7 @@ test_that("lgb.prepare_rules2() should not change a dataset with only integer co
    }
 })

-test_that("lgb.prepare_rules2() should work correctly for a dataset with numeric, factor, and character columns", {
+test_that("lgb.convert_with_rules() should work correctly for a dataset with numeric, factor, and character columns", {
    testDF <- data.frame(
        character_col = c("a", "b", "c")
        , numeric_col = c(1.0, 9.0, 10.0)
@@ -96,7 +96,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with numeric
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules2(input_data)
+        conversion_result <- lgb.convert_with_rules(input_data)
        # dataset should have been converted to numeric
        converted_dataset <- conversion_result[["data"]]
        expect_identical(class(input_data), class(converted_dataset))
@@ -111,13 +111,13 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with numeric
        expect_identical(rules[["character_col"]], c("a" = 1L, "b" = 2L, "c" = 3L))
        expect_identical(rules[["factor_col"]], c("n" = 1L, "y" = 2L))

-        # today, lgb.prepare2() does  not convert numeric  columns
+        # today, lgb.convert_with_rules() does not convert numeric columns
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0))
    }
 })

-test_that("lgb.prepare_rules2() should work correctly for a dataset with missing values", {
+test_that("lgb.convert_with_rules() should work correctly for a dataset with missing values", {
    testDF <- data.frame(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
@@ -131,7 +131,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with missing
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules2(input_data)
+        conversion_result <- lgb.convert_with_rules(input_data)
        # dataset should have been converted to integer
        converted_dataset <- conversion_result[["data"]]
        expect_identical(class(input_data), class(converted_dataset))
@@ -151,10 +151,10 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with missing
            expect_identical(converted_dataset[[col]], rep(NA_integer_, nrow(converted_dataset)))
        }

-        # today, lgb.prepare2() does not convert logical columns
+        # today, lgb.convert_with_rules() does not convert logical columns
        expect_identical(class(converted_dataset[["na_col"]]), "logical")

-        # today, lgb.prepare2() does not convert numeric columns to integer
+        # today, lgb.convert_with_rules() does not convert numeric columns to integer
        expect_identical(class(converted_dataset[["na_real_col"]]), "numeric")
        expect_identical(converted_dataset[["na_real_col"]], rep(NA_real_, nrow(converted_dataset)))
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
@@ -170,7 +170,7 @@ test_that("lgb.prepare_rules2() should work correctly for a dataset with missing
    }
 })

-test_that("lgb.prepare_rules2() should work correctly if you provide your own well-formed rules", {
+test_that("lgb.convert_with_rules() should work correctly if you provide your own well-formed rules", {
    testDF <- data.frame(
        character_col = c("a", NA_character_, "c", "a", "a", "c")
        , na_col = rep(NA, 6L)
@@ -183,7 +183,7 @@ test_that("lgb.prepare_rules2() should work correctly if you provide your own we
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
-    # value used by lgb.prepare_rules2() when it encounters a categorical value that
+    # value used by lgb.convert_with_rules() when it encounters a categorical value that
    # is not in the provided rules
    UNKNOWN_FACTOR_VALUE <- 0L
    for (input_data in list(testDF, testDT)) {
@@ -197,7 +197,7 @@ test_that("lgb.prepare_rules2() should work correctly if you provide your own we
                , "y" = 66L
            )
        )
-        conversion_result <- lgb.prepare_rules2(
+        conversion_result <- lgb.convert_with_rules(
            data = input_data
            , rules = custom_rules
        )
@@ -223,7 +223,7 @@ test_that("lgb.prepare_rules2() should work correctly if you provide your own we
    }
 })

-test_that("lgb.prepare_rules2() should modify data.tables in-place", {
+test_that("lgb.convert_with_rules() should modify data.tables in-place", {
    testDT <- data.table::data.table(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
@@ -234,7 +234,7 @@ test_that("lgb.prepare_rules2() should modify data.tables in-place", {
        , factor_col = as.factor(c("n", "n", "y"))
        , integer_col = c(1L, 9L, NA_integer_)
    )
-    conversion_result <- lgb.prepare_rules2(testDT)
+    conversion_result <- lgb.convert_with_rules(testDT)
    resultDT <- conversion_result[["data"]]
    expect_identical(resultDT, testDT)
 })
--- a/R-package/tests/testthat/test_lgb.prepare.R
+++ b/R-package/tests/testthat/test_lgb.prepare.R
-context("lgb.prepare()")
-
-test_that("lgb.prepare() rejects inputs that are not a data.table or data.frame", {
-    bad_inputs <- list(
-        matrix(1.0:10.0, 2L, 5L)
-        , TRUE
-        , c("a", "b")
-        , NA
-        , 10L
-        , lgb.Dataset(
-            data = matrix(1.0:10.0, 2L, 5L)
-            , params = list()
-        )
-    )
-    for (bad_input in bad_inputs) {
-        expect_error({
-            converted_dataset <- lgb.prepare(bad_input)
-        }, regexp = "lgb.prepare: you provided", fixed = TRUE)
-    }
-})
-
-test_that("lgb.prepare() should work correctly for a dataset with only character columns", {
-    testDF <- data.frame(
-        col1 = c("a", "b", "c")
-        , col2 =  c("green", "green", "red")
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare(input_data)
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["col1"]]), "numeric")
-        expect_identical(class(converted_dataset[["col2"]]), "numeric")
-        expect_identical(converted_dataset[["col1"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["col2"]], c(1.0, 1.0, 2.0))
-    }
-})
-
-test_that("lgb.prepare() should work correctly for a dataset with only factor columns", {
-    testDF <- data.frame(
-        col1 = as.factor(c("a", "b", "c"))
-        , col2 =  as.factor(c("green", "green", "red"))
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare(input_data)
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["col1"]]), "numeric")
-        expect_identical(class(converted_dataset[["col2"]]), "numeric")
-        expect_identical(converted_dataset[["col1"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["col2"]], c(1.0, 1.0, 2.0))
-    }
-})
-
-test_that("lgb.prepare() should not change a dataset with only numeric columns", {
-    testDF <- data.frame(
-        col1 = 11.0:15.0
-        , col2 = 16.0:20.0
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare(input_data)
-        expect_identical(converted_dataset, input_data)
-    }
-})
-
-test_that("lgb.prepare() should work correctly for a dataset with numeric, factor, and character columns", {
-    testDF <- data.frame(
-        character_col = c("a", "b", "c")
-        , numeric_col = c(1.0, 9.0, 10.0)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare(input_data)
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["character_col"]]), "numeric")
-        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
-        expect_identical(class(converted_dataset[["factor_col"]]), "numeric")
-        expect_identical(converted_dataset[["character_col"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0))
-        expect_identical(converted_dataset[["factor_col"]], c(1.0, 1.0, 2.0))
-    }
-})
-
-test_that("lgb.prepare() should work correctly for a dataset with missing values", {
-    testDF <- data.frame(
-        character_col = c("a", NA_character_, "c")
-        , na_col = rep(NA, 3L)
-        , na_real_col = rep(NA_real_, 3L)
-        , na_int_col = rep(NA_integer_,  3L)
-        , na_character_col = rep(NA_character_, 3L)
-        , numeric_col = c(1.0, 9.0, NA_real_)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , integer_col = c(1L, 9L, NA_integer_)
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        converted_dataset <- lgb.prepare(input_data)
-        expect_identical(class(input_data), class(converted_dataset))
-
-        expect_identical(class(converted_dataset[["character_col"]]), "numeric")
-        expect_identical(converted_dataset[["character_col"]], c(1.0, NA_real_, 2.0))
-
-        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
-        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, NA_real_))
-
-        expect_identical(class(converted_dataset[["factor_col"]]), "numeric")
-        expect_identical(converted_dataset[["factor_col"]], c(1.0, 1.0, 2.0))
-
-        # NAs of any type should be converted to numeric
-        for (col in c("na_real_col", "na_character_col")) {
-            expect_identical(class(converted_dataset[[col]]), "numeric")
-            expect_identical(converted_dataset[[col]], rep(NA_real_, nrow(converted_dataset)))
-        }
-
-        # today, lgb.prepare() does not convert logical columns
-        expect_identical(class(converted_dataset[["na_col"]]), "logical")
-
-        # today, lgb.prepare() does not convert integer columns to numeric
-        expect_identical(class(converted_dataset[["na_int_col"]]), "integer")
-        expect_identical(converted_dataset[["na_int_col"]], rep(NA_integer_, nrow(converted_dataset)))
-        expect_identical(class(converted_dataset[["integer_col"]]), "integer")
-        expect_identical(converted_dataset[["integer_col"]], c(1L, 9L, NA_integer_))
-    }
-})
-
-test_that("lgb.prepare() should modify data.tables in-place", {
-    testDT <- data.table::data.table(
-        character_col = c("a", NA_character_, "c")
-        , na_col = rep(NA, 3L)
-        , na_real_col = rep(NA_real_, 3L)
-        , na_int_col = rep(NA_integer_,  3L)
-        , na_character_col = rep(NA_character_, 3L)
-        , numeric_col = c(1.0, 9.0, NA_real_)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , integer_col = c(1L, 9L, NA_integer_)
-    )
-    resultDT <- lgb.prepare(testDT)
-    expect_identical(resultDT, testDT)
-})
--- a/R-package/tests/testthat/test_lgb.prepare_rules.R
+++ b/R-package/tests/testthat/test_lgb.prepare_rules.R
-context("lgb.prepare_rules()")
-
-test_that("lgb.prepare_rules() rejects inputs that are not a data.table or data.frame", {
-    bad_inputs <- list(
-        matrix(1.0:10.0, 2L, 5L)
-        , TRUE
-        , c("a", "b")
-        , NA
-        , 10L
-        , lgb.Dataset(
-            data = matrix(1.0:10.0, 2L, 5L)
-            , params = list()
-        )
-    )
-    for (bad_input in bad_inputs) {
-        expect_error({
-            conversion_result <- lgb.prepare_rules(bad_input)
-        }, regexp = "lgb.prepare_rules: you provided", fixed = TRUE)
-    }
-})
-
-test_that("lgb.prepare_rules() should work correctly for a dataset with only character columns", {
-    testDF <- data.frame(
-        col1 = c("a", "b", "c")
-        , col2 =  c("green", "green", "red")
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules(input_data)
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["col1"]]), "numeric")
-        expect_identical(class(converted_dataset[["col2"]]), "numeric")
-        expect_identical(converted_dataset[["col1"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["col2"]], c(1.0, 1.0, 2.0))
-        # rules should be returned and correct
-        rules <- conversion_result$rules
-        expect_is(rules, "list")
-        expect_length(rules, ncol(input_data))
-        expect_identical(rules[["col1"]], c("a" = 1.0, "b" = 2.0, "c" = 3.0))
-        expect_identical(rules[["col2"]], c("green" = 1.0, "red" = 2.0))
-    }
-})
-
-test_that("lgb.prepare_rules() should work correctly for a dataset with only factor columns", {
-    testDF <- data.frame(
-        col1 = as.factor(c("a", "b", "c"))
-        , col2 =  as.factor(c("green", "green", "red"))
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules(input_data)
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["col1"]]), "numeric")
-        expect_identical(class(converted_dataset[["col2"]]), "numeric")
-        expect_identical(converted_dataset[["col1"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["col2"]], c(1.0, 1.0, 2.0))
-        # rules should be returned and correct
-        rules <- conversion_result$rules
-        expect_is(rules, "list")
-        expect_length(rules, ncol(input_data))
-        expect_identical(rules[["col1"]], c("a" = 1.0, "b" = 2.0, "c" = 3.0))
-        expect_identical(rules[["col2"]], c("green" = 1.0, "red" = 2.0))
-    }
-})
-
-test_that("lgb.prepare_rules() should not change a dataset with only numeric columns", {
-    testDF <- data.frame(
-        col1 = 11.0:15.0
-        , col2 = 16.0:20.0
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules(input_data)
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(converted_dataset, input_data)
-        # rules should be returned and correct
-        rules <- conversion_result$rules
-        expect_identical(rules, list())
-    }
-})
-
-test_that("lgb.prepare_rules() should work correctly for a dataset with numeric, factor, and character columns", {
-    testDF <- data.frame(
-        character_col = c("a", "b", "c")
-        , numeric_col = c(1.0, 9.0, 10.0)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules(input_data)
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(class(input_data), class(converted_dataset))
-        expect_identical(class(converted_dataset[["character_col"]]), "numeric")
-        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
-        expect_identical(class(converted_dataset[["factor_col"]]), "numeric")
-        expect_identical(converted_dataset[["character_col"]], c(1.0, 2.0, 3.0))
-        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0))
-        expect_identical(converted_dataset[["factor_col"]], c(1.0, 1.0, 2.0))
-        # rules should be returned and correct
-        rules <- conversion_result$rules
-        expect_is(rules, "list")
-        expect_length(rules, 2L)
-        expect_identical(rules[["character_col"]], c("a" = 1.0, "b" = 2.0, "c" = 3.0))
-        expect_identical(rules[["factor_col"]], c("n" = 1.0, "y" = 2.0))
-    }
-})
-
-test_that("lgb.prepare_rules() should work correctly for a dataset with missing values", {
-    testDF <- data.frame(
-        character_col = c("a", NA_character_, "c")
-        , na_col = rep(NA, 3L)
-        , na_real_col = rep(NA_real_, 3L)
-        , na_int_col = rep(NA_integer_,  3L)
-        , na_character_col = rep(NA_character_, 3L)
-        , numeric_col = c(1.0, 9.0, NA_real_)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , integer_col = c(1L, 9L, NA_integer_)
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    for (input_data in list(testDF, testDT)) {
-        conversion_result <- lgb.prepare_rules(input_data)
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(class(input_data), class(converted_dataset))
-
-        expect_identical(class(converted_dataset[["character_col"]]), "numeric")
-        expect_identical(converted_dataset[["character_col"]], c(1.0, NA_real_, 2.0))
-
-        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
-        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, NA_real_))
-
-        expect_identical(class(converted_dataset[["factor_col"]]), "numeric")
-        expect_identical(converted_dataset[["factor_col"]], c(1.0, 1.0, 2.0))
-
-        # NAs of any type should be converted to numeric
-        for (col in c("na_real_col", "na_character_col")) {
-            expect_identical(class(converted_dataset[[col]]), "numeric")
-            expect_identical(converted_dataset[[col]], rep(NA_real_, nrow(converted_dataset)))
-        }
-
-        # today, lgb.prepare_rules() does not convert logical columns
-        expect_identical(class(converted_dataset[["na_col"]]), "logical")
-
-        # today, lgb.prepare_rules() does not convert integer columns to numeric
-        expect_identical(class(converted_dataset[["na_int_col"]]), "integer")
-        expect_identical(converted_dataset[["na_int_col"]], rep(NA_integer_, nrow(converted_dataset)))
-        expect_identical(class(converted_dataset[["integer_col"]]), "integer")
-        expect_identical(converted_dataset[["integer_col"]], c(1L, 9L, NA_integer_))
-
-        # rules should be returned and correct
-        rules <- conversion_result$rules
-        expect_is(rules, "list")
-        expect_length(rules, 3L)
-        expect_identical(rules[["character_col"]], stats::setNames(c(1.0, NA_real_, 2.0), c("a", NA, "c")))
-        expect_identical(rules[["na_character_col"]], stats::setNames(NA_real_, NA))
-        expect_identical(rules[["factor_col"]], c("n" = 1.0, "y" = 2.0))
-    }
-})
-
-test_that("lgb.prepare_rules() should work correctly if you provide your own well-formed rules", {
-    testDF <- data.frame(
-        character_col = c("a", NA_character_, "c", "a", "a", "c")
-        , na_col = rep(NA, 6L)
-        , na_real_col = rep(NA_real_, 6L)
-        , na_int_col = rep(NA_integer_, 6L)
-        , na_character_col = rep(NA_character_, 6L)
-        , numeric_col = c(1.0, 9.0, NA_real_, 10.0, 11.0, 12.0)
-        , factor_col = as.factor(c("n", "n", "y", "y", "n", "n"))
-        , integer_col = c(1L, 9L, NA_integer_, 1L, 1L, 1L)
-        , stringsAsFactors = FALSE
-    )
-    testDT <- data.table::as.data.table(testDF)
-    # value used by lgb.prepare_rules() when it encounters a categorical value that
-    # is not in the provided rules
-    UNKNOWN_FACTOR_VALUE <- 0.0
-    for (input_data in list(testDF, testDT)) {
-        custom_rules <- list(
-            "character_col" = c(
-                "a" = 5.0
-                , "c" = -10.2
-            )
-            , "factor_col" = c(
-                "n" = 65.0
-                , "y" = 65.01
-            )
-        )
-        conversion_result <- lgb.prepare_rules(
-            data = input_data
-            , rules = custom_rules
-        )
-
-        # dataset should have been converted to numeric
-        converted_dataset <- conversion_result[["data"]]
-        expect_identical(class(input_data), class(converted_dataset))
-
-        expect_identical(class(converted_dataset[["character_col"]]), "numeric")
-        expect_identical(converted_dataset[["character_col"]], c(5.0, UNKNOWN_FACTOR_VALUE, -10.2, 5.0, 5.0, -10.2))
-
-        expect_identical(class(converted_dataset[["factor_col"]]), "numeric")
-        expect_identical(converted_dataset[["factor_col"]], c(65.0, 65.0, 65.01, 65.01, 65.0, 65.0))
-
-        # columns not specified in rules are not going to be converted
-        for (col in c("na_col", "na_real_col", "na_int_col", "na_character_col", "numeric_col", "integer_col")) {
-            expect_identical(converted_dataset[[col]], input_data[[col]])
-        }
-
-        # the rules you passed in should be returned unchanged
-        rules <- conversion_result$rules
-        expect_identical(rules, custom_rules)
-    }
-})
-
-test_that("lgb.prepare_rules() should modify data.tables in-place", {
-    testDT <- data.table::data.table(
-        character_col = c("a", NA_character_, "c")
-        , na_col = rep(NA, 3L)
-        , na_real_col = rep(NA_real_, 3L)
-        , na_int_col = rep(NA_integer_,  3L)
-        , na_character_col = rep(NA_character_, 3L)
-        , numeric_col = c(1.0, 9.0, NA_real_)
-        , factor_col = as.factor(c("n", "n", "y"))
-        , integer_col = c(1L, 9L, NA_integer_)
-    )
-    conversion_result <- lgb.prepare_rules(testDT)
-    resultDT <- conversion_result[["data"]]
-    expect_identical(resultDT, testDT)
-})