Unverified Commit 248fbfa6 authored by david-cortes's avatar david-cortes Committed by GitHub
Browse files

[R-package] Remove `reshape` argument in `predict` (#4971)

* change prediction default to reshape=TRUE

* remove reshape argument

* comments
parent 33eb0376
...@@ -474,7 +474,6 @@ Booster <- R6::R6Class( ...@@ -474,7 +474,6 @@ Booster <- R6::R6Class(
predleaf = FALSE, predleaf = FALSE,
predcontrib = FALSE, predcontrib = FALSE,
header = FALSE, header = FALSE,
reshape = FALSE,
params = list()) { params = list()) {
self$restore_handle() self$restore_handle()
...@@ -501,7 +500,6 @@ Booster <- R6::R6Class( ...@@ -501,7 +500,6 @@ Booster <- R6::R6Class(
, predleaf = predleaf , predleaf = predleaf
, predcontrib = predcontrib , predcontrib = predcontrib
, header = header , header = header
, reshape = reshape
) )
) )
...@@ -729,20 +727,16 @@ Booster <- R6::R6Class( ...@@ -729,20 +727,16 @@ Booster <- R6::R6Class(
#' @param predleaf whether to predict leaf indices instead. #' @param predleaf whether to predict leaf indices instead.
#' @param predcontrib return per-feature contributions for each record. #' @param predcontrib return per-feature contributions for each record.
#' @param header only used for prediction for text file. True if text file has header #' @param header only used for prediction for text file. True if text file has header
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#' prediction outputs per case.
#' @param params a list of additional named parameters. See #' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ #' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
#' the "Predict Parameters" section of the documentation} for a list of parameters and #' the "Predict Parameters" section of the documentation} for a list of parameters and
#' valid values. #' valid values.
#' @param ... ignored #' @param ... ignored
#' @return For regression or binary classification, it returns a vector of length \code{nrow(data)}. #' @return For regression or binary classification, it returns a vector of length \code{nrow(data)}.
#' For multiclass classification, either a \code{num_class * nrow(data)} vector or #' For multiclass classification, it returns a matrix of dimensions \code{(nrow(data), num_class)}.
#' a \code{(nrow(data), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#' #'
#' When \code{predleaf = TRUE}, the output is a matrix object with the #' When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be
#' number of columns corresponding to the number of trees. #' returned as a matrix.
#' #'
#' @examples #' @examples
#' \donttest{ #' \donttest{
...@@ -786,7 +780,6 @@ predict.lgb.Booster <- function(object, ...@@ -786,7 +780,6 @@ predict.lgb.Booster <- function(object,
predleaf = FALSE, predleaf = FALSE,
predcontrib = FALSE, predcontrib = FALSE,
header = FALSE, header = FALSE,
reshape = FALSE,
params = list(), params = list(),
...) { ...) {
...@@ -796,6 +789,9 @@ predict.lgb.Booster <- function(object, ...@@ -796,6 +789,9 @@ predict.lgb.Booster <- function(object,
additional_params <- list(...) additional_params <- list(...)
if (length(additional_params) > 0L) { if (length(additional_params) > 0L) {
if ("reshape" %in% names(additional_params)) {
stop("'reshape' argument is no longer supported.")
}
warning(paste0( warning(paste0(
"predict.lgb.Booster: Found the following passed through '...': " "predict.lgb.Booster: Found the following passed through '...': "
, paste(names(additional_params), collapse = ", ") , paste(names(additional_params), collapse = ", ")
...@@ -812,7 +808,6 @@ predict.lgb.Booster <- function(object, ...@@ -812,7 +808,6 @@ predict.lgb.Booster <- function(object,
, predleaf = predleaf , predleaf = predleaf
, predcontrib = predcontrib , predcontrib = predcontrib
, header = header , header = header
, reshape = reshape
, params = params , params = params
) )
) )
......
...@@ -295,7 +295,6 @@ Dataset <- R6::R6Class( ...@@ -295,7 +295,6 @@ Dataset <- R6::R6Class(
init_score <- private$predictor$predict( init_score <- private$predictor$predict(
data = private$raw_data data = private$raw_data
, rawscore = TRUE , rawscore = TRUE
, reshape = TRUE
) )
# Not needed to transpose, for it is col_major # Not needed to transpose, for it is col_major
......
...@@ -84,8 +84,7 @@ Predictor <- R6::R6Class( ...@@ -84,8 +84,7 @@ Predictor <- R6::R6Class(
rawscore = FALSE, rawscore = FALSE,
predleaf = FALSE, predleaf = FALSE,
predcontrib = FALSE, predcontrib = FALSE,
header = FALSE, header = FALSE) {
reshape = FALSE) {
# Check if number of iterations is existing - if not, then set it to -1 (use all) # Check if number of iterations is existing - if not, then set it to -1 (use all)
if (is.null(num_iteration)) { if (is.null(num_iteration)) {
...@@ -215,23 +214,12 @@ Predictor <- R6::R6Class( ...@@ -215,23 +214,12 @@ Predictor <- R6::R6Class(
# Get number of cases per row # Get number of cases per row
npred_per_case <- length(preds) / num_row npred_per_case <- length(preds) / num_row
# Data reshaping # Data reshaping
if (npred_per_case > 1L || predleaf || predcontrib) {
if (predleaf | predcontrib) {
# Predict leaves only, reshaping is mandatory
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
} else if (reshape && npred_per_case > 1L) {
# Predict with data reshaping
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE) preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
} }
return(preds) return(preds)
} }
), ),
......
...@@ -56,21 +56,15 @@ model <- lgb.train( ...@@ -56,21 +56,15 @@ model <- lgb.train(
# We can predict on test data, identical # We can predict on test data, identical
my_preds <- predict(model, test[, 1L:4L]) my_preds <- predict(model, test[, 1L:4L])
# A (30x3) matrix with the predictions, use parameter reshape # A (30x3) matrix with the predictions
# class1 class2 class3 # class1 class2 class3
# obs1 obs1 obs1 # obs1 obs1 obs1
# obs2 obs2 obs2 # obs2 obs2 obs2
# .... .... .... # .... .... ....
my_preds <- predict(model, test[, 1L:4L], reshape = TRUE) my_preds <- predict(model, test[, 1L:4L])
# We can also get the predicted scores before the Sigmoid/Softmax application # We can also get the predicted scores before the Sigmoid/Softmax application
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE) my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE)
# Raw score predictions as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
# We can also get the leaf index # We can also get the leaf index
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE) my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE)
# Predict leaf index as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE, reshape = TRUE)
...@@ -36,7 +36,7 @@ model_builtin <- lgb.train( ...@@ -36,7 +36,7 @@ model_builtin <- lgb.train(
, obj = "multiclass" , obj = "multiclass"
) )
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE) preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE)
probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
# Method 2 of training with custom objective function # Method 2 of training with custom objective function
...@@ -109,7 +109,7 @@ model_custom <- lgb.train( ...@@ -109,7 +109,7 @@ model_custom <- lgb.train(
, eval = custom_multiclass_metric , eval = custom_multiclass_metric
) )
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE) preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE)
probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom)) probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
# compare predictions # compare predictions
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
predleaf = FALSE, predleaf = FALSE,
predcontrib = FALSE, predcontrib = FALSE,
header = FALSE, header = FALSE,
reshape = FALSE,
params = list(), params = list(),
... ...
) )
...@@ -44,9 +43,6 @@ for logistic regression would result in predictions for log-odds instead of prob ...@@ -44,9 +43,6 @@ for logistic regression would result in predictions for log-odds instead of prob
\item{header}{only used for prediction for text file. True if text file has header} \item{header}{only used for prediction for text file. True if text file has header}
\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
prediction outputs per case.}
\item{params}{a list of additional named parameters. See \item{params}{a list of additional named parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
the "Predict Parameters" section of the documentation} for a list of parameters and the "Predict Parameters" section of the documentation} for a list of parameters and
...@@ -56,12 +52,10 @@ valid values.} ...@@ -56,12 +52,10 @@ valid values.}
} }
\value{ \value{
For regression or binary classification, it returns a vector of length \code{nrow(data)}. For regression or binary classification, it returns a vector of length \code{nrow(data)}.
For multiclass classification, either a \code{num_class * nrow(data)} vector or For multiclass classification, it returns a matrix of dimensions \code{(nrow(data), num_class)}.
a \code{(nrow(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
When \code{predleaf = TRUE}, the output is a matrix object with the When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be
number of columns corresponding to the number of trees. returned as a matrix.
} }
\description{ \description{
Predicted values based on class \code{lgb.Booster} Predicted values based on class \code{lgb.Booster}
......
...@@ -111,3 +111,61 @@ test_that("start_iteration works correctly", { ...@@ -111,3 +111,61 @@ test_that("start_iteration works correctly", {
pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, predleaf = TRUE) pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, predleaf = TRUE)
expect_equal(pred_leaf1, pred_leaf2) expect_equal(pred_leaf1, pred_leaf2)
}) })
# Objectives with a single output per row (regression, binary) must come back
# from predict() as a plain vector with one entry per input row, both for the
# default predictions and for rawscore = TRUE.
test_that("predictions for regression and binary classification are returned as vectors", {
    # Shared pair of checks: the result is a vector and has one value per row.
    .expect_one_pred_per_row <- function(preds, n_rows) {
        expect_true(is.vector(preds))
        expect_equal(length(preds), n_rows)
    }

    # regression: the first column of mtcars is used as the label,
    # the remaining columns as features
    data(mtcars)
    X_reg <- as.matrix(mtcars[, -1L])
    y_reg <- as.numeric(mtcars[, 1L])
    reg_model <- lgb.train(
        data = lgb.Dataset(X_reg, label = y_reg, params = list(max_bins = 5L))
        , obj = "regression"
        , nrounds = 5L
        , verbose = VERBOSITY
    )
    .expect_one_pred_per_row(predict(reg_model, X_reg), nrow(X_reg))
    .expect_one_pred_per_row(predict(reg_model, X_reg, rawscore = TRUE), nrow(X_reg))

    # binary classification on the agaricus data shipped with the package
    data(agaricus.train, package = "lightgbm")
    X_bin <- agaricus.train$data
    y_bin <- agaricus.train$label
    bin_model <- lgb.train(
        data = lgb.Dataset(X_bin, label = y_bin)
        , obj = "binary"
        , nrounds = 5L
        , verbose = VERBOSITY
    )
    .expect_one_pred_per_row(predict(bin_model, X_bin), nrow(X_bin))
    .expect_one_pred_per_row(predict(bin_model, X_bin, rawscore = TRUE), nrow(X_bin))
})
# With a multiclass objective predict() must return a matrix with one row per
# input row and one column per class, both for the default predictions and
# for rawscore = TRUE.
test_that("predictions for multiclass classification are returned as matrix", {
    n_classes <- 3L

    # Shared checks on the shape of a prediction result.
    .expect_pred_matrix <- function(preds, n_rows) {
        expect_true(is.matrix(preds))
        expect_equal(nrow(preds), n_rows)
        expect_equal(ncol(preds), 3L)
    }

    # iris: column 5 (Species) is the label, recoded to start at 0
    data(iris)
    features <- as.matrix(iris[, -5L])
    labels <- as.numeric(iris$Species) - 1.0
    multi_model <- lgb.train(
        data = lgb.Dataset(features, label = labels)
        , obj = "multiclass"
        , nrounds = 5L
        , verbose = VERBOSITY
        , params = list(num_class = n_classes)
    )

    .expect_pred_matrix(predict(multi_model, features), nrow(features))
    .expect_pred_matrix(predict(multi_model, features, rawscore = TRUE), nrow(features))
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment