Unverified Commit 248fbfa6 authored by david-cortes's avatar david-cortes Committed by GitHub
Browse files

[R-package] Remove `reshape` argument in `predict` (#4971)

* change prediction default to reshape=TRUE

* remove reshape argument

* comments
parent 33eb0376
......@@ -474,7 +474,6 @@ Booster <- R6::R6Class(
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE,
params = list()) {
self$restore_handle()
......@@ -501,7 +500,6 @@ Booster <- R6::R6Class(
, predleaf = predleaf
, predcontrib = predcontrib
, header = header
, reshape = reshape
)
)
......@@ -729,20 +727,16 @@ Booster <- R6::R6Class(
#' @param predleaf whether to predict leaf indices instead.
#' @param predcontrib return per-feature contributions for each record.
#' @param header only used when predicting from a text file. \code{TRUE} if the text file has a header
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#' prediction outputs per case.
#' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
#' the "Predict Parameters" section of the documentation} for a list of parameters and
#' valid values.
#' @param ... ignored
#' @return For regression or binary classification, it returns a vector of length \code{nrows(data)}.
#' For multiclass classification, either a \code{num_class * nrows(data)} vector or
#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#' For multiclass classification, it returns a matrix of dimensions \code{(nrows(data), num_class)}.
#'
#' When \code{predleaf = TRUE}, the output is a matrix object with the
#' number of columns corresponding to the number of trees.
#' When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be
#' returned as a matrix.
#'
#' @examples
#' \donttest{
......@@ -786,7 +780,6 @@ predict.lgb.Booster <- function(object,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE,
params = list(),
...) {
......@@ -796,6 +789,9 @@ predict.lgb.Booster <- function(object,
additional_params <- list(...)
if (length(additional_params) > 0L) {
if ("reshape" %in% names(additional_params)) {
stop("'reshape' argument is no longer supported.")
}
warning(paste0(
"predict.lgb.Booster: Found the following passed through '...': "
, paste(names(additional_params), collapse = ", ")
......@@ -812,7 +808,6 @@ predict.lgb.Booster <- function(object,
, predleaf = predleaf
, predcontrib = predcontrib
, header = header
, reshape = reshape
, params = params
)
)
......
......@@ -295,7 +295,6 @@ Dataset <- R6::R6Class(
init_score <- private$predictor$predict(
data = private$raw_data
, rawscore = TRUE
, reshape = TRUE
)
# No need to transpose, since it is column-major
......
......@@ -84,8 +84,7 @@ Predictor <- R6::R6Class(
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE) {
header = FALSE) {
# Check if number of iterations is existing - if not, then set it to -1 (use all)
if (is.null(num_iteration)) {
......@@ -215,23 +214,12 @@ Predictor <- R6::R6Class(
# Get number of cases per row
npred_per_case <- length(preds) / num_row
# Data reshaping
if (predleaf | predcontrib) {
# Predict leaves only, reshaping is mandatory
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
} else if (reshape && npred_per_case > 1L) {
# Predict with data reshaping
if (npred_per_case > 1L || predleaf || predcontrib) {
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
}
return(preds)
}
),
......
......@@ -56,21 +56,15 @@ model <- lgb.train(
# We can predict on test data, identical
my_preds <- predict(model, test[, 1L:4L])
# A (30x3) matrix with the predictions, use parameter reshape
# A (30x3) matrix with the predictions
# class1 class2 class3
# obs1 obs1 obs1
# obs2 obs2 obs2
# .... .... ....
my_preds <- predict(model, test[, 1L:4L], reshape = TRUE)
my_preds <- predict(model, test[, 1L:4L])
# We can also get the predicted scores before the Sigmoid/Softmax application
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE)
# Raw score predictions as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
# We can also get the leaf index
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE)
# Predict leaf index as matrix instead of vector
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE, reshape = TRUE)
......@@ -36,7 +36,7 @@ model_builtin <- lgb.train(
, obj = "multiclass"
)
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE)
probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
# Method 2 of training with custom objective function
......@@ -109,7 +109,7 @@ model_custom <- lgb.train(
, eval = custom_multiclass_metric
)
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE)
probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
# compare predictions
......
......@@ -13,7 +13,6 @@
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE,
params = list(),
...
)
......@@ -44,9 +43,6 @@ for logistic regression would result in predictions for log-odds instead of prob
\item{header}{only used when predicting from a text file. \code{TRUE} if the text file has a header}
\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
prediction outputs per case.}
\item{params}{a list of additional named parameters. See
\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
the "Predict Parameters" section of the documentation} for a list of parameters and
......@@ -56,12 +52,10 @@ valid values.}
}
\value{
For regression or binary classification, it returns a vector of length \code{nrows(data)}.
For multiclass classification, either a \code{num_class * nrows(data)} vector or
a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
For multiclass classification, it returns a matrix of dimensions \code{(nrows(data), num_class)}.
When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees.
When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be
returned as a matrix.
}
\description{
Predicted values based on class \code{lgb.Booster}
......
......@@ -111,3 +111,61 @@ test_that("start_iteration works correctly", {
pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, predleaf = TRUE)
expect_equal(pred_leaf1, pred_leaf2)
})
test_that("predictions for regression and binary classification are returned as vectors", {
    # Shared expectation: predictions come back as a plain vector (not a
    # matrix) with exactly one entry per row of the feature matrix.
    expect_one_value_per_row <- function(preds, n_rows) {
        expect_true(is.vector(preds))
        expect_equal(length(preds), n_rows)
    }

    # ---- regression: first column of mtcars is the label, the rest are features ----
    data(mtcars)
    features <- as.matrix(mtcars[, -1L])
    target <- as.numeric(mtcars[, 1L])
    train_set <- lgb.Dataset(features, label = target, params = list(max_bins = 5L))
    booster <- lgb.train(
        data = train_set
        , obj = "regression"
        , nrounds = 5L
        , verbose = VERBOSITY
    )
    # default (transformed) predictions
    expect_one_value_per_row(predict(booster, features), nrow(features))
    # raw scores must have the same shape
    expect_one_value_per_row(predict(booster, features, rawscore = TRUE), nrow(features))

    # ---- binary classification on the agaricus data from the lightgbm package ----
    data(agaricus.train, package = "lightgbm")
    features <- agaricus.train$data
    target <- agaricus.train$label
    train_set <- lgb.Dataset(features, label = target)
    booster <- lgb.train(
        data = train_set
        , obj = "binary"
        , nrounds = 5L
        , verbose = VERBOSITY
    )
    # default (transformed) predictions
    expect_one_value_per_row(predict(booster, features), nrow(features))
    # raw scores must have the same shape
    expect_one_value_per_row(predict(booster, features, rawscore = TRUE), nrow(features))
})
test_that("predictions for multiclass classification are returned as matrix", {
    # Shared expectation: predictions form a matrix with one row per
    # observation and one column per class.
    expect_rows_by_classes <- function(preds, n_rows, n_classes) {
        expect_true(is.matrix(preds))
        expect_equal(nrow(preds), n_rows)
        expect_equal(ncol(preds), n_classes)
    }

    # iris: the fifth column (Species) is the label; it is converted to
    # zero-based numeric class ids before being passed to lgb.Dataset.
    data(iris)
    n_classes <- 3L
    features <- as.matrix(iris[, -5L])
    class_ids <- as.numeric(iris$Species) - 1.0
    train_set <- lgb.Dataset(features, label = class_ids)
    booster <- lgb.train(
        data = train_set
        , obj = "multiclass"
        , nrounds = 5L
        , verbose = VERBOSITY
        , params = list(num_class = n_classes)
    )

    # default (transformed) predictions
    expect_rows_by_classes(predict(booster, features), nrow(features), n_classes)
    # raw scores must have the same shape
    expect_rows_by_classes(predict(booster, features, rawscore = TRUE), nrow(features), n_classes)
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment