Unverified Commit e906a82c authored by david-cortes's avatar david-cortes Committed by GitHub
Browse files

[R-package] Use `type` argument to control prediction types (#5133)



* switch to single prediction type argument

* linter

* missing piece of code

* comments

* linter

* fix test

* revert incorrect 'fix'

* fix failing test

* fix test again

* modify recently introduced tests after changes here

* rename prediction types

* rebase

* restore tests for prediction type in params

* Update R-package/tests/testthat/test_Predictor.R
Co-authored-by: default avatarJames Lamb <jaylamb20@gmail.com>
Co-authored-by: default avatarJames Lamb <jaylamb20@gmail.com>
parent bdb02e05
...@@ -742,6 +742,26 @@ Booster <- R6::R6Class( ...@@ -742,6 +742,26 @@ Booster <- R6::R6Class(
#' @param object Object of class \code{lgb.Booster} #' @param object Object of class \code{lgb.Booster}
#' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or #' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or
#' a character representing a path to a text file (CSV, TSV, or LibSVM) #' a character representing a path to a text file (CSV, TSV, or LibSVM)
#' @param type Type of prediction to output. Allowed types are:\itemize{
#' \item \code{"response"}: will output the predicted score according to the objective function being
#' optimized (depending on the link function that the objective uses), after applying any necessary
#' transformations - for example, for \code{objective="binary"}, it will output class probabilities.
#' \item \code{"class"}: for classification objectives, will output the class with the highest predicted
#' probability. For other objectives, will output the same as "response".
#' \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
#' results) from which the "response" number is produced for a given objective function - for example,
#' for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
#' "regression", since no transformation is applied, the output will be the same as for "response".
#' \item \code{"leaf"}: will output the index of the terminal node / leaf at which each observation falls
#' in each tree in the model, output as integers, with one column per tree.
#' \item \code{"contrib"}: will return the per-feature contributions for each prediction, including an
#' intercept (each feature will produce one column). If there are multiple classes, each class will
#' have separate feature contributions (thus the number of columns is features+1 multiplied by the
#' number of classes).
#' }
#'
#' Note that, if using custom objectives, types "class" and "response" will not be available and will
#' default to using "raw" instead.
#' @param start_iteration int or None, optional (default=None) #' @param start_iteration int or None, optional (default=None)
#' Start index of the iteration to predict. #' Start index of the iteration to predict.
#' If None or <= 0, starts from the first iteration. #' If None or <= 0, starts from the first iteration.
...@@ -750,11 +770,6 @@ Booster <- R6::R6Class( ...@@ -750,11 +770,6 @@ Booster <- R6::R6Class(
#' If None, if the best iteration exists and start_iteration is None or <= 0, the #' If None, if the best iteration exists and start_iteration is None or <= 0, the
#' best iteration is used; otherwise, all iterations from start_iteration are used. #' best iteration is used; otherwise, all iterations from start_iteration are used.
#' If <= 0, all iterations from start_iteration are used (no limits). #' If <= 0, all iterations from start_iteration are used (no limits).
#' @param rawscore whether the prediction should be returned in the form of the original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE}
#' for logistic regression would result in predictions for log-odds instead of probabilities.
#' @param predleaf whether predict leaf index instead.
#' @param predcontrib return per-feature contributions for each record.
#' @param header only used for prediction for text file. True if text file has header #' @param header only used for prediction for text file. True if text file has header
#' @param params a list of additional named parameters. See #' @param params a list of additional named parameters. See
#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ #' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
...@@ -762,11 +777,13 @@ Booster <- R6::R6Class( ...@@ -762,11 +777,13 @@ Booster <- R6::R6Class(
#' valid values. Where these conflict with the values of keyword arguments to this function, #' valid values. Where these conflict with the values of keyword arguments to this function,
#' the values in \code{params} take precedence. #' the values in \code{params} take precedence.
#' @param ... ignored #' @param ... ignored
#' @return For regression or binary classification, it returns a vector of length \code{nrows(data)}. #' @return For prediction types that are meant to always return one output per observation (e.g. when predicting
#' For multiclass classification, it returns a matrix of dimensions \code{(nrows(data), num_class)}. #' \code{type="response"} on a binary classification or regression objective), will return a vector with one
#' element per row in \code{newdata}.
#' #'
#' When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be #' For prediction types that are meant to return more than one output per observation (e.g. when predicting
#' returned as a matrix. #' \code{type="response"} on a multi-class objective, or when predicting \code{type="leaf"}, regardless of
#' objective), will return a matrix with one row per observation in \code{newdata} and one column per output.
#' #'
#' @examples #' @examples
#' \donttest{ #' \donttest{
...@@ -804,11 +821,9 @@ Booster <- R6::R6Class( ...@@ -804,11 +821,9 @@ Booster <- R6::R6Class(
#' @export #' @export
predict.lgb.Booster <- function(object, predict.lgb.Booster <- function(object,
newdata, newdata,
type = "response",
start_iteration = NULL, start_iteration = NULL,
num_iteration = NULL, num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE, header = FALSE,
params = list(), params = list(),
...) { ...) {
...@@ -819,9 +834,24 @@ predict.lgb.Booster <- function(object, ...@@ -819,9 +834,24 @@ predict.lgb.Booster <- function(object,
additional_params <- list(...) additional_params <- list(...)
if (length(additional_params) > 0L) { if (length(additional_params) > 0L) {
if ("reshape" %in% names(additional_params)) { additional_params_names <- names(additional_params)
if ("reshape" %in% additional_params_names) {
stop("'reshape' argument is no longer supported.") stop("'reshape' argument is no longer supported.")
} }
old_args_for_type <- list(
"rawscore" = "raw"
, "predleaf" = "leaf"
, "predcontrib" = "contrib"
)
for (arg in names(old_args_for_type)) {
if (arg %in% additional_params_names) {
stop(sprintf("Argument '%s' is no longer supported. Use type='%s' instead."
, arg
, old_args_for_type[[arg]]))
}
}
warning(paste0( warning(paste0(
"predict.lgb.Booster: Found the following passed through '...': " "predict.lgb.Booster: Found the following passed through '...': "
, toString(names(additional_params)) , toString(names(additional_params))
...@@ -829,18 +859,40 @@ predict.lgb.Booster <- function(object, ...@@ -829,18 +859,40 @@ predict.lgb.Booster <- function(object,
)) ))
} }
return( if (!is.null(object$params$objective) && object$params$objective == "none" && type %in% c("class", "response")) {
object$predict( warning("Prediction types 'class' and 'response' are not supported for custom objectives.")
data = newdata type <- "raw"
, start_iteration = start_iteration }
, num_iteration = num_iteration
, rawscore = rawscore rawscore <- FALSE
, predleaf = predleaf predleaf <- FALSE
, predcontrib = predcontrib predcontrib <- FALSE
, header = header if (type == "raw") {
, params = params rawscore <- TRUE
) } else if (type == "leaf") {
predleaf <- TRUE
} else if (type == "contrib") {
predcontrib <- TRUE
}
pred <- object$predict(
data = newdata
, start_iteration = start_iteration
, num_iteration = num_iteration
, rawscore = rawscore
, predleaf = predleaf
, predcontrib = predcontrib
, header = header
, params = params
) )
if (type == "class") {
if (object$params$objective == "binary") {
pred <- as.integer(pred >= 0.5)
} else if (object$params$objective %in% c("multiclass", "multiclassova")) {
pred <- max.col(pred) - 1L
}
}
return(pred)
} }
#' @name print.lgb.Booster #' @name print.lgb.Booster
......
...@@ -22,8 +22,8 @@ param <- list( ...@@ -22,8 +22,8 @@ param <- list(
bst <- lgb.train(param, dtrain, 1L, valids = valids) bst <- lgb.train(param, dtrain, 1L, valids = valids)
# Note: we need the margin value instead of transformed prediction in set_init_score # Note: we need the margin value instead of transformed prediction in set_init_score
ptrain <- predict(bst, agaricus.train$data, rawscore = TRUE) ptrain <- predict(bst, agaricus.train$data, type = "raw")
ptest <- predict(bst, agaricus.test$data, rawscore = TRUE) ptest <- predict(bst, agaricus.test$data, type = "raw")
# set the init_score property of dtrain and dtest # set the init_score property of dtrain and dtest
# base margin is the base prediction we will boost from # base margin is the base prediction we will boost from
......
...@@ -111,7 +111,7 @@ new_data <- data.frame( ...@@ -111,7 +111,7 @@ new_data <- data.frame(
X = rowMeans(predict( X = rowMeans(predict(
model model
, agaricus.test$data , agaricus.test$data
, predleaf = TRUE , type = "leaf"
)) ))
, Y = pmin( , Y = pmin(
pmax( pmax(
...@@ -162,7 +162,7 @@ new_data2 <- data.frame( ...@@ -162,7 +162,7 @@ new_data2 <- data.frame(
X = rowMeans(predict( X = rowMeans(predict(
model2 model2
, agaricus.test$data , agaricus.test$data
, predleaf = TRUE , type = "leaf"
)) ))
, Y = pmin( , Y = pmin(
pmax( pmax(
...@@ -218,7 +218,7 @@ new_data3 <- data.frame( ...@@ -218,7 +218,7 @@ new_data3 <- data.frame(
X = rowMeans(predict( X = rowMeans(predict(
model3 model3
, agaricus.test$data , agaricus.test$data
, predleaf = TRUE , type = "leaf"
)) ))
, Y = pmin( , Y = pmin(
pmax( pmax(
......
...@@ -64,7 +64,7 @@ my_preds <- predict(model, test[, 1L:4L]) ...@@ -64,7 +64,7 @@ my_preds <- predict(model, test[, 1L:4L])
my_preds <- predict(model, test[, 1L:4L]) my_preds <- predict(model, test[, 1L:4L])
# We can also get the predicted scores before the Sigmoid/Softmax application # We can also get the predicted scores before the Sigmoid/Softmax application
my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE) my_preds <- predict(model, test[, 1L:4L], type = "raw")
# We can also get the leaf index # We can also get the leaf index
my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE) my_preds <- predict(model, test[, 1L:4L], type = "leaf")
...@@ -36,7 +36,7 @@ model_builtin <- lgb.train( ...@@ -36,7 +36,7 @@ model_builtin <- lgb.train(
, obj = "multiclass" , obj = "multiclass"
) )
preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE) preds_builtin <- predict(model_builtin, test[, 1L:4L], type = "raw")
probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
# Method 2 of training with custom objective function # Method 2 of training with custom objective function
...@@ -109,7 +109,7 @@ model_custom <- lgb.train( ...@@ -109,7 +109,7 @@ model_custom <- lgb.train(
, eval = custom_multiclass_metric , eval = custom_multiclass_metric
) )
preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE) preds_custom <- predict(model_custom, test[, 1L:4L], type = "raw")
probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom)) probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
# compare predictions # compare predictions
......
...@@ -7,11 +7,9 @@ ...@@ -7,11 +7,9 @@
\method{predict}{lgb.Booster}( \method{predict}{lgb.Booster}(
object, object,
newdata, newdata,
type = "response",
start_iteration = NULL, start_iteration = NULL,
num_iteration = NULL, num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE, header = FALSE,
params = list(), params = list(),
... ...
...@@ -23,6 +21,27 @@ ...@@ -23,6 +21,27 @@
\item{newdata}{a \code{matrix} object, a \code{dgCMatrix} object or \item{newdata}{a \code{matrix} object, a \code{dgCMatrix} object or
a character representing a path to a text file (CSV, TSV, or LibSVM)} a character representing a path to a text file (CSV, TSV, or LibSVM)}
\item{type}{Type of prediction to output. Allowed types are:\itemize{
\item \code{"response"}: will output the predicted score according to the objective function being
optimized (depending on the link function that the objective uses), after applying any necessary
transformations - for example, for \code{objective="binary"}, it will output class probabilities.
\item \code{"class"}: for classification objectives, will output the class with the highest predicted
probability. For other objectives, will output the same as "response".
\item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
results) from which the "response" number is produced for a given objective function - for example,
for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as
"regression", since no transformation is applied, the output will be the same as for "response".
\item \code{"leaf"}: will output the index of the terminal node / leaf at which each observation falls
in each tree in the model, output as integers, with one column per tree.
\item \code{"contrib"}: will return the per-feature contributions for each prediction, including an
intercept (each feature will produce one column). If there are multiple classes, each class will
have separate feature contributions (thus the number of columns is features+1 multiplied by the
number of classes).
}
Note that, if using custom objectives, types "class" and "response" will not be available and will
default to using "raw" instead.}
\item{start_iteration}{int or None, optional (default=None) \item{start_iteration}{int or None, optional (default=None)
Start index of the iteration to predict. Start index of the iteration to predict.
If None or <= 0, starts from the first iteration.} If None or <= 0, starts from the first iteration.}
...@@ -33,14 +52,6 @@ If None, if the best iteration exists and start_iteration is None or <= 0, the ...@@ -33,14 +52,6 @@ If None, if the best iteration exists and start_iteration is None or <= 0, the
best iteration is used; otherwise, all iterations from start_iteration are used. best iteration is used; otherwise, all iterations from start_iteration are used.
If <= 0, all iterations from start_iteration are used (no limits).} If <= 0, all iterations from start_iteration are used (no limits).}
\item{rawscore}{whether the prediction should be returned in the form of the original untransformed
sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE}
for logistic regression would result in predictions for log-odds instead of probabilities.}
\item{predleaf}{whether predict leaf index instead.}
\item{predcontrib}{return per-feature contributions for each record.}
\item{header}{only used for prediction for text file. True if text file has header} \item{header}{only used for prediction for text file. True if text file has header}
\item{params}{a list of additional named parameters. See \item{params}{a list of additional named parameters. See
...@@ -52,11 +63,13 @@ the values in \code{params} take precedence.} ...@@ -52,11 +63,13 @@ the values in \code{params} take precedence.}
\item{...}{ignored} \item{...}{ignored}
} }
\value{ \value{
For regression or binary classification, it returns a vector of length \code{nrows(data)}. For prediction types that are meant to always return one output per observation (e.g. when predicting
For multiclass classification, it returns a matrix of dimensions \code{(nrows(data), num_class)}. \code{type="response"} on a binary classification or regression objective), will return a vector with one
element per row in \code{newdata}.
When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be For prediction types that are meant to return more than one output per observation (e.g. when predicting
returned as a matrix. \code{type="response"} on a multi-class objective, or when predicting \code{type="leaf"}, regardless of
objective), will return a matrix with one row per observation in \code{newdata} and one column per output.
} }
\description{ \description{
Predicted values based on class \code{lgb.Booster} Predicted values based on class \code{lgb.Booster}
......
...@@ -6,8 +6,6 @@ VERBOSITY <- as.integer( ...@@ -6,8 +6,6 @@ VERBOSITY <- as.integer(
TOLERANCE <- 1e-6 TOLERANCE <- 1e-6
library(Matrix)
test_that("Predictor$finalize() should not fail", { test_that("Predictor$finalize() should not fail", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L) X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]] y <- iris[["Sepal.Length"]]
...@@ -85,8 +83,8 @@ test_that("start_iteration works correctly", { ...@@ -85,8 +83,8 @@ test_that("start_iteration works correctly", {
, early_stopping_rounds = 2L , early_stopping_rounds = 2L
) )
expect_true(lgb.is.Booster(bst)) expect_true(lgb.is.Booster(bst))
pred1 <- predict(bst, newdata = test$data, rawscore = TRUE) pred1 <- predict(bst, newdata = test$data, type = "raw")
pred_contrib1 <- predict(bst, test$data, predcontrib = TRUE) pred_contrib1 <- predict(bst, test$data, type = "contrib")
pred2 <- rep(0.0, length(pred1)) pred2 <- rep(0.0, length(pred1))
pred_contrib2 <- rep(0.0, length(pred2)) pred_contrib2 <- rep(0.0, length(pred2))
step <- 11L step <- 11L
...@@ -100,7 +98,7 @@ test_that("start_iteration works correctly", { ...@@ -100,7 +98,7 @@ test_that("start_iteration works correctly", {
inc_pred <- predict(bst, test$data inc_pred <- predict(bst, test$data
, start_iteration = start_iter , start_iteration = start_iter
, num_iteration = n_iter , num_iteration = n_iter
, rawscore = TRUE , type = "raw"
) )
inc_pred_contrib <- bst$predict(test$data inc_pred_contrib <- bst$predict(test$data
, start_iteration = start_iter , start_iteration = start_iter
...@@ -113,8 +111,8 @@ test_that("start_iteration works correctly", { ...@@ -113,8 +111,8 @@ test_that("start_iteration works correctly", {
expect_equal(pred2, pred1) expect_equal(pred2, pred1)
expect_equal(pred_contrib2, pred_contrib1) expect_equal(pred_contrib2, pred_contrib1)
pred_leaf1 <- predict(bst, test$data, predleaf = TRUE) pred_leaf1 <- predict(bst, test$data, type = "leaf")
pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, predleaf = TRUE) pred_leaf2 <- predict(bst, test$data, start_iteration = 0L, num_iteration = end_iter + 1L, type = "leaf")
expect_equal(pred_leaf1, pred_leaf2) expect_equal(pred_leaf1, pred_leaf2)
}) })
...@@ -131,20 +129,20 @@ test_that("Feature contributions from sparse inputs produce sparse outputs", { ...@@ -131,20 +129,20 @@ test_that("Feature contributions from sparse inputs produce sparse outputs", {
, params = list(min_data_in_leaf = 5L) , params = list(min_data_in_leaf = 5L)
) )
pred_dense <- predict(bst, X, predcontrib = TRUE) pred_dense <- predict(bst, X, type = "contrib")
Xcsc <- as(X, "CsparseMatrix") Xcsc <- as(X, "CsparseMatrix")
pred_csc <- predict(bst, Xcsc, predcontrib = TRUE) pred_csc <- predict(bst, Xcsc, type = "contrib")
expect_s4_class(pred_csc, "dgCMatrix") expect_s4_class(pred_csc, "dgCMatrix")
expect_equal(unname(pred_dense), unname(as.matrix(pred_csc))) expect_equal(unname(pred_dense), unname(as.matrix(pred_csc)))
Xcsr <- as(X, "RsparseMatrix") Xcsr <- as(X, "RsparseMatrix")
pred_csr <- predict(bst, Xcsr, predcontrib = TRUE) pred_csr <- predict(bst, Xcsr, type = "contrib")
expect_s4_class(pred_csr, "dgRMatrix") expect_s4_class(pred_csr, "dgRMatrix")
expect_equal(as(pred_csr, "CsparseMatrix"), pred_csc) expect_equal(as(pred_csr, "CsparseMatrix"), pred_csc)
Xspv <- as(X[1L, , drop = FALSE], "sparseVector") Xspv <- as(X[1L, , drop = FALSE], "sparseVector")
pred_spv <- predict(bst, Xspv, predcontrib = TRUE) pred_spv <- predict(bst, Xspv, type = "contrib")
expect_s4_class(pred_spv, "dsparseVector") expect_s4_class(pred_spv, "dsparseVector")
expect_equal(Matrix::t(as(pred_spv, "CsparseMatrix")), unname(pred_csc[1L, , drop = FALSE])) expect_equal(Matrix::t(as(pred_spv, "CsparseMatrix")), unname(pred_csc[1L, , drop = FALSE]))
}) })
...@@ -164,14 +162,14 @@ test_that("Sparse feature contribution predictions do not take inputs with wrong ...@@ -164,14 +162,14 @@ test_that("Sparse feature contribution predictions do not take inputs with wrong
X_wrong <- X[, c(1L:10L, 1L:10L)] X_wrong <- X[, c(1L:10L, 1L:10L)]
X_wrong <- as(X_wrong, "CsparseMatrix") X_wrong <- as(X_wrong, "CsparseMatrix")
expect_error(predict(bst, X_wrong, predcontrib = TRUE), regexp = "input data has 20 columns") expect_error(predict(bst, X_wrong, type = "contrib"), regexp = "input data has 20 columns")
X_wrong <- as(X_wrong, "RsparseMatrix") X_wrong <- as(X_wrong, "RsparseMatrix")
expect_error(predict(bst, X_wrong, predcontrib = TRUE), regexp = "input data has 20 columns") expect_error(predict(bst, X_wrong, type = "contrib"), regexp = "input data has 20 columns")
X_wrong <- as(X_wrong, "CsparseMatrix") X_wrong <- as(X_wrong, "CsparseMatrix")
X_wrong <- X_wrong[, 1L:3L] X_wrong <- X_wrong[, 1L:3L]
expect_error(predict(bst, X_wrong, predcontrib = TRUE), regexp = "input data has 3 columns") expect_error(predict(bst, X_wrong, type = "contrib"), regexp = "input data has 3 columns")
}) })
test_that("Feature contribution predictions do not take non-general CSR or CSC inputs", { test_that("Feature contribution predictions do not take non-general CSR or CSC inputs", {
...@@ -192,8 +190,8 @@ test_that("Feature contribution predictions do not take non-general CSR or CSC i ...@@ -192,8 +190,8 @@ test_that("Feature contribution predictions do not take non-general CSR or CSC i
, params = list(min_data_in_leaf = 5L) , params = list(min_data_in_leaf = 5L)
) )
expect_error(predict(bst, SmatC, predcontrib = TRUE)) expect_error(predict(bst, SmatC, type = "contrib"))
expect_error(predict(bst, SmatR, predcontrib = TRUE)) expect_error(predict(bst, SmatR, type = "contrib"))
}) })
test_that("predict() params should override keyword argument for raw-score predictions", { test_that("predict() params should override keyword argument for raw-score predictions", {
...@@ -220,7 +218,7 @@ test_that("predict() params should override keyword argument for raw-score predi ...@@ -220,7 +218,7 @@ test_that("predict() params should override keyword argument for raw-score predi
# check that the predictions from predict.lgb.Booster() really look like raw score predictions # check that the predictions from predict.lgb.Booster() really look like raw score predictions
preds_prob <- predict(bst, X) preds_prob <- predict(bst, X)
preds_raw_s3_keyword <- predict(bst, X, rawscore = TRUE) preds_raw_s3_keyword <- predict(bst, X, type = "raw")
preds_prob_from_raw <- 1.0 / (1.0 + exp(-preds_raw_s3_keyword)) preds_prob_from_raw <- 1.0 / (1.0 + exp(-preds_raw_s3_keyword))
expect_equal(preds_prob, preds_prob_from_raw, tolerance = TOLERANCE) expect_equal(preds_prob, preds_prob_from_raw, tolerance = TOLERANCE)
accuracy <- sum(as.integer(preds_prob_from_raw > 0.5) == y) / length(y) accuracy <- sum(as.integer(preds_prob_from_raw > 0.5) == y) / length(y)
...@@ -270,7 +268,7 @@ test_that("predict() params should override keyword argument for leaf-index pred ...@@ -270,7 +268,7 @@ test_that("predict() params should override keyword argument for leaf-index pred
) )
# check that predictions really look like leaf index predictions # check that predictions really look like leaf index predictions
preds_leaf_s3_keyword <- predict(bst, X, predleaf = TRUE) preds_leaf_s3_keyword <- predict(bst, X, type = "leaf")
expect_true(is.matrix(preds_leaf_s3_keyword)) expect_true(is.matrix(preds_leaf_s3_keyword))
expect_equal(dim(preds_leaf_s3_keyword), c(nrow(X), bst$current_iter())) expect_equal(dim(preds_leaf_s3_keyword), c(nrow(X), bst$current_iter()))
expect_true(min(preds_leaf_s3_keyword) >= 0L) expect_true(min(preds_leaf_s3_keyword) >= 0L)
...@@ -323,7 +321,7 @@ test_that("predict() params should override keyword argument for feature contrib ...@@ -323,7 +321,7 @@ test_that("predict() params should override keyword argument for feature contrib
) )
# check that predictions really look like feature contributions # check that predictions really look like feature contributions
preds_contrib_s3_keyword <- predict(bst, X, predcontrib = TRUE) preds_contrib_s3_keyword <- predict(bst, X, type = "contrib")
num_features <- ncol(X) num_features <- ncol(X)
shap_base_value <- unname(preds_contrib_s3_keyword[, ncol(preds_contrib_s3_keyword)]) shap_base_value <- unname(preds_contrib_s3_keyword[, ncol(preds_contrib_s3_keyword)])
expect_true(is.matrix(preds_contrib_s3_keyword)) expect_true(is.matrix(preds_contrib_s3_keyword))
...@@ -378,11 +376,11 @@ test_that("predict() params should override keyword argument for feature contrib ...@@ -378,11 +376,11 @@ test_that("predict() params should override keyword argument for feature contrib
# dense matrix with row names # dense matrix with row names
pred <- predict(bst, X) pred <- predict(bst, X)
.expect_has_row_names(pred, X) .expect_has_row_names(pred, X)
pred <- predict(bst, X, rawscore = TRUE) pred <- predict(bst, X, type = "raw")
.expect_has_row_names(pred, X) .expect_has_row_names(pred, X)
pred <- predict(bst, X, predleaf = TRUE) pred <- predict(bst, X, type = "leaf")
.expect_has_row_names(pred, X) .expect_has_row_names(pred, X)
pred <- predict(bst, X, predcontrib = TRUE) pred <- predict(bst, X, type = "contrib")
.expect_has_row_names(pred, X) .expect_has_row_names(pred, X)
# dense matrix without row names # dense matrix without row names
...@@ -395,13 +393,13 @@ test_that("predict() params should override keyword argument for feature contrib ...@@ -395,13 +393,13 @@ test_that("predict() params should override keyword argument for feature contrib
Xcsc <- as(X, "CsparseMatrix") Xcsc <- as(X, "CsparseMatrix")
pred <- predict(bst, Xcsc) pred <- predict(bst, Xcsc)
.expect_has_row_names(pred, Xcsc) .expect_has_row_names(pred, Xcsc)
pred <- predict(bst, Xcsc, rawscore = TRUE) pred <- predict(bst, Xcsc, type = "raw")
.expect_has_row_names(pred, Xcsc) .expect_has_row_names(pred, Xcsc)
pred <- predict(bst, Xcsc, predleaf = TRUE) pred <- predict(bst, Xcsc, type = "leaf")
.expect_has_row_names(pred, Xcsc) .expect_has_row_names(pred, Xcsc)
pred <- predict(bst, Xcsc, predcontrib = TRUE) pred <- predict(bst, Xcsc, type = "contrib")
.expect_has_row_names(pred, Xcsc) .expect_has_row_names(pred, Xcsc)
pred <- predict(bst, as(Xcsc, "RsparseMatrix"), predcontrib = TRUE) pred <- predict(bst, as(Xcsc, "RsparseMatrix"), type = "contrib")
.expect_has_row_names(pred, Xcsc) .expect_has_row_names(pred, Xcsc)
# sparse matrix without row names # sparse matrix without row names
...@@ -486,7 +484,7 @@ test_that("predictions for regression and binary classification are returned as ...@@ -486,7 +484,7 @@ test_that("predictions for regression and binary classification are returned as
pred <- predict(model, X) pred <- predict(model, X)
expect_true(is.vector(pred)) expect_true(is.vector(pred))
expect_equal(length(pred), nrow(X)) expect_equal(length(pred), nrow(X))
pred <- predict(model, X, rawscore = TRUE) pred <- predict(model, X, type = "raw")
expect_true(is.vector(pred)) expect_true(is.vector(pred))
expect_equal(length(pred), nrow(X)) expect_equal(length(pred), nrow(X))
...@@ -503,7 +501,7 @@ test_that("predictions for regression and binary classification are returned as ...@@ -503,7 +501,7 @@ test_that("predictions for regression and binary classification are returned as
pred <- predict(model, X) pred <- predict(model, X)
expect_true(is.vector(pred)) expect_true(is.vector(pred))
expect_equal(length(pred), nrow(X)) expect_equal(length(pred), nrow(X))
pred <- predict(model, X, rawscore = TRUE) pred <- predict(model, X, type = "raw")
expect_true(is.vector(pred)) expect_true(is.vector(pred))
expect_equal(length(pred), nrow(X)) expect_equal(length(pred), nrow(X))
}) })
...@@ -524,8 +522,52 @@ test_that("predictions for multiclass classification are returned as matrix", { ...@@ -524,8 +522,52 @@ test_that("predictions for multiclass classification are returned as matrix", {
expect_true(is.matrix(pred)) expect_true(is.matrix(pred))
expect_equal(nrow(pred), nrow(X)) expect_equal(nrow(pred), nrow(X))
expect_equal(ncol(pred), 3L) expect_equal(ncol(pred), 3L)
pred <- predict(model, X, rawscore = TRUE) pred <- predict(model, X, type = "raw")
expect_true(is.matrix(pred)) expect_true(is.matrix(pred))
expect_equal(nrow(pred), nrow(X)) expect_equal(nrow(pred), nrow(X))
expect_equal(ncol(pred), 3L) expect_equal(ncol(pred), 3L)
}) })
test_that("predict type='class' returns predicted class for classification objectives", {
data(agaricus.train, package = "lightgbm")
X <- as.matrix(agaricus.train$data)
y <- agaricus.train$label
dtrain <- lgb.Dataset(X, label = y, params = list(max_bins = 5L))
bst <- lgb.train(
data = dtrain
, obj = "binary"
, nrounds = 5L
, verbose = VERBOSITY
)
pred <- predict(bst, X, type = "class")
expect_true(all(pred %in% c(0L, 1L)))
data(iris)
X <- as.matrix(iris[, -5L])
y <- as.numeric(iris$Species) - 1.0
dtrain <- lgb.Dataset(X, label = y)
model <- lgb.train(
data = dtrain
, obj = "multiclass"
, nrounds = 5L
, verbose = VERBOSITY
, params = list(num_class = 3L)
)
pred <- predict(model, X, type = "class")
expect_true(all(pred %in% c(0L, 1L, 2L)))
})
test_that("predict type='class' returns values in the target's range for regression objectives", {
data(agaricus.train, package = "lightgbm")
X <- as.matrix(agaricus.train$data)
y <- agaricus.train$label
dtrain <- lgb.Dataset(X, label = y, params = list(max_bins = 5L))
bst <- lgb.train(
data = dtrain
, obj = "regression"
, nrounds = 5L
, verbose = VERBOSITY
)
pred <- predict(bst, X, type = "class")
expect_true(!any(pred %in% c(0.0, 1.0)))
})
...@@ -2969,7 +2969,7 @@ test_that("lightgbm() accepts init_score as function argument", { ...@@ -2969,7 +2969,7 @@ test_that("lightgbm() accepts init_score as function argument", {
, nrounds = 5L , nrounds = 5L
, verbose = VERBOSITY , verbose = VERBOSITY
) )
pred1 <- predict(bst1, train$data, rawscore = TRUE) pred1 <- predict(bst1, train$data, type = "raw")
bst2 <- lightgbm( bst2 <- lightgbm(
data = train$data data = train$data
...@@ -2979,7 +2979,7 @@ test_that("lightgbm() accepts init_score as function argument", { ...@@ -2979,7 +2979,7 @@ test_that("lightgbm() accepts init_score as function argument", {
, nrounds = 5L , nrounds = 5L
, verbose = VERBOSITY , verbose = VERBOSITY
) )
pred2 <- predict(bst2, train$data, rawscore = TRUE) pred2 <- predict(bst2, train$data, type = "raw")
expect_true(any(pred1 != pred2)) expect_true(any(pred1 != pred2))
}) })
......
...@@ -293,8 +293,8 @@ test_that("Saving a large model to string should work", { ...@@ -293,8 +293,8 @@ test_that("Saving a large model to string should work", {
) )
pred <- predict(bst, train$data) pred <- predict(bst, train$data)
pred_leaf_indx <- predict(bst, train$data, predleaf = TRUE) pred_leaf_indx <- predict(bst, train$data, type = "leaf")
pred_raw_score <- predict(bst, train$data, rawscore = TRUE) pred_raw_score <- predict(bst, train$data, type = "raw")
model_string <- bst$save_model_to_string() model_string <- bst$save_model_to_string()
# make sure this test is still producing a model bigger than the default # make sure this test is still producing a model bigger than the default
...@@ -312,8 +312,8 @@ test_that("Saving a large model to string should work", { ...@@ -312,8 +312,8 @@ test_that("Saving a large model to string should work", {
model_str = model_string model_str = model_string
) )
pred2 <- predict(bst2, train$data) pred2 <- predict(bst2, train$data)
pred2_leaf_indx <- predict(bst2, train$data, predleaf = TRUE) pred2_leaf_indx <- predict(bst2, train$data, type = "leaf")
pred2_raw_score <- predict(bst2, train$data, rawscore = TRUE) pred2_raw_score <- predict(bst2, train$data, type = "raw")
expect_identical(pred, pred2) expect_identical(pred, pred2)
expect_identical(pred_leaf_indx, pred2_leaf_indx) expect_identical(pred_leaf_indx, pred2_leaf_indx)
expect_identical(pred_raw_score, pred2_raw_score) expect_identical(pred_raw_score, pred2_raw_score)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment