[R-package] Added unit tests (#2498)

b4bb38d9 · James Lamb · GitHub · bdc310a8 · b4bb38d9 · b4bb38d9
Unverified Commit b4bb38d9 authored Oct 23, 2019 by James Lamb Committed by GitHub Oct 23, 2019
6 changed files
--- a/R-package/man/lgb.interprete.Rd
+++ b/R-package/man/lgb.interprete.Rd
@@ -11,7 +11,7 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
 \item{data}{a matrix object or a dgCMatrix object.}
-\item{idxset}{an integer vector of indices of rows needed.}
+\item{idxset}{a integer vector of indices of rows needed.}
 \item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration.}
 }

--- a/R-package/man/slice.Rd
+++ b/R-package/man/slice.Rd
@@ -14,7 +14,7 @@ slice(dataset, ...)
 \item{...}{other parameters (currently not used)}
-\item{idxset}{an integer vector of indices of rows needed}
+\item{idxset}{a integer vector of indices of rows needed}
 }
 \value{
 constructed sub dataset

--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -51,6 +51,7 @@ test_that("use of multiple eval metrics works", {
 test_that("training continuation works", {
+  testthat::skip("This test is currently broken. See issue #2468 for details.")
  dtrain <- lgb.Dataset(train$data, label = train$label, free_raw_data=FALSE)
  watchlist = list(train=dtrain)
  param <- list(objective = "binary", metric="binary_logloss", num_leaves = 5, learning_rate = 1)

--- a/R-package/tests/testthat/test_lgb.importance.R
+++ b/R-package/tests/testthat/test_lgb.importance.R
+context("lgb.importance")
+test_that("lgb.importance() should reject bad inputs", {
+    bad_inputs <- list(
+        .Machine$integer.max
+        , Inf
+        , -Inf
+        , NA
+        , NA_real_
+        , -10L:10L
+        , list(c("a", "b", "c"))
+        , data.frame(
+            x = rnorm(20)
+            , y = sample(
+                x = c(1, 2)
+                , size = 20
+                , replace = TRUE
+            )
+        )
+        , data.table::data.table(
+            x = rnorm(20)
+            , y = sample(
+                x = c(1, 2)
+                , size = 20
+                , replace = TRUE
+            )
+        )
+        , lgb.Dataset(
+            data = matrix(rnorm(100), ncol = 2)
+            , label = matrix(sample(c(0, 1), 50, replace = TRUE))
+        )
+        , "lightgbm.model"
+    )
+    for (input in bad_inputs){
+        expect_error({
+            lgb.importance(input)
+        }, regexp = "'model' has to be an object of class lgb\\.Booster")
+    }
+})
--- a/R-package/tests/testthat/test_lgb.interprete.R
+++ b/R-package/tests/testthat/test_lgb.interprete.R
+context("lgb.interpete")
+.sigmoid <- function(x){
+    1 / (1 + exp(-x))
+}
+.logit <- function(x){
+    log(x / (1 - x))
+}
+test_that("lgb.intereprete works as expected for binary classification", {
+    data(agaricus.train, package = "lightgbm")
+    train <- agaricus.train
+    dtrain <- lgb.Dataset(train$data, label = train$label)
+    setinfo(
+        dataset = dtrain
+        , "init_score"
+        , rep(
+            .logit(mean(train$label))
+            , length(train$label)
+        )
+    )
+    data(agaricus.test, package = "lightgbm")
+    test <- agaricus.test
+    params <- list(
+        objective = "binary"
+        , learning_rate = 0.01
+        , num_leaves = 63
+        , max_depth = -1
+        , min_data_in_leaf = 1
+        , min_sum_hessian_in_leaf = 1
+    )
+    model <- lgb.train(
+        params = params
+        , data = dtrain
+        , nrounds = 10
+    )
+    num_trees <- 5
+    tree_interpretation <- lgb.interprete(
+        model = model
+        , data = test$data
+        , idxset = 1:num_trees
+    )
+    expect_true(methods::is(tree_interpretation, "list"))
+    expect_true(length(tree_interpretation) == num_trees)
+    expect_null(names(tree_interpretation))
+    expect_true(all(
+        sapply(
+            X = tree_interpretation
+            , FUN = function(treeDT){
+                checks <- c(
+                    data.table::is.data.table(treeDT)
+                    , identical(names(treeDT), c("Feature", "Contribution"))
+                    , is.character(treeDT[, Feature])
+                    , is.numeric(treeDT[, Contribution])
+                )
+                return(all(checks))
+            }
+        )
+    ))
+})
+test_that("lgb.intereprete works as expected for multiclass classification", {
+    data(iris)
+    # We must convert factors to numeric
+    # They must be starting from number 0 to use multiclass
+    # For instance: 0, 1, 2, 3, 4, 5...
+    iris$Species <- as.numeric(as.factor(iris$Species)) - 1
+    # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
+    train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
+    # The 10 last samples of each class are for validation
+    test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
+    dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
+    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
+    params <- list(
+        objective = "multiclass"
+        , metric = "multi_logloss"
+        , num_class = 3
+        , learning_rate = 0.00001
+    )
+    model <- lgb.train(
+        params = params
+        , data = dtrain
+        , nrounds = 10
+        , min_data = 1
+    )
+    num_trees <- 5
+    tree_interpretation <- lgb.interprete(
+        model = model
+        , data = test[, 1:4]
+        , idxset = 1:num_trees
+    )
+    expect_true(methods::is(tree_interpretation, "list"))
+    expect_true(length(tree_interpretation) == num_trees)
+    expect_null(names(tree_interpretation))
+    expect_true(all(
+        sapply(
+            X = tree_interpretation
+            , FUN = function(treeDT){
+                checks <- c(
+                    data.table::is.data.table(treeDT)
+                    , identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2"))
+                    , is.character(treeDT[, Feature])
+                    , is.numeric(treeDT[, `Class 0`])
+                    , is.numeric(treeDT[, `Class 1`])
+                    , is.numeric(treeDT[, `Class 2`])
+                )
+                return(all(checks))
+            }
+        )
+    ))
+})
--- a/R-package/tests/testthat/test_lgb.plot.interpretation.R
+++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R
+context("lgb.plot.interpretation")
+.sigmoid <- function(x){
+    1 / (1 + exp(-x))
+}
+.logit <- function(x){
+    log(x / (1 - x))
+}
+test_that("lgb.plot.interepretation works as expected for binary classification", {
+    data(agaricus.train, package = "lightgbm")
+    train <- agaricus.train
+    dtrain <- lgb.Dataset(train$data, label = train$label)
+    setinfo(
+        dataset = dtrain
+        , "init_score"
+        , rep(
+            .logit(mean(train$label))
+            , length(train$label)
+        )
+    )
+    data(agaricus.test, package = "lightgbm")
+    test <- agaricus.test
+    params <- list(
+        objective = "binary"
+        , learning_rate = 0.01
+        , num_leaves = 63
+        , max_depth = -1
+        , min_data_in_leaf = 1
+        , min_sum_hessian_in_leaf = 1
+    )
+    model <- lgb.train(
+        params = params
+        , data = dtrain
+        , nrounds = 10
+    )
+    num_trees <- 5
+    tree_interpretation <- lgb.interprete(
+        model = model
+        , data = test$data
+        , idxset = 1:num_trees
+    )
+    expect_true({
+        lgb.plot.interpretation(
+            tree_interpretation_dt = tree_interpretation[[1]]
+            , top_n = 5
+        )
+        TRUE
+    })
+    # should also work when you explicitly pass cex
+    plot_res <- lgb.plot.interpretation(
+        tree_interpretation_dt = tree_interpretation[[1]]
+        , top_n = 5
+        , cex = 0.95
+    )
+    expect_null(plot_res)
+})
+test_that("lgb.plot.interepretation works as expected for multiclass classification", {
+    data(iris)
+    # We must convert factors to numeric
+    # They must be starting from number 0 to use multiclass
+    # For instance: 0, 1, 2, 3, 4, 5...
+    iris$Species <- as.numeric(as.factor(iris$Species)) - 1
+    # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
+    train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
+    # The 10 last samples of each class are for validation
+    test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
+    dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
+    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
+    params <- list(
+        objective = "multiclass"
+        , metric = "multi_logloss"
+        , num_class = 3
+        , learning_rate = 0.00001
+    )
+    model <- lgb.train(
+        params = params
+        , data = dtrain
+        , nrounds = 10
+        , min_data = 1
+    )
+    num_trees <- 5
+    tree_interpretation <- lgb.interprete(
+        model = model
+        , data = test[, 1:4]
+        , idxset = 1:num_trees
+    )
+    plot_res <- lgb.plot.interpretation(
+        tree_interpretation_dt = tree_interpretation[[1]]
+        , top_n = 5
+    )
+    expect_null(plot_res)
+})