# Tests for lgb.interprete()
# Logistic (sigmoid) function: maps a real-valued score to (0, 1).
# Inverse of .logit().
.sigmoid <- function(x) {
    denom <- 1.0 + exp(-x)
    1.0 / denom
}
4
5
# Logit (log-odds) function: maps a probability in (0, 1) to the real line.
# Inverse of .sigmoid().
.logit <- function(x) {
    odds <- x / (1.0 - x)
    log(odds)
}

test_that("lgb.interprete works as expected for binary classification", {
    data(agaricus.train, package = "lightgbm")
    train <- agaricus.train
    dtrain <- lgb.Dataset(train$data, label = train$label)
    # Initialize boosting from the base rate: init_score is the logit of the
    # mean label, repeated once per training row.
    set_field(
        dataset = dtrain
        , field_name = "init_score"
        , data = rep(
            .logit(mean(train$label))
            , length(train$label)
        )
    )
    data(agaricus.test, package = "lightgbm")
    test <- agaricus.test
    params <- list(
        objective = "binary"
        , learning_rate = 0.01
        , num_leaves = 63L
        , max_depth = -1L
        , min_data_in_leaf = 1L
        , min_sum_hessian_in_leaf = 1.0
        , verbose = .LGB_VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    model <- lgb.train(
        params = params
        , data = dtrain
        , nrounds = 3L
    )
    # Interpret the first num_trees observations of the test set.
    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
        , data = test$data
        , idxset = seq_len(num_trees)
    )
    # Result should be an unnamed list with one data.table per observation,
    # each with character "Feature" and numeric "Contribution" columns.
    expect_identical(class(tree_interpretation), "list")
    expect_length(tree_interpretation, num_trees)
    expect_null(names(tree_interpretation))
    expect_true(all(
        vapply(
            X = tree_interpretation
            , FUN = function(treeDT) {
                checks <- c(
                    data.table::is.data.table(treeDT)
                    , identical(names(treeDT), c("Feature", "Contribution"))
                    , is.character(treeDT[, Feature])
                    , is.numeric(treeDT[, Contribution])
                )
                return(all(checks))
            }
            , FUN.VALUE = logical(1L)
        )
    ))
})

test_that("lgb.interprete works as expected for multiclass classification", {
    data(iris)

    # We must convert factors to numeric
    # They must be starting from number 0 to use multiclass
    # For instance: 0, 1, 2, 3, 4, 5...
    iris$Species <- as.numeric(as.factor(iris$Species)) - 1L

    # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
    train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
    # The 10 last samples of each class are for validation
    test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
    dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
    params <- list(
        objective = "multiclass"
        , metric = "multi_logloss"
        , num_class = 3L
        , learning_rate = 0.00001
        , min_data = 1L
        , verbose = .LGB_VERBOSITY
        , num_threads = .LGB_MAX_THREADS
    )
    model <- lgb.train(
        params = params
        , data = dtrain
        , nrounds = 3L
    )
    # Interpret the first num_trees observations of the test set.
    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
        , data = test[, 1L:4L]
        , idxset = seq_len(num_trees)
    )
    # Result should be an unnamed list with one data.table per observation;
    # for multiclass each table has a character "Feature" column plus one
    # numeric contribution column per class.
    expect_identical(class(tree_interpretation), "list")
    expect_length(tree_interpretation, num_trees)
    expect_null(names(tree_interpretation))
    expect_true(all(
        vapply(
            X = tree_interpretation
            , FUN = function(treeDT) {
                checks <- c(
                    data.table::is.data.table(treeDT)
                    , identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2"))
                    , is.character(treeDT[, Feature])
                    , is.numeric(treeDT[, `Class 0`])
                    , is.numeric(treeDT[, `Class 1`])
                    , is.numeric(treeDT[, `Class 2`])
                )
                return(all(checks))
            }
            , FUN.VALUE = logical(1L)
        )
    ))
})