test_lgb.convert.R 6.03 KB
Newer Older
1
# Label the tests in this file as the "lgb.convert()" group.
context("lgb.convert()")
2

3
# Every element of bad_inputs is something other than a data.frame or
# data.table; lgb.convert() is expected to raise its "you provided" error for
# each of them.
test_that("lgb.convert() rejects inputs that are not a data.table or data.frame", {
    bad_inputs <- list(
        matrix(1.0:10.0, 2L, 5L)
        , TRUE
        , c("a", "b")
        , NA
        , 10L
        , lgb.Dataset(
            data = matrix(1.0:10.0, 2L, 5L)
            , params = list()
        )
    )
    for (bad_input in bad_inputs) {
        # fixed = TRUE: the message prefix is matched literally, not as a regex.
        # The return value is not needed, only the raised error.
        expect_error({
            lgb.convert(bad_input)
        }, regexp = "lgb.convert: you provided", fixed = TRUE)
    }
})

22
# Character columns are expected to come back as integer codes, for both
# data.frame and data.table input.
test_that("lgb.convert() should work correctly for a dataset with only character columns", {
    testDF <- data.frame(
        col1 = c("a", "b", "c")
        , col2 =  c("green", "green", "red")
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
        converted_dataset <- lgb.convert(input_data)

        # the container type (data.frame vs. data.table) must be preserved;
        # actual value first, expected second, so failures are labelled correctly
        expect_identical(class(converted_dataset), class(input_data))

        expect_identical(class(converted_dataset[["col1"]]), "integer")
        expect_identical(class(converted_dataset[["col2"]]), "integer")

        # repeated strings must map to the same code
        expect_identical(converted_dataset[["col1"]], c(1L, 2L, 3L))
        expect_identical(converted_dataset[["col2"]], c(1L, 1L, 2L))
    }
})

39
# Factor columns are expected to come back as integer codes, for both
# data.frame and data.table input.
test_that("lgb.convert() should work correctly for a dataset with only factor columns", {
    testDF <- data.frame(
        col1 = as.factor(c("a", "b", "c"))
        , col2 =  as.factor(c("green", "green", "red"))
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
        converted_dataset <- lgb.convert(input_data)

        # the container type (data.frame vs. data.table) must be preserved;
        # actual value first, expected second, so failures are labelled correctly
        expect_identical(class(converted_dataset), class(input_data))

        expect_identical(class(converted_dataset[["col1"]]), "integer")
        expect_identical(class(converted_dataset[["col2"]]), "integer")

        # repeated levels must map to the same code
        expect_identical(converted_dataset[["col1"]], c(1L, 2L, 3L))
        expect_identical(converted_dataset[["col2"]], c(1L, 1L, 2L))
    }
})

56
# Integer-only input has nothing to convert, so the result must equal the input.
test_that("lgb.convert() should not change a dataset with only integer columns", {
    testDF <- data.frame(
        col1 = 11L:15L
        , col2 = 16L:20L
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
        converted_dataset <- lgb.convert(input_data)
        expect_identical(converted_dataset, input_data)

        # A data.table may be modified by reference, in which case the
        # comparison above is an object compared with itself. Also check the
        # columns against the literal input values so that an unwanted change
        # cannot go unnoticed.
        expect_identical(converted_dataset[["col1"]], 11L:15L)
        expect_identical(converted_dataset[["col2"]], 16L:20L)
    }
})

69
# Mixed input: character and factor columns are expected to become integer
# codes while the numeric column is expected to pass through untouched.
test_that("lgb.convert() should work correctly for a dataset with numeric, factor, and character columns", {
    testDF <- data.frame(
        character_col = c("a", "b", "c")
        , numeric_col = c(1.0, 9.0, 10.0)
        , factor_col = as.factor(c("n", "n", "y"))
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
        converted_dataset <- lgb.convert(input_data)

        # the container type (data.frame vs. data.table) must be preserved;
        # actual value first, expected second, so failures are labelled correctly
        expect_identical(class(converted_dataset), class(input_data))

        expect_identical(class(converted_dataset[["character_col"]]), "integer")
        expect_identical(class(converted_dataset[["factor_col"]]), "integer")
        expect_identical(converted_dataset[["character_col"]], c(1L, 2L, 3L))
        expect_identical(converted_dataset[["factor_col"]], c(1L, 1L, 2L))

        # today, lgb.convert() does not convert numeric columns
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0))
    }
})

91
# Covers NA handling: partially missing columns, and columns that are entirely
# NA of each basic type (logical, real, integer, character).
test_that("lgb.convert() should work correctly for a dataset with missing values", {
    testDF <- data.frame(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
        , na_real_col = rep(NA_real_, 3L)
        , na_int_col = rep(NA_integer_,  3L)
        , na_character_col = rep(NA_character_, 3L)
        , numeric_col = c(1.0, 9.0, NA_real_)
        , factor_col = as.factor(c("n", "n", "y"))
        , integer_col = c(1L, 9L, NA_integer_)
        , stringsAsFactors = FALSE
    )
    testDT <- data.table::as.data.table(testDF)
    for (input_data in list(testDF, testDT)) {
        converted_dataset <- lgb.convert(input_data)
        n_rows <- nrow(converted_dataset)

        # the container type (data.frame vs. data.table) must be preserved;
        # actual value first, expected second, so failures are labelled correctly
        expect_identical(class(converted_dataset), class(input_data))

        # a missing string stays missing and does not consume a code
        expect_identical(class(converted_dataset[["character_col"]]), "integer")
        expect_identical(converted_dataset[["character_col"]], c(1L, NA_integer_, 2L))

        expect_identical(class(converted_dataset[["integer_col"]]), "integer")
        expect_identical(converted_dataset[["integer_col"]], c(1L, 9L, NA_integer_))

        expect_identical(class(converted_dataset[["factor_col"]]), "integer")
        expect_identical(converted_dataset[["factor_col"]], c(1L, 1L, 2L))

        # all-NA integer and character columns should come back as integer NAs
        for (col in c("na_int_col", "na_character_col")) {
            expect_identical(class(converted_dataset[[col]]), "integer")
            expect_identical(converted_dataset[[col]], rep(NA_integer_, n_rows))
        }

        # today, lgb.convert() does not convert logical columns
        expect_identical(class(converted_dataset[["na_col"]]), "logical")

        # today, lgb.convert() does not convert numeric columns to integer
        expect_identical(class(converted_dataset[["na_real_col"]]), "numeric")
        expect_identical(converted_dataset[["na_real_col"]], rep(NA_real_, n_rows))
        expect_identical(class(converted_dataset[["numeric_col"]]), "numeric")
        expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, NA_real_))
    }
})

134
# For data.table input the conversion is expected to happen by reference: after
# the call, the object that was passed in must itself hold the converted data.
test_that("lgb.convert() should modify data.tables in-place", {
    testDT <- data.table::data.table(
        character_col = c("a", NA_character_, "c")
        , na_col = rep(NA, 3L)
        , na_real_col = rep(NA_real_, 3L)
        , na_int_col = rep(NA_integer_,  3L)
        , na_character_col = rep(NA_character_, 3L)
        , numeric_col = c(1.0, 9.0, NA_real_)
        , factor_col = as.factor(c("n", "n", "y"))
        , integer_col = c(1L, 9L, NA_integer_)
    )
    resultDT <- lgb.convert(testDT)
    expect_identical(resultDT, testDT)

    # The comparison above would also hold if nothing had been converted at
    # all, so additionally require that testDT itself now carries integer
    # columns where it was built with character / factor ones.
    expect_identical(class(testDT[["character_col"]]), "integer")
    expect_identical(class(testDT[["factor_col"]]), "integer")
})