Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
2048b139
Unverified
Commit
2048b139
authored
Jan 12, 2020
by
James Lamb
Committed by
GitHub
Jan 12, 2020
Browse files
[R-package] added tests on lgb.prepare function (#2685)
parent
8944b5e6
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
783 additions
and
13 deletions
+783
-13
R-package/R/lgb.prepare.R
R-package/R/lgb.prepare.R
+4
-5
R-package/R/lgb.prepare2.R
R-package/R/lgb.prepare2.R
+7
-4
R-package/R/lgb.prepare_rules.R
R-package/R/lgb.prepare_rules.R
+1
-2
R-package/R/lgb.prepare_rules2.R
R-package/R/lgb.prepare_rules2.R
+1
-2
R-package/tests/testthat/test_lgb.prepare.R
R-package/tests/testthat/test_lgb.prepare.R
+145
-0
R-package/tests/testthat/test_lgb.prepare2.R
R-package/tests/testthat/test_lgb.prepare2.R
+147
-0
R-package/tests/testthat/test_lgb.prepare_rules.R
R-package/tests/testthat/test_lgb.prepare_rules.R
+238
-0
R-package/tests/testthat/test_lgb.prepare_rules2.R
R-package/tests/testthat/test_lgb.prepare_rules2.R
+240
-0
No files found.
R-package/R/lgb.prepare.R
View file @
2048b139
...
...
@@ -59,13 +59,13 @@ lgb.prepare <- function(data) {
# Get data classes
list_classes
<-
sapply
(
data
,
class
)
# Convert characters to factors to numeric
(integer is more efficient actually)
# Convert characters to factors to numeric
is_char
<-
which
(
list_classes
==
"character"
)
if
(
length
(
is_char
)
>
0L
)
{
data
[
is_char
]
<-
lapply
(
data
[
is_char
],
function
(
x
)
{
as.numeric
(
as.factor
(
x
))})
}
# Convert factors to numeric
(integer is more efficient actually)
# Convert factors to numeric
is_fact
<-
which
(
list_classes
==
"factor"
)
if
(
length
(
is_fact
)
>
0L
)
{
data
[
is_fact
]
<-
lapply
(
data
[
is_fact
],
function
(
x
)
{
as.numeric
(
x
)})
...
...
@@ -73,11 +73,10 @@ lgb.prepare <- function(data) {
}
else
{
# What do you think you are doing here? Throw error.
stop
(
"lgb.prepare
2
: you provided "
"lgb.prepare: you provided "
,
paste
(
class
(
data
),
collapse
=
" & "
)
,
" but data should have class data.frame"
,
" but data should have class data.frame
or data.table
"
)
}
...
...
R-package/R/lgb.prepare2.R
View file @
2048b139
...
...
@@ -43,13 +43,13 @@ lgb.prepare2 <- function(data) {
# Get data classes
list_classes
<-
vapply
(
data
,
class
,
character
(
1L
))
# Convert characters to
factors only (we can change them to numeric after)
# Convert characters to
integer
is_char
<-
which
(
list_classes
==
"character"
)
if
(
length
(
is_char
)
>
0L
)
{
data
[,
(
is_char
)
:=
lapply
(
.SD
,
function
(
x
)
{
as.integer
(
as.factor
(
x
))}),
.SDcols
=
is_char
]
}
# Convert factors to
numeric (integer is more efficient actually)
# Convert factors to
integer
is_fact
<-
c
(
which
(
list_classes
==
"factor"
),
is_char
)
if
(
length
(
is_fact
)
>
0L
)
{
data
[,
(
is_fact
)
:=
lapply
(
.SD
,
function
(
x
)
{
as.integer
(
x
)}),
.SDcols
=
is_fact
]
...
...
@@ -77,8 +77,11 @@ lgb.prepare2 <- function(data) {
}
else
{
# What do you think you are doing here? Throw error.
stop
(
"lgb.prepare: you provided "
,
paste
(
class
(
data
),
collapse
=
" & "
),
" but data should have class data.frame"
)
stop
(
"lgb.prepare2: you provided "
,
paste
(
class
(
data
),
collapse
=
" & "
)
,
" but data should have class data.frame or data.table"
)
}
...
...
R-package/R/lgb.prepare_rules.R
View file @
2048b139
...
...
@@ -163,9 +163,8 @@ lgb.prepare_rules <- function(data, rules = NULL) {
}
else
{
# What do you think you are doing here? Throw error.
stop
(
"lgb.prepare: you provided "
"lgb.prepare
_rules
: you provided "
,
paste
(
class
(
data
),
collapse
=
" & "
)
,
" but data should have class data.frame"
)
...
...
R-package/R/lgb.prepare_rules2.R
View file @
2048b139
...
...
@@ -168,9 +168,8 @@ lgb.prepare_rules2 <- function(data, rules = NULL) {
}
else
{
# What do you think you are doing here? Throw error.
stop
(
"lgb.prepare: you provided "
"lgb.prepare
_rules2
: you provided "
,
paste
(
class
(
data
),
collapse
=
" & "
)
,
" but data should have class data.frame"
)
...
...
R-package/tests/testthat/test_lgb.prepare.R
0 → 100644
View file @
2048b139
context("lgb.prepare()")

# Anything that is not a data.frame / data.table must be rejected with the
# error message raised by lgb.prepare() itself.
test_that("lgb.prepare() rejects inputs that are not a data.table or data.frame", {
  small_matrix <- matrix(1.0:10.0, 2L, 5L)
  invalid_inputs <- list(
    small_matrix
    , TRUE
    , c("a", "b")
    , NA
    , 10L
    , lgb.Dataset(
      data = small_matrix
      , params = list()
    )
  )
  # index loop so that every element is passed exactly as stored in the list
  for (i in seq_along(invalid_inputs)) {
    expect_error(
      lgb.prepare(invalid_inputs[[i]])
      , regexp = "lgb.prepare: you provided"
      , fixed = TRUE
    )
  }
})
# Character columns should come back as numeric codes, for both container types.
test_that("lgb.prepare() should work correctly for a dataset with only character columns", {
  char_df <- data.frame(
    col1 = c("a", "b", "c")
    , col2 = c("green", "green", "red")
    , stringsAsFactors = FALSE
  )
  char_dt <- data.table::as.data.table(char_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1.0, 2.0, 3.0)
    , "col2" = c(1.0, 1.0, 2.0)
  )

  for (dataset in list(char_df, char_dt)) {
    prepared <- lgb.prepare(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # every column must be numeric ...
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    # ... and must hold the expected codes
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }
  }
})
# Factor columns should come back as numeric codes, for both container types.
test_that("lgb.prepare() should work correctly for a dataset with only factor columns", {
  factor_df <- data.frame(
    col1 = as.factor(c("a", "b", "c"))
    , col2 = as.factor(c("green", "green", "red"))
    , stringsAsFactors = FALSE
  )
  factor_dt <- data.table::as.data.table(factor_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1.0, 2.0, 3.0)
    , "col2" = c(1.0, 1.0, 2.0)
  )

  for (dataset in list(factor_df, factor_dt)) {
    prepared <- lgb.prepare(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # every column must be numeric ...
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    # ... and must hold the expected codes
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }
  }
})
# Numeric (double) columns need no conversion, so the dataset must come back
# unchanged.
test_that("lgb.prepare() should not change a dataset with only numeric columns", {
  # The doubles are spelled out on purpose: `11.0:15.0` would create *integer*
  # vectors, because `:` returns integers whenever `from` is integer-valued,
  # and the test would then not exercise numeric columns at all.
  testDF <- data.frame(
    col1 = c(11.0, 12.0, 13.0, 14.0, 15.0)
    , col2 = c(16.0, 17.0, 18.0, 19.0, 20.0)
    , stringsAsFactors = FALSE
  )
  testDT <- data.table::as.data.table(testDF)
  for (input_data in list(testDF, testDT)) {
    # guard: the fixture really has to consist of double columns
    expect_identical(class(input_data[["col1"]]), "numeric")
    expect_identical(class(input_data[["col2"]]), "numeric")

    converted_dataset <- lgb.prepare(input_data)
    expect_identical(converted_dataset, input_data)
  }
})
# A dataset mixing character, numeric and factor columns should end up with
# numeric columns only.
test_that("lgb.prepare() should work correctly for a dataset with numeric, factor, and character columns", {
  mixed_df <- data.frame(
    character_col = c("a", "b", "c")
    , numeric_col = c(1.0, 9.0, 10.0)
    , factor_col = as.factor(c("n", "n", "y"))
    , stringsAsFactors = FALSE
  )
  mixed_dt <- data.table::as.data.table(mixed_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "character_col" = c(1.0, 2.0, 3.0)
    , "numeric_col" = c(1.0, 9.0, 10.0)
    , "factor_col" = c(1.0, 1.0, 2.0)
  )

  for (dataset in list(mixed_df, mixed_dt)) {
    prepared <- lgb.prepare(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # every column must be numeric ...
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    # ... and must hold the expected values
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }
  }
})
# Missing values must survive the conversion: NA stays NA, and columns of types
# that lgb.prepare() does not touch keep their original class.
test_that("lgb.prepare() should work correctly for a dataset with missing values", {
  na_df <- data.frame(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
    , stringsAsFactors = FALSE
  )
  na_dt <- data.table::as.data.table(na_df)

  # columns expected to be numeric after conversion, with their values
  expected_numeric <- list(
    "character_col" = c(1.0, NA_real_, 2.0)
    , "numeric_col" = c(1.0, 9.0, NA_real_)
    , "factor_col" = c(1.0, 1.0, 2.0)
  )

  for (dataset in list(na_df, na_dt)) {
    prepared <- lgb.prepare(dataset)
    n_rows <- nrow(prepared)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    for (col_name in names(expected_numeric)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], expected_numeric[[col_name]])
    }

    # all-NA double and character columns come back as all-NA numeric columns
    for (col_name in c("na_real_col", "na_character_col")) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], rep(NA_real_, n_rows))
    }

    # today, lgb.prepare() does not convert logical columns
    expect_identical(class(prepared[["na_col"]]), "logical")

    # today, lgb.prepare() does not convert integer columns to numeric
    expected_integer <- list(
      "na_int_col" = rep(NA_integer_, n_rows)
      , "integer_col" = c(1L, 9L, NA_integer_)
    )
    for (col_name in names(expected_integer)) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], expected_integer[[col_name]])
    }
  }
})
# For data.table input the conversion is expected to happen by reference, i.e.
# the object passed in is itself modified.
test_that("lgb.prepare() should modify data.tables in-place", {
  testDT <- data.table::data.table(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
  )
  resultDT <- lgb.prepare(testDT)

  # the returned table and the table that was passed in must agree ...
  expect_identical(resultDT, testDT)

  # ... and the table that was passed in must really have been converted.
  # Without these checks the test would also pass if lgb.prepare() simply
  # returned its input untouched.
  expect_identical(class(testDT[["character_col"]]), "numeric")
  expect_identical(class(testDT[["factor_col"]]), "numeric")
})
R-package/tests/testthat/test_lgb.prepare2.R
0 → 100644
View file @
2048b139
context("lgb.prepare2()")

# Anything that is not a data.frame / data.table must be rejected with the
# error message raised by lgb.prepare2() itself.
test_that("lgb.prepare2() rejects inputs that are not a data.table or data.frame", {
  small_matrix <- matrix(1.0:10.0, 2L, 5L)
  invalid_inputs <- list(
    small_matrix
    , TRUE
    , c("a", "b")
    , NA
    , 10L
    , lgb.Dataset(
      data = small_matrix
      , params = list()
    )
  )
  # index loop so that every element is passed exactly as stored in the list
  for (i in seq_along(invalid_inputs)) {
    expect_error(
      lgb.prepare2(invalid_inputs[[i]])
      , regexp = "lgb.prepare2: you provided"
      , fixed = TRUE
    )
  }
})
# Character columns should come back as integer codes, for both container types.
test_that("lgb.prepare2() should work correctly for a dataset with only character columns", {
  char_df <- data.frame(
    col1 = c("a", "b", "c")
    , col2 = c("green", "green", "red")
    , stringsAsFactors = FALSE
  )
  char_dt <- data.table::as.data.table(char_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1L, 2L, 3L)
    , "col2" = c(1L, 1L, 2L)
  )

  for (dataset in list(char_df, char_dt)) {
    prepared <- lgb.prepare2(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # every column must be integer ...
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    # ... and must hold the expected codes
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }
  }
})
# Factor columns should come back as integer codes, for both container types.
test_that("lgb.prepare2() should work correctly for a dataset with only factor columns", {
  factor_df <- data.frame(
    col1 = as.factor(c("a", "b", "c"))
    , col2 = as.factor(c("green", "green", "red"))
    , stringsAsFactors = FALSE
  )
  factor_dt <- data.table::as.data.table(factor_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1L, 2L, 3L)
    , "col2" = c(1L, 1L, 2L)
  )

  for (dataset in list(factor_df, factor_dt)) {
    prepared <- lgb.prepare2(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # every column must be integer ...
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    # ... and must hold the expected codes
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }
  }
})
# Integer columns need no conversion, so the dataset must come back unchanged.
test_that("lgb.prepare2() should not change a dataset with only integer columns", {
  int_df <- data.frame(
    col1 = 11L:15L
    , col2 = 16L:20L
    , stringsAsFactors = FALSE
  )
  int_dt <- data.table::as.data.table(int_df)

  datasets <- list(int_df, int_dt)
  for (i in seq_along(datasets)) {
    dataset <- datasets[[i]]
    prepared <- lgb.prepare2(dataset)
    expect_identical(prepared, dataset)
  }
})
# Character and factor columns become integer codes; numeric columns are left
# exactly as they were.
test_that("lgb.prepare2() should work correctly for a dataset with numeric, factor, and character columns", {
  mixed_df <- data.frame(
    character_col = c("a", "b", "c")
    , numeric_col = c(1.0, 9.0, 10.0)
    , factor_col = as.factor(c("n", "n", "y"))
    , stringsAsFactors = FALSE
  )
  mixed_dt <- data.table::as.data.table(mixed_df)

  # categorical columns and the integer codes they are expected to hold
  expected_codes <- list(
    "character_col" = c(1L, 2L, 3L)
    , "factor_col" = c(1L, 1L, 2L)
  )

  for (dataset in list(mixed_df, mixed_dt)) {
    prepared <- lgb.prepare2(dataset)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    # categorical columns must be integer ...
    for (col_name in names(expected_codes)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    # ... and must hold the expected codes
    for (col_name in names(expected_codes)) {
      expect_identical(prepared[[col_name]], expected_codes[[col_name]])
    }

    # today, lgb.prepare2() does not convert numeric columns
    numeric_after <- prepared[["numeric_col"]]
    expect_identical(class(numeric_after), "numeric")
    expect_identical(numeric_after, c(1.0, 9.0, 10.0))
  }
})
# Missing values must survive the conversion: NA stays NA, and columns of types
# that lgb.prepare2() does not touch keep their original class.
test_that("lgb.prepare2() should work correctly for a dataset with missing values", {
  na_df <- data.frame(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
    , stringsAsFactors = FALSE
  )
  na_dt <- data.table::as.data.table(na_df)

  # columns expected to be integer after conversion, with their values
  expected_integer <- list(
    "character_col" = c(1L, NA_integer_, 2L)
    , "integer_col" = c(1L, 9L, NA_integer_)
    , "factor_col" = c(1L, 1L, 2L)
  )

  for (dataset in list(na_df, na_dt)) {
    prepared <- lgb.prepare2(dataset)
    n_rows <- nrow(prepared)

    # the container class (data.frame vs. data.table) must be preserved
    expect_identical(class(dataset), class(prepared))

    for (col_name in names(expected_integer)) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], expected_integer[[col_name]])
    }

    # all-NA integer and character columns come back as all-NA integer columns
    for (col_name in c("na_int_col", "na_character_col")) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], rep(NA_integer_, n_rows))
    }

    # today, lgb.prepare2() does not convert logical columns
    expect_identical(class(prepared[["na_col"]]), "logical")

    # today, lgb.prepare2() does not convert numeric columns to integer
    expected_numeric <- list(
      "na_real_col" = rep(NA_real_, n_rows)
      , "numeric_col" = c(1.0, 9.0, NA_real_)
    )
    for (col_name in names(expected_numeric)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], expected_numeric[[col_name]])
    }
  }
})
# For data.table input the conversion is expected to happen by reference, i.e.
# the object passed in is itself modified.
test_that("lgb.prepare2() should modify data.tables in-place", {
  testDT <- data.table::data.table(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
  )
  resultDT <- lgb.prepare2(testDT)

  # the returned table and the table that was passed in must agree ...
  expect_identical(resultDT, testDT)

  # ... and the table that was passed in must really have been converted.
  # Without these checks the test would also pass if lgb.prepare2() simply
  # returned its input untouched.
  expect_identical(class(testDT[["character_col"]]), "integer")
  expect_identical(class(testDT[["factor_col"]]), "integer")
})
R-package/tests/testthat/test_lgb.prepare_rules.R
0 → 100644
View file @
2048b139
context("lgb.prepare_rules()")

# Anything that is not a data.frame / data.table must be rejected with the
# error message raised by lgb.prepare_rules() itself.
test_that("lgb.prepare_rules() rejects inputs that are not a data.table or data.frame", {
  small_matrix <- matrix(1.0:10.0, 2L, 5L)
  invalid_inputs <- list(
    small_matrix
    , TRUE
    , c("a", "b")
    , NA
    , 10L
    , lgb.Dataset(
      data = small_matrix
      , params = list()
    )
  )
  # index loop so that every element is passed exactly as stored in the list
  for (i in seq_along(invalid_inputs)) {
    expect_error(
      lgb.prepare_rules(invalid_inputs[[i]])
      , regexp = "lgb.prepare_rules: you provided"
      , fixed = TRUE
    )
  }
})
# Character columns should come back as numeric codes, together with the rules
# (value -> code mapping) that were used for each column.
test_that("lgb.prepare_rules() should work correctly for a dataset with only character columns", {
  char_df <- data.frame(
    col1 = c("a", "b", "c")
    , col2 = c("green", "green", "red")
    , stringsAsFactors = FALSE
  )
  char_dt <- data.table::as.data.table(char_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1.0, 2.0, 3.0)
    , "col2" = c(1.0, 1.0, 2.0)
  )
  # rules expected to be returned for each column
  expected_rules <- list(
    "col1" = c("a" = 1.0, "b" = 2.0, "c" = 3.0)
    , "col2" = c("green" = 1.0, "red" = 2.0)
  )

  for (dataset in list(char_df, char_dt)) {
    result <- lgb.prepare_rules(dataset)

    # dataset should have been converted to numeric
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, ncol(dataset))
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# Factor columns should come back as numeric codes, together with the rules
# (level -> code mapping) that were used for each column.
test_that("lgb.prepare_rules() should work correctly for a dataset with only factor columns", {
  factor_df <- data.frame(
    col1 = as.factor(c("a", "b", "c"))
    , col2 = as.factor(c("green", "green", "red"))
    , stringsAsFactors = FALSE
  )
  factor_dt <- data.table::as.data.table(factor_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1.0, 2.0, 3.0)
    , "col2" = c(1.0, 1.0, 2.0)
  )
  # rules expected to be returned for each column
  expected_rules <- list(
    "col1" = c("a" = 1.0, "b" = 2.0, "c" = 3.0)
    , "col2" = c("green" = 1.0, "red" = 2.0)
  )

  for (dataset in list(factor_df, factor_dt)) {
    result <- lgb.prepare_rules(dataset)

    # dataset should have been converted to numeric
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, ncol(dataset))
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# Numeric (double) columns need no conversion: the dataset must come back
# unchanged and no rules may be created.
test_that("lgb.prepare_rules() should not change a dataset with only numeric columns", {
  # The doubles are spelled out on purpose: `11.0:15.0` would create *integer*
  # vectors, because `:` returns integers whenever `from` is integer-valued,
  # and the test would then not exercise numeric columns at all.
  testDF <- data.frame(
    col1 = c(11.0, 12.0, 13.0, 14.0, 15.0)
    , col2 = c(16.0, 17.0, 18.0, 19.0, 20.0)
    , stringsAsFactors = FALSE
  )
  testDT <- data.table::as.data.table(testDF)
  for (input_data in list(testDF, testDT)) {
    # guard: the fixture really has to consist of double columns
    expect_identical(class(input_data[["col1"]]), "numeric")
    expect_identical(class(input_data[["col2"]]), "numeric")

    conversion_result <- lgb.prepare_rules(input_data)

    # dataset should be returned as it was passed in
    converted_dataset <- conversion_result[["data"]]
    expect_identical(converted_dataset, input_data)

    # no column needed converting, so the returned rules should be empty
    rules <- conversion_result$rules
    expect_identical(rules, list())
  }
})
# A dataset mixing character, numeric and factor columns should end up with
# numeric columns only; rules are expected for the two categorical columns.
test_that("lgb.prepare_rules() should work correctly for a dataset with numeric, factor, and character columns", {
  mixed_df <- data.frame(
    character_col = c("a", "b", "c")
    , numeric_col = c(1.0, 9.0, 10.0)
    , factor_col = as.factor(c("n", "n", "y"))
    , stringsAsFactors = FALSE
  )
  mixed_dt <- data.table::as.data.table(mixed_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "character_col" = c(1.0, 2.0, 3.0)
    , "numeric_col" = c(1.0, 9.0, 10.0)
    , "factor_col" = c(1.0, 1.0, 2.0)
  )
  # rules expected to be returned for the categorical columns
  expected_rules <- list(
    "character_col" = c("a" = 1.0, "b" = 2.0, "c" = 3.0)
    , "factor_col" = c("n" = 1.0, "y" = 2.0)
  )

  for (dataset in list(mixed_df, mixed_dt)) {
    result <- lgb.prepare_rules(dataset)

    # dataset should have been converted to numeric
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
    }
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, 2L)
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# Missing values must survive the conversion: NA stays NA, columns of types
# that lgb.prepare_rules() does not touch keep their class, and the returned
# rules reflect the NA entries.
test_that("lgb.prepare_rules() should work correctly for a dataset with missing values", {
  na_df <- data.frame(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
    , stringsAsFactors = FALSE
  )
  na_dt <- data.table::as.data.table(na_df)

  # columns expected to be numeric after conversion, with their values
  expected_numeric <- list(
    "character_col" = c(1.0, NA_real_, 2.0)
    , "numeric_col" = c(1.0, 9.0, NA_real_)
    , "factor_col" = c(1.0, 1.0, 2.0)
  )
  # rules expected to be returned
  expected_rules <- list(
    "character_col" = stats::setNames(c(1.0, NA_real_, 2.0), c("a", NA, "c"))
    , "na_character_col" = stats::setNames(NA_real_, NA)
    , "factor_col" = c("n" = 1.0, "y" = 2.0)
  )

  for (dataset in list(na_df, na_dt)) {
    result <- lgb.prepare_rules(dataset)

    # dataset should have been converted to numeric
    prepared <- result[["data"]]
    n_rows <- nrow(prepared)
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_numeric)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], expected_numeric[[col_name]])
    }

    # all-NA double and character columns come back as all-NA numeric columns
    for (col_name in c("na_real_col", "na_character_col")) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], rep(NA_real_, n_rows))
    }

    # today, lgb.prepare_rules() does not convert logical columns
    expect_identical(class(prepared[["na_col"]]), "logical")

    # today, lgb.prepare_rules() does not convert integer columns to numeric
    expected_integer <- list(
      "na_int_col" = rep(NA_integer_, n_rows)
      , "integer_col" = c(1L, 9L, NA_integer_)
    )
    for (col_name in names(expected_integer)) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], expected_integer[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, 3L)
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# When the caller supplies rules, only the columns named in those rules are
# converted, and the rules are handed back unchanged.
test_that("lgb.prepare_rules() should work correctly if you provide your own well-formed rules", {
  source_df <- data.frame(
    character_col = c("a", NA_character_, "c", "a", "a", "c")
    , na_col = rep(NA, 6L)
    , na_real_col = rep(NA_real_, 6L)
    , na_int_col = rep(NA_integer_, 6L)
    , na_character_col = rep(NA_character_, 6L)
    , numeric_col = c(1.0, 9.0, NA_real_, 10.0, 11.0, 12.0)
    , factor_col = as.factor(c("n", "n", "y", "y", "n", "n"))
    , integer_col = c(1L, 9L, NA_integer_, 1L, 1L, 1L)
    , stringsAsFactors = FALSE
  )
  source_dt <- data.table::as.data.table(source_df)

  # value used by lgb.prepare_rules() when it encounters a categorical value that
  # is not in the provided rules
  UNKNOWN_FACTOR_VALUE <- 0.0

  custom_rules <- list(
    "character_col" = c("a" = 5.0, "c" = -10.2)
    , "factor_col" = c("n" = 65.0, "y" = 65.01)
  )

  # columns covered by the rules, with the values expected after conversion
  expected_values <- list(
    "character_col" = c(5.0, UNKNOWN_FACTOR_VALUE, -10.2, 5.0, 5.0, -10.2)
    , "factor_col" = c(65.0, 65.0, 65.01, 65.01, 65.0, 65.0)
  )
  # columns that no rule mentions
  columns_without_rules <- c(
    "na_col"
    , "na_real_col"
    , "na_int_col"
    , "na_character_col"
    , "numeric_col"
    , "integer_col"
  )

  for (dataset in list(source_df, source_dt)) {
    result <- lgb.prepare_rules(
      data = dataset
      , rules = custom_rules
    )

    # dataset should have been converted to numeric
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # columns not specified in rules are not going to be converted
    for (col_name in columns_without_rules) {
      expect_identical(prepared[[col_name]], dataset[[col_name]])
    }

    # the rules you passed in should be returned unchanged
    expect_identical(result[["rules"]], custom_rules)
  }
})
# For data.table input the conversion is expected to happen by reference, i.e.
# the object passed in is itself modified.
test_that("lgb.prepare_rules() should modify data.tables in-place", {
  testDT <- data.table::data.table(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
  )
  conversion_result <- lgb.prepare_rules(testDT)
  resultDT <- conversion_result[["data"]]

  # the returned table and the table that was passed in must agree ...
  expect_identical(resultDT, testDT)

  # ... and the table that was passed in must really have been converted.
  # Without these checks the test would also pass if lgb.prepare_rules()
  # simply returned its input untouched.
  expect_identical(class(testDT[["character_col"]]), "numeric")
  expect_identical(class(testDT[["factor_col"]]), "numeric")
})
R-package/tests/testthat/test_lgb.prepare_rules2.R
0 → 100644
View file @
2048b139
context("lgb.prepare_rules2()")

# Anything that is not a data.frame / data.table must be rejected with the
# error message raised by lgb.prepare_rules2() itself.
test_that("lgb.prepare_rules2() rejects inputs that are not a data.table or data.frame", {
  small_matrix <- matrix(1.0:10.0, 2L, 5L)
  invalid_inputs <- list(
    small_matrix
    , TRUE
    , c("a", "b")
    , NA
    , 10L
    , lgb.Dataset(
      data = small_matrix
      , params = list()
    )
  )
  # index loop so that every element is passed exactly as stored in the list
  for (i in seq_along(invalid_inputs)) {
    expect_error(
      lgb.prepare_rules2(invalid_inputs[[i]])
      , regexp = "lgb.prepare_rules2: you provided"
      , fixed = TRUE
    )
  }
})
# Character columns should come back as integer codes, together with the rules
# (value -> code mapping) that were used for each column.
test_that("lgb.prepare_rules2() should work correctly for a dataset with only character columns", {
  char_df <- data.frame(
    col1 = c("a", "b", "c")
    , col2 = c("green", "green", "red")
    , stringsAsFactors = FALSE
  )
  char_dt <- data.table::as.data.table(char_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1L, 2L, 3L)
    , "col2" = c(1L, 1L, 2L)
  )
  # rules expected to be returned for each column
  expected_rules <- list(
    "col1" = c("a" = 1L, "b" = 2L, "c" = 3L)
    , "col2" = c("green" = 1L, "red" = 2L)
  )

  for (dataset in list(char_df, char_dt)) {
    result <- lgb.prepare_rules2(dataset)

    # dataset should have been converted to integer
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, ncol(dataset))
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# Factor columns should come back as integer codes, together with the rules
# (level -> code mapping) that were used for each column.
test_that("lgb.prepare_rules2() should work correctly for a dataset with only factor columns", {
  factor_df <- data.frame(
    col1 = as.factor(c("a", "b", "c"))
    , col2 = as.factor(c("green", "green", "red"))
    , stringsAsFactors = FALSE
  )
  factor_dt <- data.table::as.data.table(factor_df)

  # values each column is expected to hold after conversion
  expected_values <- list(
    "col1" = c(1L, 2L, 3L)
    , "col2" = c(1L, 1L, 2L)
  )
  # rules expected to be returned for each column
  expected_rules <- list(
    "col1" = c("a" = 1L, "b" = 2L, "c" = 3L)
    , "col2" = c("green" = 1L, "red" = 2L)
  )

  for (dataset in list(factor_df, factor_dt)) {
    result <- lgb.prepare_rules2(dataset)

    # dataset should have been converted to integer
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_values)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    for (col_name in names(expected_values)) {
      expect_identical(prepared[[col_name]], expected_values[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, ncol(dataset))
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
# Integer columns need no conversion: the dataset must come back unchanged and
# no rules may be created.
test_that("lgb.prepare_rules2() should not change a dataset with only integer columns", {
  int_df <- data.frame(
    col1 = 11L:15L
    , col2 = 16L:20L
    , stringsAsFactors = FALSE
  )
  int_dt <- data.table::as.data.table(int_df)

  datasets <- list(int_df, int_dt)
  for (i in seq_along(datasets)) {
    dataset <- datasets[[i]]
    result <- lgb.prepare_rules2(dataset)

    # dataset should be returned as it was passed in
    expect_identical(result[["data"]], dataset)

    # no column needed converting, so the returned rules should be empty
    expect_identical(result[["rules"]], list())
  }
})
# Character and factor columns become integer codes (with matching rules);
# numeric columns are left exactly as they were.
test_that("lgb.prepare_rules2() should work correctly for a dataset with numeric, factor, and character columns", {
  mixed_df <- data.frame(
    character_col = c("a", "b", "c")
    , numeric_col = c(1.0, 9.0, 10.0)
    , factor_col = as.factor(c("n", "n", "y"))
    , stringsAsFactors = FALSE
  )
  mixed_dt <- data.table::as.data.table(mixed_df)

  # categorical columns and the integer codes they are expected to hold
  expected_codes <- list(
    "character_col" = c(1L, 2L, 3L)
    , "factor_col" = c(1L, 1L, 2L)
  )
  # rules expected to be returned for the categorical columns
  expected_rules <- list(
    "character_col" = c("a" = 1L, "b" = 2L, "c" = 3L)
    , "factor_col" = c("n" = 1L, "y" = 2L)
  )

  for (dataset in list(mixed_df, mixed_dt)) {
    result <- lgb.prepare_rules2(dataset)

    # categorical columns should have been converted to integer
    prepared <- result[["data"]]
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_codes)) {
      expect_identical(class(prepared[[col_name]]), "integer")
    }
    for (col_name in names(expected_codes)) {
      expect_identical(prepared[[col_name]], expected_codes[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, 2L)
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }

    # today, lgb.prepare_rules2() does not convert numeric columns
    numeric_after <- prepared[["numeric_col"]]
    expect_identical(class(numeric_after), "numeric")
    expect_identical(numeric_after, c(1.0, 9.0, 10.0))
  }
})
# Missing values must survive the conversion: NA stays NA, columns of types
# that lgb.prepare_rules2() does not touch keep their class, and the returned
# rules reflect the NA entries.
test_that("lgb.prepare_rules2() should work correctly for a dataset with missing values", {
  na_df <- data.frame(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
    , stringsAsFactors = FALSE
  )
  na_dt <- data.table::as.data.table(na_df)

  # columns expected to be integer after conversion, with their values
  expected_integer <- list(
    "character_col" = c(1L, NA_integer_, 2L)
    , "integer_col" = c(1L, 9L, NA_integer_)
    , "factor_col" = c(1L, 1L, 2L)
  )
  # rules expected to be returned
  expected_rules <- list(
    "character_col" = stats::setNames(c(1L, NA_integer_, 2L), c("a", NA, "c"))
    , "na_character_col" = stats::setNames(NA_integer_, NA)
    , "factor_col" = c("n" = 1L, "y" = 2L)
  )

  for (dataset in list(na_df, na_dt)) {
    result <- lgb.prepare_rules2(dataset)

    # dataset should have been converted to integer
    prepared <- result[["data"]]
    n_rows <- nrow(prepared)
    expect_identical(class(dataset), class(prepared))
    for (col_name in names(expected_integer)) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], expected_integer[[col_name]])
    }

    # all-NA integer and character columns come back as all-NA integer columns
    for (col_name in c("na_int_col", "na_character_col")) {
      expect_identical(class(prepared[[col_name]]), "integer")
      expect_identical(prepared[[col_name]], rep(NA_integer_, n_rows))
    }

    # today, lgb.prepare_rules2() does not convert logical columns
    expect_identical(class(prepared[["na_col"]]), "logical")

    # today, lgb.prepare_rules2() does not convert numeric columns to integer
    expected_numeric <- list(
      "na_real_col" = rep(NA_real_, n_rows)
      , "numeric_col" = c(1.0, 9.0, NA_real_)
    )
    for (col_name in names(expected_numeric)) {
      expect_identical(class(prepared[[col_name]]), "numeric")
      expect_identical(prepared[[col_name]], expected_numeric[[col_name]])
    }

    # rules should be returned and correct
    learned_rules <- result[["rules"]]
    expect_is(learned_rules, "list")
    expect_length(learned_rules, 3L)
    for (col_name in names(expected_rules)) {
      expect_identical(learned_rules[[col_name]], expected_rules[[col_name]])
    }
  }
})
test_that("lgb.prepare_rules2() should work correctly if you provide your own well-formed rules", {

  # six-row fixture; character_col contains an NA, which has no entry in
  # the custom rules defined below
  df_input <- data.frame(
    character_col = c("a", NA_character_, "c", "a", "a", "c")
    , na_col = rep(NA, 6L)
    , na_real_col = rep(NA_real_, 6L)
    , na_int_col = rep(NA_integer_, 6L)
    , na_character_col = rep(NA_character_, 6L)
    , numeric_col = c(1.0, 9.0, NA_real_, 10.0, 11.0, 12.0)
    , factor_col = as.factor(c("n", "n", "y", "y", "n", "n"))
    , integer_col = c(1L, 9L, NA_integer_, 1L, 1L, 1L)
    , stringsAsFactors = FALSE
  )
  dt_input <- data.table::as.data.table(df_input)

  # value used by lgb.prepare_rules2() when it encounters a categorical value
  # that is not in the provided rules
  unknown_level_code <- 0L

  # user-supplied mappings for the two categorical columns
  custom_rules <- list(
    character_col = c(a = 5L, c = -10L)
    , factor_col = c(n = 65L, y = 66L)
  )

  # columns that have no entry in custom_rules
  untouched_cols <- c(
    "na_col"
    , "na_real_col"
    , "na_int_col"
    , "na_character_col"
    , "numeric_col"
    , "integer_col"
  )

  # the same expectations must hold for a data.frame and for a data.table
  for (dataset in list(df_input, dt_input)) {
    result <- lgb.prepare_rules2(
      data = dataset
      , rules = custom_rules
    )
    converted <- result$data

    # the container class (data.frame / data.table) should be preserved
    expect_identical(class(dataset), class(converted))

    # columns named in the rules should be integer, encoded with the
    # supplied codes
    expect_identical(class(converted[["character_col"]]), "integer")
    expect_identical(
      converted[["character_col"]]
      , c(5L, unknown_level_code, -10L, 5L, 5L, -10L)
    )
    expect_identical(class(converted[["factor_col"]]), "integer")
    expect_identical(
      converted[["factor_col"]]
      , c(65L, 65L, 66L, 66L, 65L, 65L)
    )

    # columns not specified in rules are not going to be converted
    for (col in untouched_cols) {
      expect_identical(converted[[col]], dataset[[col]])
    }

    # the rules you passed in should be returned unchanged
    returned_rules <- result[["rules"]]
    expect_identical(returned_rules, custom_rules)
  }
})
test_that("lgb.prepare_rules2() should modify data.tables in-place", {

  # data.table covering character, logical, numeric, integer and factor
  # columns, including columns that are entirely NA
  dt_input <- data.table::data.table(
    character_col = c("a", NA_character_, "c")
    , na_col = rep(NA, 3L)
    , na_real_col = rep(NA_real_, 3L)
    , na_int_col = rep(NA_integer_, 3L)
    , na_character_col = rep(NA_character_, 3L)
    , numeric_col = c(1.0, 9.0, NA_real_)
    , factor_col = as.factor(c("n", "n", "y"))
    , integer_col = c(1L, 9L, NA_integer_)
  )

  result <- lgb.prepare_rules2(dt_input)
  dt_output <- result$data

  # after the call, the returned data must be identical to the data.table
  # that was passed in
  expect_identical(dt_output, dt_input)
})
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment