Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
b4bb38d9
"docs/Python-intro.md" did not exist on "b1e34d15944b75db637b66ad025d95fa848a0ca7"
Unverified
Commit
b4bb38d9
authored
Oct 23, 2019
by
James Lamb
Committed by
GitHub
Oct 23, 2019
Browse files
[R-package] Added unit tests (#2498)
parent
bdc310a8
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
252 additions
and
2 deletions
+252
-2
R-package/man/lgb.interprete.Rd
R-package/man/lgb.interprete.Rd
+1
-1
R-package/man/slice.Rd
R-package/man/slice.Rd
+1
-1
R-package/tests/testthat/test_basic.R
R-package/tests/testthat/test_basic.R
+1
-0
R-package/tests/testthat/test_lgb.importance.R
R-package/tests/testthat/test_lgb.importance.R
+39
-0
R-package/tests/testthat/test_lgb.interprete.R
R-package/tests/testthat/test_lgb.interprete.R
+113
-0
R-package/tests/testthat/test_lgb.plot.interpretation.R
R-package/tests/testthat/test_lgb.plot.interpretation.R
+97
-0
No files found.
R-package/man/lgb.interprete.Rd
View file @
b4bb38d9
...
@@ -11,7 +11,7 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
...
@@ -11,7 +11,7 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
\
item
{
data
}{
a
matrix
object
or
a
dgCMatrix
object
.}
\
item
{
data
}{
a
matrix
object
or
a
dgCMatrix
object
.}
\
item
{
idxset
}{
a
n
integer
vector
of
indices
of
rows
needed
.}
\item{idxset}{an integer vector of indices of rows needed.}
\
item
{
num_iteration
}{
number
of
iteration
want
to
predict
with
,
NULL
or
<=
0
means
use
best
iteration
.}
\
item
{
num_iteration
}{
number
of
iteration
want
to
predict
with
,
NULL
or
<=
0
means
use
best
iteration
.}
}
}
...
...
R-package/man/slice.Rd
View file @
b4bb38d9
...
@@ -14,7 +14,7 @@ slice(dataset, ...)
...
@@ -14,7 +14,7 @@ slice(dataset, ...)
\item{...}{other parameters (currently not used)}
\item{...}{other parameters (currently not used)}
\item{idxset}{a
n
integer vector of indices of rows needed}
\item{idxset}{an integer vector of indices of rows needed}
}
}
\value{
\value{
constructed sub dataset
constructed sub dataset
...
...
R-package/tests/testthat/test_basic.R
View file @
b4bb38d9
...
@@ -51,6 +51,7 @@ test_that("use of multiple eval metrics works", {
...
@@ -51,6 +51,7 @@ test_that("use of multiple eval metrics works", {
test_that
(
"training continuation works"
,
{
test_that
(
"training continuation works"
,
{
testthat
::
skip
(
"This test is currently broken. See issue #2468 for details."
)
dtrain
<-
lgb.Dataset
(
train
$
data
,
label
=
train
$
label
,
free_raw_data
=
FALSE
)
dtrain
<-
lgb.Dataset
(
train
$
data
,
label
=
train
$
label
,
free_raw_data
=
FALSE
)
watchlist
=
list
(
train
=
dtrain
)
watchlist
=
list
(
train
=
dtrain
)
param
<-
list
(
objective
=
"binary"
,
metric
=
"binary_logloss"
,
num_leaves
=
5
,
learning_rate
=
1
)
param
<-
list
(
objective
=
"binary"
,
metric
=
"binary_logloss"
,
num_leaves
=
5
,
learning_rate
=
1
)
...
...
R-package/tests/testthat/test_lgb.importance.R
0 → 100644
View file @
b4bb38d9
context("lgb.importance")

test_that("lgb.importance() should reject bad inputs", {
  # A collection of objects that are not lgb.Booster instances.
  # lgb.importance() should refuse every one of them with the same
  # informative error message.
  bad_inputs <- list(
    .Machine$integer.max
    , Inf
    , -Inf
    , NA
    , NA_real_
    , -10L:10L
    , list(c("a", "b", "c"))
    , data.frame(
      x = rnorm(20)
      , y = sample(x = c(1, 2), size = 20, replace = TRUE)
    )
    , data.table::data.table(
      x = rnorm(20)
      , y = sample(x = c(1, 2), size = 20, replace = TRUE)
    )
    , lgb.Dataset(
      data = matrix(rnorm(100), ncol = 2)
      , label = matrix(sample(c(0, 1), 50, replace = TRUE))
    )
    , "lightgbm.model"
  )
  for (bad_input in bad_inputs) {
    expect_error(
      lgb.importance(bad_input)
      , regexp = "'model' has to be an object of class lgb\\.Booster"
    )
  }
})
R-package/tests/testthat/test_lgb.interprete.R
0 → 100644
View file @
b4bb38d9
# Fix typo in test context label: "lgb.interpete" -> "lgb.interprete",
# matching the name of the function under test.
context("lgb.interprete")
# Logistic (inverse-logit) transform: maps real values into (0, 1).
# Vectorized over its input.
.sigmoid <- function(x) {
  neg_exp <- exp(-x)
  1 / (1 + neg_exp)
}
# Log-odds transform, the inverse of .sigmoid().
# Expects x in the open interval (0, 1); vectorized over its input.
.logit <- function(x) {
  odds <- x / (1 - x)
  log(odds)
}
# Fixes: typo in test description ("lgb.intereprete"); sapply() replaced
# with type-stable vapply().
test_that("lgb.interprete works as expected for binary classification", {
  data(agaricus.train, package = "lightgbm")
  train <- agaricus.train
  dtrain <- lgb.Dataset(train$data, label = train$label)
  # Start boosting from the base rate (log-odds of the mean label)
  setinfo(
    dataset = dtrain
    , "init_score"
    , rep(.logit(mean(train$label)), length(train$label))
  )
  data(agaricus.test, package = "lightgbm")
  test <- agaricus.test
  params <- list(
    objective = "binary"
    , learning_rate = 0.01
    , num_leaves = 63
    , max_depth = -1
    , min_data_in_leaf = 1
    , min_sum_hessian_in_leaf = 1
  )
  model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 10
  )
  num_trees <- 5
  tree_interpretation <- lgb.interprete(
    model = model
    , data = test$data
    , idxset = 1:num_trees
  )
  # Expect one unnamed data.table per requested row, each holding
  # per-feature contributions.
  expect_true(methods::is(tree_interpretation, "list"))
  expect_true(length(tree_interpretation) == num_trees)
  expect_null(names(tree_interpretation))
  expect_true(all(
    vapply(
      X = tree_interpretation
      , FUN = function(treeDT){
        checks <- c(
          data.table::is.data.table(treeDT)
          , identical(names(treeDT), c("Feature", "Contribution"))
          , is.character(treeDT[, Feature])
          , is.numeric(treeDT[, Contribution])
        )
        return(all(checks))
      }
      , FUN.VALUE = logical(1L)
    )
  ))
})
# Fixes: typo in test description ("lgb.intereprete"); sapply() replaced
# with type-stable vapply().
test_that("lgb.interprete works as expected for multiclass classification", {
  data(iris)

  # We must convert factors to numeric
  # They must be starting from number 0 to use multiclass
  # For instance: 0, 1, 2, 3, 4, 5...
  iris$Species <- as.numeric(as.factor(iris$Species)) - 1

  # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
  train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
  # The 10 last samples of each class are for validation
  test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])

  dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
  # NOTE(review): dtest is created but never passed to lgb.train() below.
  # Confirm whether a valids list was intended here.
  dtest <- lgb.Dataset.create.valid(
    dtrain
    , data = test[, 1:4]
    , label = test[, 5]
  )
  params <- list(
    objective = "multiclass"
    , metric = "multi_logloss"
    , num_class = 3
    , learning_rate = 0.00001
  )
  model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 10
    , min_data = 1
  )
  num_trees <- 5
  tree_interpretation <- lgb.interprete(
    model = model
    , data = test[, 1:4]
    , idxset = 1:num_trees
  )
  # Expect one unnamed data.table per requested row, with one
  # contribution column per class.
  expect_true(methods::is(tree_interpretation, "list"))
  expect_true(length(tree_interpretation) == num_trees)
  expect_null(names(tree_interpretation))
  expect_true(all(
    vapply(
      X = tree_interpretation
      , FUN = function(treeDT){
        checks <- c(
          data.table::is.data.table(treeDT)
          , identical(
            names(treeDT)
            , c("Feature", "Class 0", "Class 1", "Class 2")
          )
          , is.character(treeDT[, Feature])
          , is.numeric(treeDT[, `Class 0`])
          , is.numeric(treeDT[, `Class 1`])
          , is.numeric(treeDT[, `Class 2`])
        )
        return(all(checks))
      }
      , FUN.VALUE = logical(1L)
    )
  ))
})
R-package/tests/testthat/test_lgb.plot.interpretation.R
0 → 100644
View file @
b4bb38d9
# Test context for lgb.plot.interpretation()
context("lgb.plot.interpretation")
# Logistic (inverse-logit) transform: maps real values into (0, 1).
# Vectorized over its input.
.sigmoid <- function(x) {
  denom <- 1 + exp(-x)
  1 / denom
}
# Log-odds transform, the inverse of .sigmoid().
# Expects x in the open interval (0, 1); vectorized over its input.
.logit <- function(x) {
  odds <- x / (1 - x)
  log(odds)
}
# Fixes: typo in test description ("lgb.plot.interepretation").
test_that("lgb.plot.interpretation works as expected for binary classification", {
  data(agaricus.train, package = "lightgbm")
  train <- agaricus.train
  dtrain <- lgb.Dataset(train$data, label = train$label)
  # Start boosting from the base rate (log-odds of the mean label)
  setinfo(
    dataset = dtrain
    , "init_score"
    , rep(.logit(mean(train$label)), length(train$label))
  )
  data(agaricus.test, package = "lightgbm")
  test <- agaricus.test
  params <- list(
    objective = "binary"
    , learning_rate = 0.01
    , num_leaves = 63
    , max_depth = -1
    , min_data_in_leaf = 1
    , min_sum_hessian_in_leaf = 1
  )
  model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 10
  )
  num_trees <- 5
  tree_interpretation <- lgb.interprete(
    model = model
    , data = test$data
    , idxset = 1:num_trees
  )
  # Plotting should run without error
  expect_true({
    lgb.plot.interpretation(
      tree_interpretation_dt = tree_interpretation[[1]]
      , top_n = 5
    )
    TRUE
  })
  # should also work when you explicitly pass cex
  plot_res <- lgb.plot.interpretation(
    tree_interpretation_dt = tree_interpretation[[1]]
    , top_n = 5
    , cex = 0.95
  )
  expect_null(plot_res)
})
# Fixes: typo in test description ("lgb.plot.interepretation").
test_that("lgb.plot.interpretation works as expected for multiclass classification", {
  data(iris)

  # We must convert factors to numeric
  # They must be starting from number 0 to use multiclass
  # For instance: 0, 1, 2, 3, 4, 5...
  iris$Species <- as.numeric(as.factor(iris$Species)) - 1

  # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
  train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
  # The 10 last samples of each class are for validation
  test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])

  dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
  # NOTE(review): dtest is created but never passed to lgb.train() below.
  # Confirm whether a valids list was intended here.
  dtest <- lgb.Dataset.create.valid(
    dtrain
    , data = test[, 1:4]
    , label = test[, 5]
  )
  params <- list(
    objective = "multiclass"
    , metric = "multi_logloss"
    , num_class = 3
    , learning_rate = 0.00001
  )
  model <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = 10
    , min_data = 1
  )
  num_trees <- 5
  tree_interpretation <- lgb.interprete(
    model = model
    , data = test[, 1:4]
    , idxset = 1:num_trees
  )
  # lgb.plot.interpretation() is called for its side effect and
  # should return NULL
  plot_res <- lgb.plot.interpretation(
    tree_interpretation_dt = tree_interpretation[[1]]
    , top_n = 5
  )
  expect_null(plot_res)
})
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment