Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
42204c43
Unverified
Commit
42204c43
authored
Sep 28, 2019
by
James Lamb
Committed by
GitHub
Sep 28, 2019
Browse files
[R-package] factored dependency 'magrittr' out of R package (#2334)
parent
b3c12662
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
120 additions
and
65 deletions
+120
-65
R-package/DESCRIPTION
R-package/DESCRIPTION
+0
-1
R-package/NAMESPACE
R-package/NAMESPACE
+2
-4
R-package/R/lgb.importance.R
R-package/R/lgb.importance.R
+21
-11
R-package/R/lgb.interprete.R
R-package/R/lgb.interprete.R
+54
-22
R-package/R/lgb.model.dt.tree.R
R-package/R/lgb.model.dt.tree.R
+10
-15
R-package/R/lgb.plot.importance.R
R-package/R/lgb.plot.importance.R
+8
-2
R-package/R/lgb.plot.interpretation.R
R-package/R/lgb.plot.interpretation.R
+25
-9
docs/conf.py
docs/conf.py
+0
-1
No files found.
R-package/DESCRIPTION
View file @
42204c43
...
...
@@ -39,7 +39,6 @@ Imports:
data.table (>= 1.9.6),
graphics,
jsonlite (>= 1.0),
magrittr (>= 1.5),
Matrix (>= 1.1-0),
methods
RoxygenNote: 6.0.1
R-package/NAMESPACE
100755 → 100644
View file @
42204c43
...
...
@@ -43,13 +43,11 @@ importFrom(data.table,data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(data.table,setnames)
importFrom(data.table,setorder)
importFrom(data.table,setorderv)
importFrom(graphics,barplot)
importFrom(graphics,par)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is)
importFrom(stats,quantile)
useDynLib(lib_lightgbm , .registration = TRUE)
R-package/R/lgb.importance.R
View file @
42204c43
...
...
@@ -29,8 +29,7 @@
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
#'
#' @importFrom magrittr %>% %T>% extract
#' @importFrom data.table :=
#' @importFrom data.table := setnames setorderv
#' @export
lgb.importance
<-
function
(
model
,
percentage
=
TRUE
)
{
...
...
@@ -43,22 +42,33 @@ lgb.importance <- function(model, percentage = TRUE) {
tree_dt
<-
lgb.model.dt.tree
(
model
)
# Extract elements
tree_imp
<-
tree_dt
%>%
magrittr
::
extract
(
.
,
i
=
!
is.na
(
split_index
),
j
=
.
(
Gain
=
sum
(
split_gain
),
Cover
=
sum
(
internal_count
),
Frequency
=
.N
),
by
=
"split_feature"
)
%T>%
data.table
::
setnames
(
.
,
old
=
"split_feature"
,
new
=
"Feature"
)
%>%
magrittr
::
extract
(
.
,
i
=
order
(
Gain
,
decreasing
=
TRUE
))
tree_imp_dt
<-
tree_dt
[
!
is.na
(
split_index
)
,
.
(
Gain
=
sum
(
split_gain
),
Cover
=
sum
(
internal_count
),
Frequency
=
.N
)
,
by
=
"split_feature"
]
data.table
::
setnames
(
tree_imp_dt
,
old
=
"split_feature"
,
new
=
"Feature"
)
# Sort features by Gain
data.table
::
setorderv
(
x
=
tree_imp_dt
,
cols
=
c
(
"Gain"
)
,
order
=
-1
)
# Check if relative values are requested
if
(
percentage
)
{
tree_imp
[,
":="
(
Gain
=
Gain
/
sum
(
Gain
),
tree_imp
_dt
[,
":="
(
Gain
=
Gain
/
sum
(
Gain
),
Cover
=
Cover
/
sum
(
Cover
),
Frequency
=
Frequency
/
sum
(
Frequency
))]
}
# Return importance table
return
(
tree_imp
)
return
(
tree_imp
_dt
)
}
R-package/R/lgb.interprete.R
View file @
42204c43
...
...
@@ -39,7 +39,6 @@
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
#'
#' @importFrom data.table as.data.table
#' @importFrom magrittr %>% %T>%
#' @export
lgb.interprete
<-
function
(
model
,
data
,
...
...
@@ -56,12 +55,18 @@ lgb.interprete <- function(model,
tree_interpretation_dt_list
<-
vector
(
mode
=
"list"
,
length
=
length
(
idxset
))
# Get parsed predictions of data
leaf_index_mat_list
<-
model
$
predict
(
data
[
idxset
,
,
drop
=
FALSE
],
num_iteration
=
num_iteration
,
predleaf
=
TRUE
)
%>%
t
(
.
)
%>%
data.table
::
as.data.table
(
.
)
%>%
lapply
(
.
,
FUN
=
function
(
x
)
matrix
(
x
,
ncol
=
num_class
,
byrow
=
TRUE
))
pred_mat
<-
t
(
model
$
predict
(
data
[
idxset
,
,
drop
=
FALSE
]
,
num_iteration
=
num_iteration
,
predleaf
=
TRUE
)
)
leaf_index_dt
<-
data.table
::
as.data.table
(
pred_mat
)
leaf_index_mat_list
<-
lapply
(
X
=
leaf_index_dt
,
FUN
=
function
(
x
)
matrix
(
x
,
ncol
=
num_class
,
byrow
=
TRUE
)
)
# Get list of trees
tree_index_mat_list
<-
lapply
(
leaf_index_mat_list
,
...
...
@@ -121,20 +126,39 @@ single.tree.interprete <- function(tree_dt,
}
#' @importFrom data.table rbindlist
#' @importFrom magrittr %>% extract
#' @importFrom data.table := rbindlist setorder
multiple.tree.interprete
<-
function
(
tree_dt
,
tree_index
,
leaf_index
)
{
# Apply each trees
mapply
(
single.tree.interprete
,
tree_id
=
tree_index
,
leaf_id
=
leaf_index
,
MoreArgs
=
list
(
tree_dt
=
tree_dt
),
SIMPLIFY
=
FALSE
,
USE.NAMES
=
TRUE
)
%>%
data.table
::
rbindlist
(
.
,
use.names
=
TRUE
)
%>%
magrittr
::
extract
(
.
,
j
=
.
(
Contribution
=
sum
(
Contribution
)),
by
=
"Feature"
)
%>%
magrittr
::
extract
(
.
,
i
=
order
(
abs
(
Contribution
),
decreasing
=
TRUE
))
interp_dt
<-
data.table
::
rbindlist
(
l
=
mapply
(
FUN
=
single.tree.interprete
,
tree_id
=
tree_index
,
leaf_id
=
leaf_index
,
MoreArgs
=
list
(
tree_dt
=
tree_dt
)
,
SIMPLIFY
=
FALSE
,
USE.NAMES
=
TRUE
)
,
use.names
=
TRUE
)
interp_dt
<-
interp_dt
[,
.
(
Contribution
=
sum
(
Contribution
)),
by
=
"Feature"
]
# Sort features in descending order by contribution
interp_dt
[,
abs_contribution
:=
abs
(
Contribution
)]
data.table
::
setorder
(
x
=
interp_dt
,
-
abs_contribution
)
# Drop absolute value of contribution (only needed for sorting)
interp_dt
[,
abs_contribution
:=
NULL
]
return
(
interp_dt
)
}
...
...
@@ -147,14 +171,22 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index
# Loop throughout each class
for
(
i
in
seq_len
(
num_class
))
{
tree_interpretation
[[
i
]]
<-
multiple.tree.interprete
(
tree_dt
,
tree_index_mat
[,
i
],
leaf_index_mat
[,
i
])
%T>%
{
# Number of classes larger than 1 requires adjustment
if
(
num_class
>
1
)
{
data.table
::
setnames
(
.
,
old
=
"Contribution"
,
new
=
paste
(
"Class"
,
i
-
1
))
}
next_interp_dt
<-
multiple.tree.interprete
(
tree_dt
=
tree_dt
,
tree_index
=
tree_index_mat
[,
i
]
,
leaf_index
=
leaf_index_mat
[,
i
]
)
if
(
num_class
>
1
){
data.table
::
setnames
(
next_interp_dt
,
old
=
"Contribution"
,
new
=
paste
(
"Class"
,
i
-
1
)
)
}
tree_interpretation
[[
i
]]
<-
next_interp_dt
}
# Check for numbe rof classes larger than 1
...
...
R-package/R/lgb.model.dt.tree.R
View file @
42204c43
...
...
@@ -42,7 +42,6 @@
#'
#' tree_dt <- lgb.model.dt.tree(model)
#'
#' @importFrom magrittr %>%
#' @importFrom data.table := data.table rbindlist
#' @importFrom jsonlite fromJSON
#' @export
...
...
@@ -64,10 +63,16 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
# Combine into single data.table fourth
tree_dt
<-
data.table
::
rbindlist
(
tree_list
,
use.names
=
TRUE
)
# Lookup sequence
tree_dt
[,
split_feature
:=
Lookup
(
split_feature
,
seq.int
(
from
=
0
,
to
=
parsed_json_model
$
max_feature_idx
),
parsed_json_model
$
feature_names
)]
# Substitute feature index with the actual feature name
# Since the index comes from C++ (which is 0-indexed), be sure
# to add 1 (e.g. index 28 means the 29th feature in feature_names)
split_feature_indx
<-
tree_dt
[,
split_feature
]
+
1
# Get corresponding feature names. Positions in split_feature_indx
# which are NA will result in an NA feature name
feature_names
<-
parsed_json_model
$
feature_names
[
split_feature_indx
]
tree_dt
[,
split_feature
:=
feature_names
]
# Return tree
return
(
tree_dt
)
...
...
@@ -159,13 +164,3 @@ single.tree.parse <- function(lgb_tree) {
return
(
single_tree_dt
)
}
#' @importFrom magrittr %>% extract inset
Lookup
<-
function
(
key
,
key_lookup
,
value_lookup
,
missing
=
NA
)
{
# Match key by looked up key
match
(
key
,
key_lookup
)
%>%
magrittr
::
extract
(
value_lookup
,
.
)
%>%
magrittr
::
inset
(
.
,
is.na
(
.
),
missing
)
}
R-package/R/lgb.plot.importance.R
View file @
42204c43
...
...
@@ -60,8 +60,14 @@ lgb.plot.importance <- function(tree_imp,
op
<-
graphics
::
par
(
no.readonly
=
TRUE
)
on.exit
(
graphics
::
par
(
op
))
# Do some magic plotting
graphics
::
par
(
mar
=
op
$
mar
%>%
magrittr
::
inset
(
.
,
2
,
left_margin
))
graphics
::
par
(
mar
=
c
(
op
$
mar
[
1
]
,
left_margin
,
op
$
mar
[
3
]
,
op
$
mar
[
4
]
)
)
# Do plot
tree_imp
[
.N
:
1
,
...
...
R-package/R/lgb.plot.interpretation.R
View file @
42204c43
...
...
@@ -35,7 +35,6 @@
#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
#' @importFrom data.table setnames
#' @importFrom graphics barplot par
#' @importFrom magrittr inset
#' @export
lgb.plot.interpretation
<-
function
(
tree_interpretation_dt
,
top_n
=
10
,
...
...
@@ -51,7 +50,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
on.exit
(
graphics
::
par
(
op
))
# Do some magic plotting
graphics
::
par
(
mar
=
op
$
mar
%>%
magrittr
::
inset
(
.
,
1
:
3
,
c
(
3
,
left_margin
,
2
)))
bottom_margin
<-
3.0
top_margin
<-
2.0
right_margin
<-
op
$
mar
[
4
]
graphics
::
par
(
mar
=
c
(
bottom_margin
,
left_margin
,
top_margin
,
right_margin
)
)
# Check for number of classes
if
(
num_class
==
1
)
{
...
...
@@ -75,12 +85,18 @@ lgb.plot.interpretation <- function(tree_interpretation_dt,
for
(
i
in
seq_len
(
num_class
))
{
# Prepare interpretation, perform T, get the names, and plot straight away
tree_interpretation_dt
[,
c
(
1
,
i
+
1
),
with
=
FALSE
]
%T>%
data.table
::
setnames
(
.
,
old
=
names
(
.
),
new
=
c
(
"Feature"
,
"Contribution"
))
%>%
multiple.tree.plot.interpretation
(
.
,
# Self
top_n
=
top_n
,
title
=
paste
(
"Class"
,
i
-
1
),
cex
=
cex
)
plot_dt
<-
tree_interpretation_dt
[,
c
(
1
,
i
+
1
),
with
=
FALSE
]
data.table
::
setnames
(
plot_dt
,
old
=
names
(
plot_dt
)
,
new
=
c
(
"Feature"
,
"Contribution"
)
)
multiple.tree.plot.interpretation
(
plot_dt
,
top_n
=
top_n
,
title
=
paste
(
"Class"
,
i
-
1
)
,
cex
=
cex
)
}
}
...
...
@@ -114,6 +130,6 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
)]
# Return invisibly
invisible
(
NULL
)
return
(
invisible
(
NULL
)
)
}
docs/conf.py
View file @
42204c43
...
...
@@ -229,7 +229,6 @@ def generate_r_docs(app):
r-devtools=1.13.6=r351h6115d3f_0
\
r-data.table=1.11.4=r351h96ca727_0
\
r-jsonlite=1.5=r351h96ca727_0
\
r-magrittr=1.5=r351h6115d3f_4
\
r-matrix=1.2_14=r351h96ca727_0
\
r-testthat=2.0.0=r351h29659fb_0
\
cmake=3.14.0=h52cb24c_0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment