Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
21575cb2
Commit
21575cb2
authored
Aug 27, 2018
by
James Lamb
Committed by
Laurae
Aug 27, 2018
Browse files
updated R dependencies (#1619)
parent
b2240721
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
354 additions
and
341 deletions
+354
-341
R-package/DESCRIPTION
R-package/DESCRIPTION
+10
-10
R-package/NAMESPACE
R-package/NAMESPACE
+6
-0
R-package/R/lgb.Dataset.R
R-package/R/lgb.Dataset.R
+252
-250
R-package/R/lgb.Predictor.R
R-package/R/lgb.Predictor.R
+51
-49
R-package/R/lgb.model.dt.tree.R
R-package/R/lgb.model.dt.tree.R
+35
-32
No files found.
R-package/DESCRIPTION
View file @
21575cb2
...
...
@@ -23,23 +23,23 @@ URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
Ckmeans.1d.dp (>= 3.3.1),
DiagrammeR (>= 0.8.1),
ggplot2 (>= 1.0.1),
igraph (>= 1.0.1),
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
stringi (>= 0.5.2),
testthat,
igraph (>= 1.0.1),
stringi (>= 0.5.2)
vcd (>= 1.3)
Depends:
R (>= 3.0),
R6 (>= 2.0)
Imports:
graphics,
methods,
Matrix (>= 1.1-0),
data.table (>= 1.9.6),
graphics,
jsonlite (>= 1.0),
magrittr (>= 1.5),
jsonlite (>= 1.0)
Matrix (>= 1.1-0),
methods
RoxygenNote: 6.0.1
R-package/NAMESPACE
View file @
21575cb2
...
...
@@ -38,9 +38,15 @@ export(slice)
import(methods)
importFrom(R6,R6Class)
importFrom(data.table,":=")
importFrom(data.table,data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(graphics,barplot)
importFrom(graphics,par)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is)
useDynLib(lib_lightgbm)
R-package/R/lgb.Dataset.R
View file @
21575cb2
#' @importFrom methods is
Dataset
<-
R6Class
(
classname
=
"lgb.Dataset"
,
cloneable
=
FALSE
,
public
=
list
(
# Finalize will free up the handles
finalize
=
function
()
{
# Check the need for freeing handle
if
(
!
lgb.is.null.handle
(
private
$
handle
))
{
# Freeing up handle
lgb.call
(
"LGBM_DatasetFree_R"
,
ret
=
NULL
,
private
$
handle
)
private
$
handle
<-
NULL
}
},
# Initialize will create a starter dataset
initialize
=
function
(
data
,
params
=
list
(),
...
...
@@ -28,45 +30,45 @@ Dataset <- R6Class(
used_indices
=
NULL
,
info
=
list
(),
...
)
{
# Check for additional parameters
additional_params
<-
list
(
...
)
# Create known attributes list
INFO_KEYS
<-
c
(
"label"
,
"weight"
,
"init_score"
,
"group"
)
# Check if attribute key is in the known attribute list
for
(
key
in
names
(
additional_params
))
{
# Key existing
if
(
key
%in%
INFO_KEYS
)
{
# Store as info
info
[[
key
]]
<-
additional_params
[[
key
]]
}
else
{
# Store as param
params
[[
key
]]
<-
additional_params
[[
key
]]
}
}
# Check for dataset reference
if
(
!
is.null
(
reference
))
{
if
(
!
lgb.check.r6.class
(
reference
,
"lgb.Dataset"
))
{
stop
(
"lgb.Dataset: Can only use "
,
sQuote
(
"lgb.Dataset"
),
" as reference"
)
}
}
# Check for predictor reference
if
(
!
is.null
(
predictor
))
{
if
(
!
lgb.check.r6.class
(
predictor
,
"lgb.Predictor"
))
{
stop
(
"lgb.Dataset: Only can use "
,
sQuote
(
"lgb.Predictor"
),
" as predictor"
)
}
}
# Check for matrix format
if
(
is.matrix
(
data
))
{
# Check whether matrix is the correct type first ("double")
...
...
@@ -74,7 +76,7 @@ Dataset <- R6Class(
storage.mode
(
data
)
<-
"double"
}
}
# Setup private attributes
private
$
raw_data
<-
data
private
$
params
<-
params
...
...
@@ -86,13 +88,13 @@ Dataset <- R6Class(
private
$
free_raw_data
<-
free_raw_data
private
$
used_indices
<-
used_indices
private
$
info
<-
info
},
create_valid
=
function
(
data
,
info
=
list
(),
...
)
{
# Create new dataset
ret
<-
Dataset
$
new
(
data
,
private
$
params
,
...
...
@@ -104,61 +106,61 @@ Dataset <- R6Class(
NULL
,
info
,
...
)
# Return ret
return
(
invisible
(
ret
))
},
# Dataset constructor
construct
=
function
()
{
# Check for handle null
if
(
!
lgb.is.null.handle
(
private
$
handle
))
{
return
(
invisible
(
self
))
}
# Get feature names
cnames
<-
NULL
if
(
is.matrix
(
private
$
raw_data
)
||
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
if
(
is.matrix
(
private
$
raw_data
)
||
methods
::
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
cnames
<-
colnames
(
private
$
raw_data
)
}
# set feature names if not exist
if
(
is.null
(
private
$
colnames
)
&&
!
is.null
(
cnames
))
{
private
$
colnames
<-
as.character
(
cnames
)
}
# Get categorical feature index
if
(
!
is.null
(
private
$
categorical_feature
))
{
# Check for character name
if
(
is.character
(
private
$
categorical_feature
))
{
cate_indices
<-
as.list
(
match
(
private
$
categorical_feature
,
private
$
colnames
)
-
1
)
# Provided indices, but some indices are not existing?
if
(
sum
(
is.na
(
cate_indices
))
>
0
)
{
stop
(
"lgb.self.get.handle: supplied an unknown feature in categorical_feature: "
,
sQuote
(
private
$
categorical_feature
[
is.na
(
cate_indices
)]))
}
}
else
{
# Check if more categorical features were output over the feature space
if
(
max
(
private
$
categorical_feature
)
>
length
(
private
$
colnames
))
{
stop
(
"lgb.self.get.handle: supplied a too large value in categorical_feature: "
,
max
(
private
$
categorical_feature
),
" but only "
,
length
(
private
$
colnames
),
" features"
)
}
# Store indices as [0, n-1] indexed instead of [1, n] indexed
cate_indices
<-
as.list
(
private
$
categorical_feature
-
1
)
}
# Store indices for categorical features
private
$
params
$
categorical_feature
<-
cate_indices
}
# Check has header or not
has_header
<-
FALSE
if
(
!
is.null
(
private
$
params
$
has_header
)
||
!
is.null
(
private
$
params
$
header
))
{
...
...
@@ -166,31 +168,31 @@ Dataset <- R6Class(
has_header
<-
TRUE
}
}
# Generate parameter str
params_str
<-
lgb.params2str
(
private
$
params
)
# Get handle of reference dataset
ref_handle
<-
NULL
if
(
!
is.null
(
private
$
reference
))
{
ref_handle
<-
private
$
reference
$
.__enclos_env__
$
private
$
get_handle
()
}
handle
<-
NA_real_
# Not subsetting
if
(
is.null
(
private
$
used_indices
))
{
# Are we using a data file?
if
(
is.character
(
private
$
raw_data
))
{
handle
<-
lgb.call
(
"LGBM_DatasetCreateFromFile_R"
,
ret
=
handle
,
lgb.c_str
(
private
$
raw_data
),
params_str
,
ref_handle
)
}
else
if
(
is.matrix
(
private
$
raw_data
))
{
# Are we using a matrix?
handle
<-
lgb.call
(
"LGBM_DatasetCreateFromMat_R"
,
ret
=
handle
,
...
...
@@ -199,8 +201,8 @@ Dataset <- R6Class(
ncol
(
private
$
raw_data
),
params_str
,
ref_handle
)
}
else
if
(
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
}
else
if
(
methods
::
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
if
(
length
(
private
$
raw_data
@
p
)
>
2147483647
)
{
stop
(
"Cannot support large CSC matrix"
)
}
...
...
@@ -215,21 +217,21 @@ Dataset <- R6Class(
nrow
(
private
$
raw_data
),
params_str
,
ref_handle
)
}
else
{
# Unknown data type
stop
(
"lgb.Dataset.construct: does not support constructing from "
,
sQuote
(
class
(
private
$
raw_data
)))
}
}
else
{
# Reference is empty
if
(
is.null
(
private
$
reference
))
{
stop
(
"lgb.Dataset.construct: reference cannot be NULL for constructing data subset"
)
}
# Construct subset
handle
<-
lgb.call
(
"LGBM_DatasetGetSubset_R"
,
ret
=
handle
,
...
...
@@ -237,7 +239,7 @@ Dataset <- R6Class(
c
(
private
$
used_indices
),
# Adding c() fixes issue in R v3.5
length
(
private
$
used_indices
),
params_str
)
}
if
(
lgb.is.null.handle
(
handle
))
{
stop
(
"lgb.Dataset.construct: cannot create Dataset handle"
)
...
...
@@ -245,7 +247,7 @@ Dataset <- R6Class(
# Setup class and private type
class
(
handle
)
<-
"lgb.Dataset.handle"
private
$
handle
<-
handle
# Set feature names
if
(
!
is.null
(
private
$
colnames
))
{
self
$
set_colnames
(
private
$
colnames
)
...
...
@@ -253,139 +255,139 @@ Dataset <- R6Class(
# Load init score if requested
if
(
!
is.null
(
private
$
predictor
)
&&
is.null
(
private
$
used_indices
))
{
# Setup initial scores
init_score
<-
private
$
predictor
$
predict
(
private
$
raw_data
,
rawscore
=
TRUE
,
reshape
=
TRUE
)
# Not needed to transpose, for is col_marjor
init_score
<-
as.vector
(
init_score
)
private
$
info
$
init_score
<-
init_score
}
# Should we free raw data?
if
(
isTRUE
(
private
$
free_raw_data
))
{
private
$
raw_data
<-
NULL
}
# Get private information
if
(
length
(
private
$
info
)
>
0
)
{
# Set infos
for
(
i
in
seq_along
(
private
$
info
))
{
p
<-
private
$
info
[
i
]
self
$
setinfo
(
names
(
p
),
p
[[
1
]])
}
}
# Get label information existence
if
(
is.null
(
self
$
getinfo
(
"label"
)))
{
stop
(
"lgb.Dataset.construct: label should be set"
)
}
# Return self
return
(
invisible
(
self
))
},
# Dimension function
dim
=
function
()
{
# Check for handle
if
(
!
lgb.is.null.handle
(
private
$
handle
))
{
num_row
<-
0L
num_col
<-
0L
# Get numeric data and numeric features
c
(
lgb.call
(
"LGBM_DatasetGetNumData_R"
,
ret
=
num_row
,
private
$
handle
),
lgb.call
(
"LGBM_DatasetGetNumFeature_R"
,
ret
=
num_col
,
private
$
handle
))
}
else
if
(
is.matrix
(
private
$
raw_data
)
||
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
}
else
if
(
is.matrix
(
private
$
raw_data
)
||
methods
::
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
# Check if dgCMatrix (sparse matrix column compressed)
dim
(
private
$
raw_data
)
}
else
{
# Trying to work with unknown dimensions is not possible
stop
(
"dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly"
)
}
},
# Get column names
get_colnames
=
function
()
{
# Check for handle
if
(
!
lgb.is.null.handle
(
private
$
handle
))
{
# Get feature names and write them
cnames
<-
lgb.call.return.str
(
"LGBM_DatasetGetFeatureNames_R"
,
private
$
handle
)
private
$
colnames
<-
as.character
(
base
::
strsplit
(
cnames
,
"\t"
)[[
1
]])
private
$
colnames
}
else
if
(
is.matrix
(
private
$
raw_data
)
||
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
}
else
if
(
is.matrix
(
private
$
raw_data
)
||
methods
::
is
(
private
$
raw_data
,
"dgCMatrix"
))
{
# Check if dgCMatrix (sparse matrix column compressed)
colnames
(
private
$
raw_data
)
}
else
{
# Trying to work with unknown dimensions is not possible
stop
(
"dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly"
)
}
},
# Set column names
set_colnames
=
function
(
colnames
)
{
# Check column names non-existence
if
(
is.null
(
colnames
))
{
return
(
invisible
(
self
))
}
# Check empty column names
colnames
<-
as.character
(
colnames
)
if
(
length
(
colnames
)
==
0
)
{
return
(
invisible
(
self
))
}
# Write column names
private
$
colnames
<-
colnames
if
(
!
lgb.is.null.handle
(
private
$
handle
))
{
# Merge names with tab separation
merged_name
<-
paste0
(
as.list
(
private
$
colnames
),
collapse
=
"\t"
)
lgb.call
(
"LGBM_DatasetSetFeatureNames_R"
,
ret
=
NULL
,
private
$
handle
,
lgb.c_str
(
merged_name
))
}
# Return self
return
(
invisible
(
self
))
},
# Get information
getinfo
=
function
(
name
)
{
# Create known attributes list
INFONAMES
<-
c
(
"label"
,
"weight"
,
"init_score"
,
"group"
)
# Check if attribute key is in the known attribute list
if
(
!
is.character
(
name
)
||
length
(
name
)
!=
1
||
!
name
%in%
INFONAMES
)
{
stop
(
"getinfo: name must one of the following: "
,
paste0
(
sQuote
(
INFONAMES
),
collapse
=
", "
))
}
# Check for info name and handle
if
(
is.null
(
private
$
info
[[
name
]]))
{
if
(
lgb.is.null.handle
(
private
$
handle
)){
...
...
@@ -397,10 +399,10 @@ Dataset <- R6Class(
ret
=
info_len
,
private
$
handle
,
lgb.c_str
(
name
))
# Check if info is not empty
if
(
info_len
>
0
)
{
# Get back fields
ret
<-
NULL
ret
<-
if
(
name
==
"group"
)
{
...
...
@@ -408,65 +410,65 @@ Dataset <- R6Class(
}
else
{
numeric
(
info_len
)
# Numeric
}
ret
<-
lgb.call
(
"LGBM_DatasetGetField_R"
,
ret
=
ret
,
private
$
handle
,
lgb.c_str
(
name
))
private
$
info
[[
name
]]
<-
ret
}
}
private
$
info
[[
name
]]
},
# Set information
setinfo
=
function
(
name
,
info
)
{
# Create known attributes list
INFONAMES
<-
c
(
"label"
,
"weight"
,
"init_score"
,
"group"
)
# Check if attribute key is in the known attribute list
if
(
!
is.character
(
name
)
||
length
(
name
)
!=
1
||
!
name
%in%
INFONAMES
)
{
stop
(
"setinfo: name must one of the following: "
,
paste0
(
sQuote
(
INFONAMES
),
collapse
=
", "
))
}
# Check for type of information
info
<-
if
(
name
==
"group"
)
{
as.integer
(
info
)
# Integer
}
else
{
as.numeric
(
info
)
# Numeric
}
# Store information privately
private
$
info
[[
name
]]
<-
info
if
(
!
lgb.is.null.handle
(
private
$
handle
)
&&
!
is.null
(
info
))
{
if
(
length
(
info
)
>
0
)
{
lgb.call
(
"LGBM_DatasetSetField_R"
,
ret
=
NULL
,
private
$
handle
,
lgb.c_str
(
name
),
info
,
length
(
info
))
}
}
# Return self
return
(
invisible
(
self
))
},
# Slice dataset
slice
=
function
(
idxset
,
...
)
{
# Perform slicing
Dataset
$
new
(
NULL
,
private
$
params
,
...
...
@@ -478,84 +480,84 @@ Dataset <- R6Class(
idxset
,
NULL
,
...
)
},
# Update parameters
update_params
=
function
(
params
)
{
# Parameter updating
private
$
params
<-
modifyList
(
private
$
params
,
params
)
return
(
invisible
(
self
))
},
# Set categorical feature parameter
set_categorical_feature
=
function
(
categorical_feature
)
{
# Check for identical input
if
(
identical
(
private
$
categorical_feature
,
categorical_feature
))
{
return
(
invisible
(
self
))
}
# Check for empty data
if
(
is.null
(
private
$
raw_data
))
{
stop
(
"set_categorical_feature: cannot set categorical feature after freeing raw data,
please set "
,
sQuote
(
"free_raw_data = FALSE"
),
" when you construct lgb.Dataset"
)
}
# Overwrite categorical features
private
$
categorical_feature
<-
categorical_feature
# Finalize and return self
self
$
finalize
()
return
(
invisible
(
self
))
},
# Set reference
set_reference
=
function
(
reference
)
{
# Set known references
self
$
set_categorical_feature
(
reference
$
.__enclos_env__
$
private
$
categorical_feature
)
self
$
set_colnames
(
reference
$
get_colnames
())
private
$
set_predictor
(
reference
$
.__enclos_env__
$
private
$
predictor
)
# Check for identical references
if
(
identical
(
private
$
reference
,
reference
))
{
return
(
invisible
(
self
))
}
# Check for empty data
if
(
is.null
(
private
$
raw_data
))
{
stop
(
"set_reference: cannot set reference after freeing raw data,
please set "
,
sQuote
(
"free_raw_data = FALSE"
),
" when you construct lgb.Dataset"
)
}
# Check for non-existing reference
if
(
!
is.null
(
reference
))
{
# Reference is unknown
if
(
!
lgb.check.r6.class
(
reference
,
"lgb.Dataset"
))
{
stop
(
"set_reference: Can only use lgb.Dataset as a reference"
)
}
}
# Store reference
private
$
reference
<-
reference
# Finalize and return self
self
$
finalize
()
return
(
invisible
(
self
))
},
# Save binary model
save_binary
=
function
(
fname
)
{
# Store binary data
self
$
construct
()
lgb.call
(
"LGBM_DatasetSaveBinary_R"
,
...
...
@@ -564,7 +566,7 @@ Dataset <- R6Class(
lgb.c_str
(
fname
))
return
(
invisible
(
self
))
}
),
private
=
list
(
handle
=
NULL
,
...
...
@@ -577,51 +579,51 @@ Dataset <- R6Class(
free_raw_data
=
TRUE
,
used_indices
=
NULL
,
info
=
NULL
,
# Get handle
get_handle
=
function
()
{
# Get handle and construct if needed
if
(
lgb.is.null.handle
(
private
$
handle
))
{
self
$
construct
()
}
private
$
handle
},
# Set predictor
set_predictor
=
function
(
predictor
)
{
# Return self is identical predictor
if
(
identical
(
private
$
predictor
,
predictor
))
{
return
(
invisible
(
self
))
}
# Check for empty data
if
(
is.null
(
private
$
raw_data
))
{
stop
(
"set_predictor: cannot set predictor after free raw data,
please set "
,
sQuote
(
"free_raw_data = FALSE"
),
" when you construct lgb.Dataset"
)
}
# Check for empty predictor
if
(
!
is.null
(
predictor
))
{
# Predictor is unknown
if
(
!
lgb.check.r6.class
(
predictor
,
"lgb.Predictor"
))
{
stop
(
"set_predictor: Can only use lgb.Predictor as predictor"
)
}
}
# Store predictor
private
$
predictor
<-
predictor
# Finalize and return self
self
$
finalize
()
return
(
invisible
(
self
))
}
)
)
...
...
@@ -638,9 +640,9 @@ Dataset <- R6Class(
#' @param free_raw_data TRUE for need to free raw data after construct
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info} or parameters pass to \code{params}
#'
#'
#' @return constructed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -651,7 +653,7 @@ Dataset <- R6Class(
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
#' }
#'
#'
#' @export
lgb.Dataset
<-
function
(
data
,
params
=
list
(),
...
...
@@ -661,7 +663,7 @@ lgb.Dataset <- function(data,
free_raw_data
=
TRUE
,
info
=
list
(),
...
)
{
# Create new dataset
invisible
(
Dataset
$
new
(
data
,
params
,
...
...
@@ -673,20 +675,20 @@ lgb.Dataset <- function(data,
NULL
,
info
,
...
))
}
#' Construct validation data
#'
#'
#' Construct validation data according to training data
#'
#'
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info}.
#'
#'
#' @return constructed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -697,24 +699,24 @@ lgb.Dataset <- function(data,
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#' }
#'
#'
#' @export
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {

  # The training data must already be an lgb.Dataset; everything else is rejected
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.create.valid: input data should be an lgb.Dataset object")
  }

  # Delegate to the R6 object, which clones the training parameters into the
  # validation set; return it invisibly so the call composes quietly
  valid_set <- dataset$create_valid(data, info, ...)
  invisible(valid_set)

}
#' Construct Dataset explicitly
#'
#'
#' @param dataset Object of class \code{lgb.Dataset}
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -723,56 +725,56 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' }
#'
#'
#' @export
lgb.Dataset.construct <- function(dataset) {

  # Refuse anything that is not an lgb.Dataset R6 object
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.construct: input data should be an lgb.Dataset object")
  }

  # Trigger materialization of the native handle; result returned invisibly
  invisible(dataset$construct())

}
#' Dimensions of an lgb.Dataset
#'
#'
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters
#'
#'
#' @return a vector of numbers of rows and of columns
#'
#'
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{lgb.Dataset} object.
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#'
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#' }
#'
#'
#' @rdname dim
#' @export
dim.lgb.Dataset <- function(x, ...) {

  # S3 dim() method: only meaningful for lgb.Dataset objects
  if (!lgb.is.Dataset(x)) {
    stop("dim.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # The R6 object knows its own dimensions (from handle or raw data)
  x$dim()

}
#' Handling of column names of \code{lgb.Dataset}
...
...
@@ -800,76 +802,76 @@ dim.lgb.Dataset <- function(x, ...) {
#' colnames(dtrain) <- make.names(1:ncol(train$data))
#' print(dtrain, verbose = TRUE)
#' }
#'
#'
#' @rdname dimnames.lgb.Dataset
#' @export
dimnames.lgb.Dataset <- function(x) {

  # Guard: S3 method applies only to lgb.Dataset objects
  if (!lgb.is.Dataset(x)) {
    stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object")
  }

  # lgb.Dataset never has row names, so the first element is always NULL
  col_names <- x$get_colnames()
  list(NULL, col_names)

}
#' @rdname dimnames.lgb.Dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {

  # Replacement value must be a two-element list (rownames, colnames)
  if (!is.list(value) || length(value) != 2L) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }

  # Row names are never supported on an lgb.Dataset
  if (!is.null(value[[1L]])) {
    stop("lgb.Dataset does not have rownames")
  }

  # NULL column names: clear them and return early
  if (is.null(value[[2]])) {
    x$set_colnames(NULL)
    return(x)
  }

  # The supplied column names must match the number of columns exactly
  if (ncol(x) != length(value[[2]])) {
    stop("can't assign ", sQuote(length(value[[2]])),
         " colnames to an lgb.Dataset with ", sQuote(ncol(x)), " columns")
  }

  # Store the new column names and return the (modified) dataset
  x$set_colnames(value[[2]])
  x

}
#' Slice a dataset
#'
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' orginal lgb.Dataset object
#'
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset a integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
#' @return constructed sub dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#'
#' dsub <- lightgbm::slice(dtrain, 1:42)
#' labels <- lightgbm::getinfo(dsub, "label")
#' }
#'
#'
#' @export
slice
<-
function
(
dataset
,
...
)
{
UseMethod
(
"slice"
)
...
...
@@ -878,34 +880,34 @@ slice <- function(dataset, ...) {
#' @rdname slice
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {

  # Only lgb.Dataset objects can be sliced by this method
  if (!lgb.is.Dataset(dataset)) {
    stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # Produce the row subset via the R6 object; returned invisibly
  sliced <- dataset$slice(idxset, ...)
  invisible(sliced)

}
#' Get information of an lgb.Dataset object
#'
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#' @return info data
#'
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#'
#' \itemize{
#' \item \code{label}: label lightgbm learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{group}: group size
#' \item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -913,14 +915,14 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
#' }
#'
#'
#' @export
getinfo
<-
function
(
dataset
,
...
)
{
UseMethod
(
"getinfo"
)
...
...
@@ -929,35 +931,35 @@ getinfo <- function(dataset, ...) {
#' @rdname getinfo
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {

  # Dispatch target must be a genuine lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # Field validation ("label", "weight", ...) happens inside the R6 method
  dataset$getinfo(name)

}
#' Set information of an lgb.Dataset object
#'
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param name the name of the field to get
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
#'
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#'
#' \itemize{
#' \item \code{label}: label lightgbm learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
#' \item \code{group}.
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -965,14 +967,14 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
#' }
#'
#'
#' @export
setinfo
<-
function
(
dataset
,
...
)
{
UseMethod
(
"setinfo"
)
...
...
@@ -981,23 +983,23 @@ setinfo <- function(dataset, ...) {
#' @rdname setinfo
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {

  # Dispatch target must be a genuine lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }

  # Store the field through the R6 method; returns the dataset invisibly
  invisible(dataset$setinfo(name, info))

}
#' Set categorical feature of \code{lgb.Dataset}
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -1008,30 +1010,30 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.set.categorical(dtrain, 1:2)
#' }
#'
#'
#' @rdname lgb.Dataset.set.categorical
#' @export
lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object")
  }

  # Record the categorical feature selection on the R6 object, invisibly
  invisible(dataset$set_categorical_feature(categorical_feature))

}
#' Set reference of \code{lgb.Dataset}
#'
#'
#' If you want to use validation data, you should set reference to training data
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
...
...
@@ -1043,29 +1045,29 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#' dtest <- lgb.Dataset(test$data, test = train$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
#' }
#'
#'
#' @rdname lgb.Dataset.set.reference
#' @export
lgb.Dataset.set.reference <- function(dataset, reference) {

  # Reject anything that is not an lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object")
  }

  # Attach the training dataset as reference (bin mappers etc.), invisibly
  invisible(dataset$set_reference(reference))

}
#' Save \code{lgb.Dataset} to a binary file
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#'
#'
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
...
...
@@ -1073,21 +1075,21 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
#' }
#'
#'
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {

  # Check if dataset is not a dataset
  # Fix: error messages previously said "lgb.Dataset.set:", pointing users at
  # the wrong function; they now name lgb.Dataset.save
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object")
  }

  # File-type is not matching
  if (!is.character(fname)) {
    stop("lgb.Dataset.save: fname should be a character or a file connection")
  }

  # Store binary: delegates to the R6 object's save_binary, returned invisibly
  invisible(dataset$save_binary(fname))

}
R-package/R/lgb.Predictor.R
View file @
21575cb2
#' @importFrom methods is
Predictor
<-
R6Class
(
classname
=
"lgb.Predictor"
,
cloneable
=
FALSE
,
public
=
list
(
# Finalize will free up the handles
finalize
=
function
()
{
# Check the need for freeing handle
if
(
private
$
need_free_handle
&&
!
lgb.is.null.handle
(
private
$
handle
))
{
# Freeing up handle
lgb.call
(
"LGBM_BoosterFree_R"
,
ret
=
NULL
,
private
$
handle
)
private
$
handle
<-
NULL
}
},
# Initialize will create a starter model
initialize
=
function
(
modelfile
,
...
)
{
params
<-
list
(
...
)
private
$
params
<-
lgb.params2str
(
params
)
# Create new lgb handle
handle
<-
0.0
# Check if handle is a character
if
(
is.character
(
modelfile
))
{
# Create handle on it
handle
<-
lgb.call
(
"LGBM_BoosterCreateFromModelfile_R"
,
ret
=
handle
,
lgb.c_str
(
modelfile
))
private
$
need_free_handle
<-
TRUE
}
else
if
(
is
(
modelfile
,
"lgb.Booster.handle"
))
{
}
else
if
(
methods
::
is
(
modelfile
,
"lgb.Booster.handle"
))
{
# Check if model file is a booster handle already
handle
<-
modelfile
private
$
need_free_handle
<-
FALSE
}
else
{
# Model file is unknown
stop
(
"lgb.Predictor: modelfile must be either a character filename or an lgb.Booster.handle"
)
}
# Override class and store it
class
(
handle
)
<-
"lgb.Booster.handle"
private
$
handle
<-
handle
},
# Get current iteration
current_iter
=
function
()
{
cur_iter
<-
0L
lgb.call
(
"LGBM_BoosterGetCurrentIteration_R"
,
ret
=
cur_iter
,
private
$
handle
)
},
# Predict from data
predict
=
function
(
data
,
num_iteration
=
NULL
,
...
...
@@ -66,22 +68,22 @@ Predictor <- R6Class(
predcontrib
=
FALSE
,
header
=
FALSE
,
reshape
=
FALSE
)
{
# Check if number of iterations is existing - if not, then set it to -1 (use all)
if
(
is.null
(
num_iteration
))
{
num_iteration
<-
-1
}
# Set temporary variable
num_row
<-
0L
# Check if data is a file name
if
(
is.character
(
data
))
{
# Data is a filename, create a temporary file with a "lightgbm_" pattern in it
tmp_filename
<-
tempfile
(
pattern
=
"lightgbm_"
)
on.exit
(
unlink
(
tmp_filename
),
add
=
TRUE
)
# Predict from temporary file
lgb.call
(
"LGBM_BoosterPredictForFile_R"
,
ret
=
NULL
,
private
$
handle
,
data
,
as.integer
(
header
),
...
...
@@ -91,19 +93,19 @@ Predictor <- R6Class(
as.integer
(
num_iteration
),
private
$
params
,
lgb.c_str
(
tmp_filename
))
# Get predictions from file
preds
<-
read.delim
(
tmp_filename
,
header
=
FALSE
,
seq
=
"\t"
)
num_row
<-
nrow
(
preds
)
preds
<-
as.vector
(
t
(
preds
))
}
else
{
# Not a file, we need to predict from R object
num_row
<-
nrow
(
data
)
npred
<-
0L
# Check number of predictions to do
npred
<-
lgb.call
(
"LGBM_BoosterCalcNumPredict_R"
,
ret
=
npred
,
...
...
@@ -113,10 +115,10 @@ Predictor <- R6Class(
as.integer
(
predleaf
),
as.integer
(
predcontrib
),
as.integer
(
num_iteration
))
# Pre-allocate empty vector
preds
<-
numeric
(
npred
)
# Check if data is a matrix
if
(
is.matrix
(
data
))
{
preds
<-
lgb.call
(
"LGBM_BoosterPredictForMat_R"
,
...
...
@@ -130,8 +132,8 @@ Predictor <- R6Class(
as.integer
(
predcontrib
),
as.integer
(
num_iteration
),
private
$
params
)
}
else
if
(
is
(
data
,
"dgCMatrix"
))
{
}
else
if
(
methods
::
is
(
data
,
"dgCMatrix"
))
{
if
(
length
(
data
@
p
)
>
2147483647
)
{
stop
(
"Cannot support large CSC matrix"
)
}
...
...
@@ -150,44 +152,44 @@ Predictor <- R6Class(
as.integer
(
predcontrib
),
as.integer
(
num_iteration
),
private
$
params
)
}
else
{
# Cannot predict on unknown class
# to-do: predict from lgb.Dataset
stop
(
"predict: cannot predict on data of class "
,
sQuote
(
class
(
data
)))
}
}
# Check if number of rows is strange (not a multiple of the dataset rows)
if
(
length
(
preds
)
%%
num_row
!=
0
)
{
stop
(
"predict: prediction length "
,
sQuote
(
length
(
preds
)),
" is not a multiple of nrows(data): "
,
sQuote
(
num_row
))
}
# Get number of cases per row
npred_per_case
<-
length
(
preds
)
/
num_row
# Data reshaping
if
(
predleaf
|
predcontrib
)
{
# Predict leaves only, reshaping is mandatory
preds
<-
matrix
(
preds
,
ncol
=
npred_per_case
,
byrow
=
TRUE
)
}
else
if
(
reshape
&&
npred_per_case
>
1
)
{
# Predict with data reshaping
preds
<-
matrix
(
preds
,
ncol
=
npred_per_case
,
byrow
=
TRUE
)
}
# Return predictions
return
(
preds
)
}
),
private
=
list
(
handle
=
NULL
,
need_free_handle
=
FALSE
,
...
...
R-package/R/lgb.model.dt.tree.R
View file @
21575cb2
#' Parse a LightGBM model json dump
#'
#'
#' Parse a LightGBM model json dump into a \code{data.table} structure.
#'
#'
#' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or
#' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration
#'
#'
#' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs.
#'
#'
#' The columns of the \code{data.table} are:
#'
#'
#' \itemize{
#' \item \code{tree_index}: ID of a tree in a model (integer)
#' \item \code{split_index}: ID of a node in a tree (integer)
...
...
@@ -28,11 +28,11 @@
#' \item \code{leaf_value}: Leaf value
#' \item \code{leaf_count}: The number of observations collected by a leaf
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#'
#'
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
...
...
@@ -45,43 +45,45 @@
#'
#' tree_dt <- lgb.model.dt.tree(model)
#' }
#'
#'
#' @importFrom magrittr %>%
#' @importFrom data.table :=
#' @importFrom data.table := data.table
#' @importFrom jsonlite fromJSON
#' @export
# Parse a dumped LightGBM booster into a single data.table of tree nodes.
#
# model: an lgb.Booster; num_iteration: number of iterations to dump
# (NULL or <= 0 means best iteration). Returns one row per split node or
# leaf across all trees.
lgb.model.dt.tree <- function(model, num_iteration = NULL) {

  # Serialize the booster to JSON, then deserialize it without any
  # data.frame/matrix simplification so the tree structure stays a
  # nested list that can be traversed recursively.
  model_json <- jsonlite::fromJSON(
    lgb.dump(model, num_iteration = num_iteration)
    , simplifyVector = TRUE
    , simplifyDataFrame = FALSE
    , simplifyMatrix = FALSE
    , flatten = FALSE
  )

  # Parse every tree into its own data.table, then stack them
  tree_dt <- data.table::rbindlist(
    lapply(model_json$tree_info, single.tree.parse)
    , use.names = TRUE
  )

  # Translate 0-based split feature indices into feature names
  feature_ids <- seq.int(from = 0, to = model_json$max_feature_idx)
  tree_dt[, split_feature := Lookup(split_feature, feature_ids, model_json$feature_names)]

  return(tree_dt)
}
#' @importFrom data.table data.table rbindlist
single.tree.parse
<-
function
(
lgb_tree
)
{
# Traverse tree function
pre_order_traversal
<-
function
(
env
=
NULL
,
tree_node_leaf
,
current_depth
=
0L
,
parent_index
=
NA_integer_
)
{
if
(
is.null
(
env
))
{
# Setup initial default data.table with default types
env
<-
new.env
(
parent
=
emptyenv
())
...
...
@@ -103,10 +105,10 @@ single.tree.parse <- function(lgb_tree) {
# start tree traversal
pre_order_traversal
(
env
,
tree_node_leaf
,
current_depth
,
parent_index
)
}
else
{
# Check if split index is not null in leaf
if
(
!
is.null
(
tree_node_leaf
$
split_index
))
{
# update data.table
env
$
single_tree_dt
<-
data.table
::
rbindlist
(
l
=
list
(
env
$
single_tree_dt
,
c
(
tree_node_leaf
[
c
(
"split_index"
,
...
...
@@ -121,7 +123,7 @@ single.tree.parse <- function(lgb_tree) {
"node_parent"
=
parent_index
)),
use.names
=
TRUE
,
fill
=
TRUE
)
# Traverse tree again both left and right
pre_order_traversal
(
env
,
tree_node_leaf
$
left_child
,
...
...
@@ -131,9 +133,9 @@ single.tree.parse <- function(lgb_tree) {
tree_node_leaf
$
right_child
,
current_depth
=
current_depth
+
1L
,
parent_index
=
tree_node_leaf
$
split_index
)
}
else
if
(
!
is.null
(
tree_node_leaf
$
leaf_index
))
{
# update data.table
env
$
single_tree_dt
<-
data.table
::
rbindlist
(
l
=
list
(
env
$
single_tree_dt
,
c
(
tree_node_leaf
[
c
(
"leaf_index"
,
...
...
@@ -143,29 +145,30 @@ single.tree.parse <- function(lgb_tree) {
"leaf_parent"
=
parent_index
)),
use.names
=
TRUE
,
fill
=
TRUE
)
}
}
return
(
env
$
single_tree_dt
)
}
# Traverse structure
single_tree_dt
<-
pre_order_traversal
(
tree_node_leaf
=
lgb_tree
$
tree_structure
)
# Store index
single_tree_dt
[,
tree_index
:=
lgb_tree
$
tree_index
]
# Return tree
return
(
single_tree_dt
)
}
#' @importFrom magrittr %>% extract inset
# Vectorized key -> value translation with a fallback for misses.
#
# For each element of `key`, find its position in `key_lookup` and return
# the corresponding element of `value_lookup`. Entries with no match (or
# whose matched value is NA) are filled with `missing`.
#
# @param key vector of keys to translate
# @param key_lookup vector of known keys, parallel to `value_lookup`
# @param value_lookup vector of values, one per element of `key_lookup`
# @param missing value to use where no (non-NA) translation exists
# @return a vector the same length as `key`
Lookup <- function(key, key_lookup, value_lookup, missing = NA) {
  # Base-R equivalent of the former magrittr extract/inset pipeline:
  # subset by matched positions, then patch the NA holes.
  values <- value_lookup[match(key, key_lookup)]
  values[is.na(values)] <- missing
  return(values)
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment