Commit 551d59ca authored by Guolin Ke's avatar Guolin Ke Committed by GitHub

R package (#168)

* finish R's c_api

* clean code

* fix sizeof pointer on 32-bit systems.

* add predictor class

* add Dataset class

* format code

* add booster

* add type check for expose function

* add a simple callback

* add all callbacks

* finish the basic training logic

* update docs

* add a simple training interface

* add basic test

* adapt the changes in c_api

* add test for Dataset

* add test for custom obj/eval functions

* fix python test

* fix bug in metadata init

* fix R CMD check
parent acbd4f34
Package: lightgbm
Type: Package
Title: Light Gradient Boosting Machine
Version: 0.1
Date: 2016-12-29
Author: Guolin Ke <guolin.ke@microsoft.com>
Maintainer: Guolin Ke <guolin.ke@microsoft.com>
Description: LightGBM is a gradient boosting framework that uses tree based learning algorithms.
It is designed to be distributed and efficient with the following advantages:
1. Faster training speed and higher efficiency.
2. Lower memory usage.
3. Better accuracy.
4. Parallel learning supported.
5. Capable of handling large-scale data.
License: The MIT License (MIT) | file LICENSE
URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat,
igraph (>= 1.0.1),
methods,
data.table (>= 1.9.6),
magrittr (>= 1.5),
stringi (>= 0.5.2)
Depends:
R (>= 3.0),
R6
Imports:
Matrix (>= 1.1-0)
RoxygenNote: 5.0.1
The MIT License (MIT)
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Generated by roxygen2: do not edit by hand
S3method("dimnames<-",lgb.Dataset)
S3method(dim,lgb.Dataset)
S3method(dimnames,lgb.Dataset)
S3method(getinfo,lgb.Dataset)
S3method(predict,lgb.Booster)
S3method(setinfo,lgb.Dataset)
S3method(slice,lgb.Dataset)
export(getinfo)
export(lgb.Dataset)
export(lgb.Dataset.construct)
export(lgb.Dataset.create.valid)
export(lgb.Dataset.save)
export(lgb.Dataset.set.categorical)
export(lgb.Dataset.set.reference)
export(lgb.dump)
export(lgb.get.eval.result)
export(lgb.load)
export(lgb.save)
export(lgb.train)
export(lightgbm)
export(setinfo)
export(slice)
importFrom(R6,R6Class)
useDynLib(lightgbm)
CB_ENV <- R6Class(
"lgb.cb_env",
cloneable=FALSE,
public = list(
model=NULL,
iteration=NULL,
begin_iteration=NULL,
end_iteration=NULL,
eval_list=list(),
eval_err_list=list(),
best_iter=-1,
met_early_stop=FALSE
)
)
cb.reset.parameters <- function(new_params) {
if (typeof(new_params) != "list")
stop("'new_params' must be a list")
pnames <- gsub("\\.", "_", names(new_params))
nrounds <- NULL
# run some checks at the beginning
init <- function(env) {
nrounds <<- env$end_iteration - env$begin_iteration + 1
if (is.null(env$model))
stop("Env should has 'model'")
# Some parameters are not allowed to be changed,
# since changing them would simply wreck some chaos
not_allowed <- pnames %in%
c('num_class', 'metric', 'boosting_type')
if (any(not_allowed))
stop('Parameters ', paste(pnames[not_allowed], collapse = ", "), " cannot be changed during boosting.")
for (n in pnames) {
p <- new_params[[n]]
if (is.function(p)) {
if (length(formals(p)) != 2)
stop("Parameter '", n, "' is a function but not of two arguments")
} else if (is.numeric(p) || is.character(p)) {
if (length(p) != nrounds)
stop("Length of '", n, "' has to be equal to 'nrounds'")
} else {
stop("Parameter '", n, "' is not a function or a vector")
}
}
}
callback <- function(env) {
if (is.null(nrounds))
init(env)
i <- env$iteration - env$begin_iteration + 1
pars <- lapply(new_params, function(p) {
if (is.function(p))
return(p(i, nrounds))
p[i]
})
# to-do check pars
if (!is.null(env$model)) {
env$model$reset_parameter(pars)
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'is_pre_iteration') <- TRUE
attr(callback, 'name') <- 'cb.reset.parameters'
return(callback)
}
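# Usage sketch (illustrative only: this callback is not exported in the
# NAMESPACE above, so it is shown here as it would be wired in through
# lgb.train()'s 'callbacks' argument). Each element of 'new_params' is either
# a function of (iteration, nrounds) or a vector with one value per round;
# the names 'lr_decay', 'cbs', 'params', 'dtrain' and 'model' are hypothetical.
#
#   lr_decay <- function(iter, nrounds) 0.1 * 0.99 ^ iter
#   cbs <- list(cb.reset.parameters(list(learning_rate = lr_decay)))
#   # model <- lgb.train(params, dtrain, nrounds = 10, callbacks = cbs)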
# Format the evaluation metric string
format.eval.string <- function(eval_res, eval_err=NULL) {
if (is.null(eval_res))
stop('no evaluation results')
if (length(eval_res) == 0)
stop('no evaluation results')
if (!is.null(eval_err)) {
res <- sprintf('%s\'s %s:%g+%g', eval_res$data_name, eval_res$name, eval_res$value, eval_err)
} else {
res <- sprintf('%s\'s %s:%g', eval_res$data_name, eval_res$name, eval_res$value)
}
return(res)
}
merge.eval.string <- function(env){
if(length(env$eval_list) <= 0){
return("")
}
msg <- list(sprintf('[%d]:',env$iteration))
is_eval_err <- FALSE
if(length(env$eval_err_list) > 0){
is_eval_err <- TRUE
}
for(j in 1:length(env$eval_list)) {
eval_err <- NULL
if(is_eval_err){
eval_err <- env$eval_err_list[[j]]
}
msg <- c(msg, format.eval.string(env$eval_list[[j]],eval_err))
}
return(paste0(msg, collapse='\t'))
}
cb.print.evaluation <- function(period=1){
callback <- function(env){
if(period > 0){
i <- env$iteration
if( (i - 1) %% period == 0
| i == env$begin_iteration
| i == env$end_iteration ){
cat(merge.eval.string(env), "\n")
}
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.print.evaluation'
return(callback)
}
cb.record.evaluation <- function() {
callback <- function(env){
if(length(env$eval_list) <= 0) return()
is_eval_err <- FALSE
if(length(env$eval_err_list) > 0){
is_eval_err <- TRUE
}
if(length(env$model$record_evals) == 0){
for(j in 1:length(env$eval_list)) {
data_name <- env$eval_list[[j]]$data_name
name <- env$eval_list[[j]]$name
env$model$record_evals$start_iter <- env$begin_iteration
if(is.null(env$model$record_evals[[data_name]])){
env$model$record_evals[[data_name]] <- list()
}
env$model$record_evals[[data_name]][[name]] <- list()
env$model$record_evals[[data_name]][[name]]$eval <- list()
env$model$record_evals[[data_name]][[name]]$eval_err <- list()
}
}
for(j in 1:length(env$eval_list)) {
eval_res <- env$eval_list[[j]]
eval_err <- NULL
if(is_eval_err){
eval_err <- env$eval_err_list[[j]]
}
data_name <- eval_res$data_name
name <- eval_res$name
env$model$record_evals[[data_name]][[name]]$eval <- c(env$model$record_evals[[data_name]][[name]]$eval, eval_res$value)
env$model$record_evals[[data_name]][[name]]$eval_err <- c(env$model$record_evals[[data_name]][[name]]$eval_err, eval_err)
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.record.evaluation'
return(callback)
}
cb.early.stop <- function(stopping_rounds, verbose=TRUE) {
# state variables
factor_to_bigger_better <- NULL
best_iter <- NULL
best_score <- NULL
best_msg <- NULL
eval_len <- NULL
init <- function(env) {
eval_len <<- length(env$eval_list)
if (eval_len == 0)
stop("For early stopping, valids must have at least one element")
if (verbose)
cat("Will train until hasn't improved in ",
stopping_rounds, " rounds.\n\n", sep = '')
factor_to_bigger_better <<- rep(1.0, eval_len)
best_iter <<- rep(-1, eval_len)
best_score <<- rep(-Inf, eval_len)
best_msg <<- list()
for(i in 1:eval_len){
best_msg <<- c(best_msg, "")
if(!env$eval_list[[i]]$higher_better){
factor_to_bigger_better[i] <<- -1.0
}
}
}
callback <- function(env, finalize = FALSE) {
if (is.null(eval_len))
init(env)
cur_iter <- env$iteration
for(i in 1:eval_len){
score <- env$eval_list[[i]]$value * factor_to_bigger_better[i]
if(score > best_score[i]){
best_score[i] <<- score
best_iter[i] <<- cur_iter
if(verbose){
best_msg[[i]] <<- as.character(merge.eval.string(env))
}
} else {
if(cur_iter - best_iter[i] >= stopping_rounds){
if(!is.null(env$model)){
env$model$best_iter <- best_iter[i]
}
if(verbose){
cat('Early stopping, best iteration is:',"\n")
cat(best_msg[[i]],"\n")
}
env$best_iter <- best_iter[i]
env$met_early_stop <- TRUE
}
}
}
}
attr(callback, 'call') <- match.call()
attr(callback, 'name') <- 'cb.early.stop'
return(callback)
}
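# Usage sketch: lgb.train() (defined later in this package) adds this callback
# automatically when 'early_stopping_rounds' is given, so it is normally not
# called directly. After early stopping the best iteration is stored in
# model$best_iter and is used by default in save_model(), dump_model() and
# predict() when 'num_iteration' is NULL. Object names below are illustrative.
#
#   # model <- lgb.train(params, dtrain, 100, valids = list(test = dtest),
#   #                    early_stopping_rounds = 10)
#   # model$best_iter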
# Extract callback names from the list of callbacks
callback.names <- function(cb_list) {
unlist(lapply(cb_list, function(x) attr(x, 'name')))
}
add.cb <- function(cb_list, cb) {
cb_list <- c(cb_list, cb)
names(cb_list) <- callback.names(cb_list)
if ('cb.early.stop' %in% names(cb_list)) {
cb_list <- c(cb_list, cb_list['cb.early.stop'])
# this removes only the first one
cb_list['cb.early.stop'] <- NULL
}
if ('cb.cv.predict' %in% names(cb_list)) {
cb_list <- c(cb_list, cb_list['cb.cv.predict'])
cb_list['cb.cv.predict'] <- NULL
}
cb_list
}
categorize.callbacks <- function(cb_list) {
list(
pre_iter = Filter(function(x) {
pre <- attr(x, 'is_pre_iteration')
!is.null(pre) && pre
}, cb_list),
post_iter = Filter(function(x) {
pre <- attr(x, 'is_pre_iteration')
is.null(pre) || !pre
}, cb_list)
)
}
Booster <- R6Class(
"lgb.Booster",
cloneable=FALSE,
public = list(
best_iter = -1,
record_evals = list(),
finalize = function() {
if(!lgb.is.null.handle(private$handle)){
print("free booster handle")
lgb.call("LGBM_BoosterFree_R", ret=NULL, private$handle)
private$handle <- NULL
}
},
initialize = function(params = list(),
train_set = NULL,
modelfile = NULL,
...) {
params <- append(params, list(...))
params_str <- lgb.params2str(params)
handle <- lgb.new.handle()
if (!is.null(train_set)) {
if (!lgb.check.r6.class(train_set, "lgb.Dataset")) {
stop("lgb.Booster: Only can use lgb.Dataset as training data")
}
handle <-
lgb.call("LGBM_BoosterCreate_R", ret=handle, train_set$.__enclos_env__$private$get_handle(), params_str)
private$train_set <- train_set
private$num_dataset <- 1
private$init_predictor <- train_set$.__enclos_env__$private$predictor
if (!is.null(private$init_predictor)) {
lgb.call("LGBM_BoosterMerge_R", ret=NULL,
handle,
private$init_predictor$.__enclos_env__$private$handle)
}
private$is_predicted_cur_iter <-
c(private$is_predicted_cur_iter, FALSE)
} else if (!is.null(modelfile)) {
if (!is.character(modelfile)) {
stop("lgb.Booster: Only can use string as model file path")
}
handle <-
lgb.call("LGBM_BoosterCreateFromModelfile_R",
ret=handle,
lgb.c_str(modelfile))
} else {
stop(
"lgb.Booster: Need at least one training dataset or model file to create booster instance"
)
}
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
private$num_class <- as.integer(1)
private$num_class <-
lgb.call("LGBM_BoosterGetNumClasses_R", ret=private$num_class, private$handle)
},
set_train_data_name = function(name) {
private$name_train_set <- name
return(self)
},
add_valid = function(data, name) {
if (!lgb.check.r6.class(data, "lgb.Dataset")) {
stop("lgb.Booster.add_valid: Only can use lgb.Dataset as validation data")
}
if (!identical(data$.__enclos_env__$private$predictor, private$init_predictor)) {
stop(
"lgb.Booster.add_valid: Add validation data failed, you should use same predictor for these data"
)
}
if(!is.character(name)){
stop("only can use character as data name")
}
lgb.call("LGBM_BoosterAddValidData_R", ret=NULL, private$handle, data$.__enclos_env__$private$get_handle())
private$valid_sets <- c(private$valid_sets, data)
private$name_valid_sets <- c(private$name_valid_sets, name)
private$num_dataset <- private$num_dataset + 1
private$is_predicted_cur_iter <-
c(private$is_predicted_cur_iter, FALSE)
return(self)
},
reset_parameter = function(params, ...) {
params <- append(params, list(...))
params_str <- lgb.params2str(params)
lgb.call("LGBM_BoosterResetParameter_R", ret=NULL,
private$handle,
params_str)
return(self)
},
update = function(train_set = NULL, fobj = NULL) {
if (!is.null(train_set)) {
if (!lgb.check.r6.class(train_set, "lgb.Dataset")) {
stop("lgb.Booster.update: Only can use lgb.Dataset as training data")
}
if (!identical(train_set$.__enclos_env__$private$predictor, private$init_predictor)) {
stop(
"lgb.Booster.update: Change train_set failed, you should use same predictor for these data"
)
}
lgb.call("LGBM_BoosterResetTrainingData_R", ret=NULL,
private$handle,
train_set$.__enclos_env__$private$get_handle())
private$train_set <- train_set
}
if (is.null(fobj)) {
ret <-
lgb.call("LGBM_BoosterUpdateOneIter_R", ret=NULL, private$handle)
} else {
if (typeof(fobj) != 'closure') {
stop("lgb.Booster.update: fobj should be a function")
}
gpair <- fobj(private$inner_predict(1), private$train_set)
ret <-
lgb.call(
"LGBM_BoosterUpdateOneIterCustom_R", ret=NULL,
private$handle,
gpair$grad,
gpair$hess,
length(gpair$grad)
)
}
for (i in 1:length(private$is_predicted_cur_iter)) {
private$is_predicted_cur_iter[[i]] <- FALSE
}
return(ret)
},
rollback_one_iter = function() {
lgb.call("LGBM_BoosterRollbackOneIter_R", ret=NULL, private$handle)
for (i in 1:length(private$is_predicted_cur_iter)) {
private$is_predicted_cur_iter[[i]] <- FALSE
}
return(self)
},
current_iter = function() {
cur_iter <- as.integer(0)
return(lgb.call("LGBM_BoosterGetCurrentIteration_R", ret=cur_iter, private$handle))
},
eval = function(data, name, feval = NULL) {
if (!lgb.check.r6.class(data, "lgb.Dataset")) {
stop("lgb.Booster.eval: only can use lgb.Dataset to eval")
}
data_idx <- 0
if (identical(data, private$train_set)) {
data_idx <- 1
} else {
if(length(private$valid_sets) > 0){
for (i in 1:length(private$valid_sets)) {
if (identical(data, private$valid_sets[[i]])) {
data_idx <- i + 1
break
}
}
}
}
if (data_idx == 0) {
self$add_valid(data, name)
data_idx <- private$num_dataset
}
return(private$inner_eval(name, data_idx, feval))
},
eval_train = function(feval = NULL) {
return(private$inner_eval(private$name_train_set, 1, feval))
},
eval_valid = function(feval = NULL) {
ret <- list()
if(length(private$valid_sets) <= 0) return(ret)
for (i in 1:length(private$valid_sets)) {
ret <-
append(ret, private$inner_eval(private$name_valid_sets[[i]], i + 1, feval))
}
return(ret)
},
save_model = function(filename, num_iteration = NULL) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
lgb.call(
"LGBM_BoosterSaveModel_R",
ret = NULL,
private$handle,
as.integer(num_iteration),
lgb.c_str(filename)
)
return(self)
},
dump_model = function(num_iteration = NULL) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
return(
lgb.call.return.str(
"LGBM_BoosterDumpModel_R",
private$handle,
as.integer(num_iteration)
)
)
},
predict = function(data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
header = FALSE,
reshape = FALSE) {
if (is.null(num_iteration)) {
num_iteration <- self$best_iter
}
predictor <- Predictor$new(private$handle)
return(predictor$predict(data, num_iteration, rawscore, predleaf, header, reshape))
},
to_predictor = function() {
Predictor$new(private$handle)
}
),
private = list(
handle = NULL,
train_set = NULL,
name_train_set = "training",
valid_sets = list(),
name_valid_sets = list(),
predict_buffer = list(),
is_predicted_cur_iter = list(),
num_class = 1,
num_dataset = 0,
init_predictor = NULL,
eval_names = NULL,
higher_better_inner_eval = NULL,
inner_predict = function(idx) {
data_name <- private$name_train_set
if(idx > 1){
data_name <- private$name_valid_sets[[idx - 1]]
}
if (idx > private$num_dataset) {
stop("data_idx should not be greater than num_dataset")
}
if (is.null(private$predict_buffer[[data_name]])) {
npred <- as.integer(0)
npred <-
lgb.call("LGBM_BoosterGetNumPredict_R",
ret = npred,
private$handle,
as.integer(idx - 1))
private$predict_buffer[[data_name]] <- rep(0.0, npred)
}
if (!private$is_predicted_cur_iter[[idx]]) {
private$predict_buffer[[data_name]] <-
lgb.call(
"LGBM_BoosterGetPredict_R",
ret=private$predict_buffer[[data_name]],
private$handle,
as.integer(idx - 1)
)
private$is_predicted_cur_iter[[idx]] <- TRUE
}
return(private$predict_buffer[[data_name]])
},
get_eval_info = function() {
if (is.null(private$eval_names)) {
names <-
lgb.call.return.str("LGBM_BoosterGetEvalNames_R", private$handle)
if(nchar(names) > 0){
names <- strsplit(names, "\t")[[1]]
private$eval_names <- names
private$higher_better_inner_eval <-
rep(FALSE, length(names))
for (i in 1:length(names)) {
if (startsWith(names[i], "auc") |
startsWith(names[i], "ndcg")) {
private$higher_better_inner_eval[i] <- TRUE
}
}
}
}
return(private$eval_names)
},
inner_eval = function(data_name, data_idx, feval = NULL) {
if (data_idx > private$num_dataset) {
stop("data_idx should not be greater than num_dataset")
}
private$get_eval_info()
ret <- list()
if (length(private$eval_names) > 0) {
tmp_vals <- rep(0.0, length(private$eval_names))
tmp_vals <-
lgb.call("LGBM_BoosterGetEval_R", ret=tmp_vals,
private$handle,
as.integer(data_idx - 1))
for (i in 1:length(private$eval_names)) {
res <- list()
res$data_name <- data_name
res$name <- private$eval_names[i]
res$value <- tmp_vals[i]
res$higher_better <- private$higher_better_inner_eval[i]
ret <- append(ret, list(res))
}
}
if (!is.null(feval)) {
if (typeof(feval) != 'closure') {
stop("lgb.Booster.eval: feval should be a function")
}
data <- private$train_set
if (data_idx > 1) {
data <- private$valid_sets[[data_idx - 1]]
}
res <- feval(private$inner_predict(data_idx), data)
res$data_name <- data_name
ret <- append(ret, list(res))
}
return(ret)
}
)
)
# internal helper method
lgb.is.Booster <- function(x){
if(lgb.check.r6.class(x, "lgb.Booster")){
return(TRUE)
} else{
return(FALSE)
}
}
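# A minimal custom objective sketch: Booster$update(fobj = ...) and
# lgb.train(obj = ...) call the function with the current raw predictions and
# the training lgb.Dataset, and expect a list with per-row 'grad' and 'hess'.
# Logistic loss is used here purely as an illustration; the name 'logregobj'
# is not part of this package.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  probs <- 1 / (1 + exp(-preds))   # sigmoid of the raw scores
  grad <- probs - labels           # first-order gradient
  hess <- probs * (1 - probs)      # second-order gradient
  return(list(grad = grad, hess = hess))
}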
#' Predict method for LightGBM model
#'
#' Predicted values based on class \code{lgb.Booster}
#'
#' @param object Object of class \code{lgb.Booster}
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param num_iteration number of iterations to predict with; NULL or <= 0 means use the best iteration
#' @param rawscore whether the prediction should be returned in the form of the original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
#' logistic regression would return predictions for log-odds instead of probabilities.
#' @param predleaf whether to predict leaf indices instead.
#' @param header only used for prediction from a text file; TRUE if the text file has a header
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#' prediction outputs per case.
#' @return
#' For regression or binary classification, it returns a vector of length \code{nrows(data)}.
#' For multiclass classification, either a \code{num_class * nrows(data)} vector or
#' a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#'
#' When \code{predleaf = TRUE}, the output is a matrix object with the
#' number of columns corresponding to the number of trees.
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' preds <- predict(model, test$data)
#'
#' @rdname predict.lgb.Booster
#' @export
predict.lgb.Booster <- function(object,
data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
header = FALSE,
reshape = FALSE) {
if(!lgb.is.Booster(object)){
stop("predict.lgb.Booster: should input lgb.Booster object")
}
object$predict(data, num_iteration, rawscore, predleaf, header, reshape)
}
#' Load LightGBM model
#'
#' Load LightGBM model from saved model file
#'
#' @param filename path of model file
#'
#' @return booster
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' lgb.save(model, "model.txt")
#' load_booster <- lgb.load("model.txt")
#' @rdname lgb.load
#' @export
lgb.load <- function(filename){
if(!is.character(filename)){
stop("lgb.load: filename should be character")
}
Booster$new(modelfile=filename)
}
#' Save LightGBM model
#'
#' Save LightGBM model
#'
#' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename
#' @param num_iteration number of iterations to save; NULL or <= 0 means use the best iteration
#'
#' @return booster
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' lgb.save(model, "model.txt")
#' @rdname lgb.save
#' @export
lgb.save <- function(booster, filename, num_iteration=NULL){
if(!lgb.is.Booster(booster)){
stop("lgb.save: should input lgb.Booster object")
}
if(!is.character(filename)){
stop("lgb.save: filename should be character")
}
booster$save_model(filename, num_iteration)
}
#' Dump LightGBM model to json
#'
#' Dump LightGBM model to json
#'
#' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iterations to dump; NULL or <= 0 means use the best iteration
#'
#' @return json format of model
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#' json_model <- lgb.dump(model)
#' @rdname lgb.dump
#' @export
lgb.dump <- function(booster, num_iteration=NULL){
if(!lgb.is.Booster(booster)){
stop("lgb.dump: should input lgb.Booster object")
}
booster$dump_model(num_iteration)
}
#' Get record evaluation result from booster
#'
#' Get record evaluation result from booster
#' @param booster Object of class \code{lgb.Booster}
#' @param data_name name of dataset
#' @param eval_name name of evaluation
#' @param iters iterations to return; NULL returns all iterations
#' @param is_err if TRUE, return the evaluation error instead
#' @return vector of evaluation result
#'
#' @rdname lgb.get.eval.result
#' @export
lgb.get.eval.result <- function(booster, data_name, eval_name, iters=NULL, is_err=FALSE){
if(!lgb.is.Booster(booster)){
stop("lgb.get.eval.result: only can use booster to get eval result")
}
if(!is.character(data_name) || !is.character(eval_name)){
stop("lgb.get.eval.result: data_name and eval_name should be character")
}
if(is.null(booster$record_evals[[data_name]])){
stop("lgb.get.eval.result: wrong data name")
}
if(is.null(booster$record_evals[[data_name]][[eval_name]])){
stop("lgb.get.eval.result: wrong eval name")
}
result <- booster$record_evals[[data_name]][[eval_name]]$eval
if(is_err){
result <- booster$record_evals[[data_name]][[eval_name]]$eval_err
}
if(is.null(iters)){
return(as.numeric(result))
}
iters <- as.integer(iters)
delta <- booster$record_evals$start_iter - 1
iters <- iters - delta
return(as.numeric(result[iters]))
}
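# Usage sketch (assumes a model trained as in the roxygen examples above, with
# valids = list(test = dtest) and metric = "l2"; object names are illustrative):
#
#   # lgb.get.eval.result(model, "test", "l2")              # all recorded values
#   # lgb.get.eval.result(model, "test", "l2", iters = 5L)  # value at iteration 5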
Predictor <- R6Class(
"lgb.Predictor",
cloneable=FALSE,
public = list(
finalize = function() {
if(private$need_free_handle && !lgb.is.null.handle(private$handle)){
print("free booster handle")
lgb.call("LGBM_BoosterFree_R", ret=NULL, private$handle)
private$handle <- NULL
}
},
initialize = function(modelfile) {
handle <- lgb.new.handle()
if(typeof(modelfile) == "character") {
handle <- lgb.call("LGBM_BoosterCreateFromModelfile_R", ret=handle, lgb.c_str(modelfile))
private$need_free_handle <- TRUE
} else if (inherits(modelfile, "lgb.Booster.handle")) {
handle <- modelfile
private$need_free_handle <- FALSE
} else {
stop("lgb.Predictor: modelfile must be either character filename, or lgb.Booster.handle")
}
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
},
current_iter = function() {
cur_iter <- as.integer(0)
return(lgb.call("LGBM_BoosterGetCurrentIteration_R", ret=cur_iter, private$handle))
},
predict = function(data,
num_iteration = NULL, rawscore = FALSE, predleaf = FALSE, header = FALSE,
reshape = FALSE) {
if (is.null(num_iteration)) {
num_iteration <- -1
}
num_row <- 0
if (typeof(data) == "character") {
tmp_filename <- tempfile(pattern = "lightgbm_")
lgb.call("LGBM_BoosterPredictForFile_R", ret=NULL, private$handle, data, as.integer(header),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration),
lgb.c_str(tmp_filename))
preds <- read.delim(tmp_filename, header=FALSE, sep="\t")
num_row <- nrow(preds)
preds <- as.vector(t(preds))
# delete temp file
if(file.exists(tmp_filename)) { file.remove(tmp_filename) }
} else {
num_row <- nrow(data)
npred <- as.integer(0)
npred <- lgb.call("LGBM_BoosterCalcNumPredict_R", ret=npred,
private$handle,
as.integer(num_row),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
# allocate space for the predictions
preds <- rep(0.0, npred)
if (is.matrix(data)) {
preds <- lgb.call("LGBM_BoosterPredictForMat_R", ret=preds,
private$handle,
data,
as.integer(nrow(data)),
as.integer(ncol(data)),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
} else if (inherits(data, "dgCMatrix")) {
preds <- lgb.call("LGBM_BoosterPredictForCSC_R", ret=preds,
private$handle,
data@p,
data@i,
data@x,
length(data@p),
length(data@x),
nrow(data),
as.integer(rawscore),
as.integer(predleaf),
as.integer(num_iteration))
} else {
stop(paste("predict: does not support to predict from ",
typeof(data)))
}
}
if (length(preds) %% num_row != 0) {
stop("predict: prediction length ", length(preds)," is not multiple of nrows(data) ", num_row)
}
npred_per_case <- length(preds) / num_row
if (reshape && npred_per_case > 1) {
preds <- matrix(preds, ncol = npred_per_case)
}
return(preds)
}
),
private = list(
handle = NULL,
need_free_handle = FALSE
)
)
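# Shape of the returned predictions (a sketch; parameter values are illustrative):
# a single-output model yields a plain vector of length nrow(data). With several
# outputs per row (e.g. objective = "multiclass" with num_class = 3) and
# reshape = TRUE, the vector is reshaped into a matrix with npred_per_case
# (here 3) columns, one row per input row.
#
#   # preds <- predict(model, test$data, reshape = TRUE)  # nrow(test$data) x num_class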
#' Main training logic for LightGBM
#'
#' Main training logic for LightGBM
#'
#' @param params List of parameters
#' @param data a \code{lgb.Dataset} object, used for training
#' @param nrounds number of training rounds
#' @param valids a list of \code{lgb.Dataset} object, used for validation
#' @param obj objective function, can be character or custom objective function
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param verbose verbosity for output;
#' if verbose > 0, evaluation messages will also be recorded in booster$record_evals
#' @param eval_freq evaluation output frequency
#' @param init_model path of a model file or an \code{lgb.Booster} object; training will continue from this model
#' @param colnames feature names; if not NULL, will be used to overwrite the column names of the dataset
#' @param categorical_feature list of strings or integers;
#' integers are treated as column indices,
#' strings as feature names
#' @param early_stopping_rounds int
#' Activates early stopping.
#' Requires at least one validation dataset and one metric.
#' If there is more than one, all of them will be checked.
#' The returned model will have trained (best_iter + early_stopping_rounds) rounds.
#' If early stopping occurs, the model will have a 'best_iter' field.
#' @param callbacks list of callback functions
#' that are applied at each iteration
#' @param ... other parameters, see parameters.md for more information
#' @return a trained booster model \code{lgb.Booster}.
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package='lightgbm')
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label=train$label)
#' data(agaricus.test, package='lightgbm')
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label=test$label)
#' params <- list(objective="regression", metric="l2")
#' valids <- list(test=dtest)
#' model <- lgb.train(params, dtrain, 100, valids, min_data=1, learning_rate=1, early_stopping_rounds=10)
#'
#' @rdname lgb.train
#' @export
lgb.train <- function(params=list(), data, nrounds=10,
valids=list(),
obj=NULL, eval=NULL,
verbose=1, eval_freq=1L,
init_model=NULL,
colnames=NULL,
categorical_feature=NULL,
early_stopping_rounds=NULL,
callbacks=list(), ...) {
additional_params <- list(...)
params <- append(params, additional_params)
params$verbose <- verbose
params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval)
fobj <- NULL
feval <- NULL
if(typeof(params$objective) == "closure"){
fobj <- params$objective
params$objective <- "NONE"
}
if (typeof(eval) == "closure"){
feval <- eval
}
lgb.check.params(params)
predictor <- NULL
if(is.character(init_model)){
predictor <- Predictor$new(init_model)
} else if(lgb.is.Booster(init_model)) {
predictor <- init_model$to_predictor()
}
begin_iteration <- 1
if(!is.null(predictor)){
begin_iteration <- predictor$current_iter() + 1
}
end_iteration <- begin_iteration + nrounds - 1
# check dataset
if(!lgb.is.Dataset(data)){
stop("lgb.train: data only accepts lgb.Dataset object")
}
if (length(valids) > 0) {
if (typeof(valids) != "list" ||
!all(sapply(valids, lgb.is.Dataset)))
stop("valids must be a list of lgb.Dataset elements")
evnames <- names(valids)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the valids must have a name tag")
}
data$update_params(params)
data$.__enclos_env__$private$set_predictor(predictor)
if(!is.null(colnames)){
data$set_colnames(colnames)
}
data$set_categorical_feature(categorical_feature)
valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
if(length(valids) > 0){
for (key in names(valids)) {
valid_data <- valids[[key]]
if(identical(data, valid_data)){
valid_contain_train <- TRUE
train_data_name <- key
next
}
valid_data$update_params(params)
valid_data$set_reference(data)
reduced_valid_sets[[key]] <- valid_data
}
}
# process callbacks
if(eval_freq > 0){
callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq))
}
if (verbose > 0 && length(valids) > 0) {
callbacks <- add.cb(callbacks, cb.record.evaluation())
}
# Early stopping callback
if (!is.null(early_stopping_rounds)) {
if(early_stopping_rounds > 0){
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds, verbose=verbose))
}
}
cb <- categorize.callbacks(callbacks)
# construct booster
booster <- Booster$new(params=params, train_set=data)
if(valid_contain_train){
booster$set_train_data_name(train_data_name)
}
for (key in names(reduced_valid_sets)) {
booster$add_valid(reduced_valid_sets[[key]], key)
}
# callback env
env <- CB_ENV$new()
env$model <- booster
env$begin_iteration <- begin_iteration
env$end_iteration <- end_iteration
# start training
for(i in begin_iteration:end_iteration){
env$iteration <- i
env$eval_list <- list()
for (f in cb$pre_iter) f(env)
# update one iter
booster$update(fobj=fobj)
# collect eval result
eval_list <- list()
if(length(valids) > 0){
if(valid_contain_train){
eval_list <- append(eval_list, booster$eval_train(feval=feval))
}
eval_list <- append(eval_list, booster$eval_valid(feval=feval))
}
env$eval_list <- eval_list
for (f in cb$post_iter) f(env)
# met early stopping
if(env$met_early_stop) break
}
return(booster)
}
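# A minimal custom evaluation sketch: lgb.train(eval = ...) calls the function
# with the current predictions and the corresponding lgb.Dataset, and expects a
# list with 'name', 'value' and 'higher_better'. Mean absolute error is used
# here purely as an illustration; the name 'mae_eval' is not part of this package.
mae_eval <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(name = "mae", value = mean(abs(preds - labels)), higher_better = FALSE)
}
#   # model <- lgb.train(params, dtrain, 10, valids, eval = mae_eval)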
# Simple interface for training a lightgbm model.
# Its documentation is combined with lgb.train.
#
#' @rdname lgb.train
#' @export
lightgbm <- function(data, label = NULL, weight = NULL,
params = list(), nrounds=10,
verbose = 1, eval_freq = 1L,
early_stopping_rounds = NULL,
save_name = "lightgbm.model",
init_model = NULL, callbacks = list(), ...) {
dtrain <- lgb.Dataset(data, label=label, weight=weight)
valids <- list()
if (verbose > 0)
valids$train <- dtrain
bst <- lgb.train(params, dtrain, nrounds, valids, verbose = verbose, eval_freq=eval_freq,
early_stopping_rounds = early_stopping_rounds,
init_model = init_model, callbacks = callbacks, ...)
bst$save_model(save_name)
return(bst)
}
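# Quick usage sketch for the simple interface (uses the bundled agaricus data;
# parameter values are illustrative):
#
#   # data(agaricus.train, package = "lightgbm")
#   # bst <- lightgbm(data = agaricus.train$data, label = agaricus.train$label,
#   #                 params = list(objective = "binary"), nrounds = 10)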
#' Training part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' }
#'
#' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
#'
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
#' School of Information and Computer Science.
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.train
#' @usage data(agaricus.train)
#' @format A list containing a label vector, and a dgCMatrix object with 6513
#' rows and 127 variables
NULL
#' Test part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' }
#'
#' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
#'
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
#' School of Information and Computer Science.
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.test
#' @usage data(agaricus.test)
#' @format A list containing a label vector, and a dgCMatrix object with 1611
#' rows and 126 variables
NULL
# Various imports
#' @importFrom R6 R6Class
#' @useDynLib lightgbm
NULL
lgb.new.handle <- function() {
# use a 64-bit double to store the handle address
return(0.0)
}
lgb.is.null.handle <- function(x) {
if (is.null(x)) {
return(TRUE)
}
if (x == 0) {
return(TRUE)
}
return(FALSE)
}
lgb.encode.char <- function(arr, len) {
if (typeof(arr) != "raw") {
stop("lgb.encode.char: only can encode from raw type")
}
return(rawToChar(arr[1:len]))
}
lgb.call <- function(fun_name, ret, ...) {
call_state <- as.integer(0)
if (!is.null(ret)) {
call_state <-
.Call(fun_name, ..., ret, call_state , PACKAGE = "lightgbm")
} else {
call_state <- .Call(fun_name, ..., call_state , PACKAGE = "lightgbm")
}
if (call_state != as.integer(0)) {
buf_len <- as.integer(200)
act_len <- as.integer(0)
err_msg <- raw(buf_len)
err_msg <-
.Call("LGBM_GetLastError_R", buf_len, act_len, err_msg, PACKAGE = "lightgbm")
if (act_len > buf_len) {
buf_len <- act_len
err_msg <- raw(buf_len)
err_msg <-
.Call("LGBM_GetLastError_R",
buf_len,
act_len,
err_msg,
PACKAGE = "lightgbm")
}
stop(paste0("api error: ", lgb.encode.char(err_msg, act_len)))
}
return(ret)
}
lgb.call.return.str <- function(fun_name, ...) {
buf_len <- as.integer(1024 * 1024)
act_len <- as.integer(0)
buf <- raw(buf_len)
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
if (act_len > buf_len) {
buf_len <- act_len
buf <- raw(buf_len)
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
}
return(lgb.encode.char(buf, act_len))
}
lgb.params2str <- function(params, ...) {
if (typeof(params) != "list")
stop("params must be a list")
names(params) <- gsub("\\.", "_", names(params))
# merge parameters from the params and the dots-expansion
dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params))
if (length(intersect(names(params),
names(dot_params))) > 0)
stop(
"Same parameters in 'params' and in the call are not allowed. Please check your 'params' list."
)
params <- c(params, dot_params)
ret <- list()
for (key in names(params)) {
# join multi value first
val <- paste0(params[[key]], collapse = ",")
if(nchar(val) <= 0) next
# join key value
pair <- paste0(c(key, val), collapse = "=")
ret <- c(ret, pair)
}
if (length(ret) == 0) {
return(lgb.c_str(""))
} else{
return(lgb.c_str(paste0(ret, collapse = " ")))
}
}
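# Example of the conversion (a sketch of the resulting string, not executed here):
#
#   # lgb.params2str(list(learning.rate = 0.1, num_leaves = 31, metric = c("l2", "auc")))
#   # -> NUL-terminated raw bytes of "learning_rate=0.1 num_leaves=31 metric=l2,auc"
#   #    (dots become underscores, multi-valued parameters are joined with commas,
#   #     key=value pairs with spaces)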
lgb.c_str <- function(x) {
ret <- charToRaw(as.character(x))
ret <- c(ret, as.raw(0))
return(ret)
}
lgb.check.r6.class <- function(object, name) {
if (!("R6" %in% class(object))) {
return(FALSE)
}
if (!(name %in% class(object))) {
return(FALSE)
}
return(TRUE)
}
lgb.check.params <- function(params){
# To-do
return(params)
}
lgb.check.obj <- function(params, obj) {
if(!is.null(obj)){
params$objective <- obj
}
if(is.character(params$objective)){
if(!(params$objective %in% c("regression", "binary", "multiclass", "lambdarank"))){
stop("lgb.check.obj: objective name error should be (regression, binary, multiclass, lambdarank)")
}
} else if(typeof(params$objective) != "closure"){
stop("lgb.check.obj: objective should be character or function")
}
return(params)
}
lgb.check.eval <- function(params, eval) {
if(is.null(params$metric)){
params$metric <- list()
}
if(!is.null(eval)){
# append metric
if(is.character(eval) || is.list(eval)){
params$metric <- append(params$metric, eval)
}
}
if (typeof(eval) != "closure"){
if(is.null(params$metric) || length(params$metric) == 0) {
# add default metric
if(is.character(params$objective)){
if(params$objective == "regression"){
params$metric <- "l2"
} else if(params$objective == "binary"){
params$metric <- "binary_logloss"
} else if(params$objective == "multiclass"){
params$metric <- "multi_logloss"
} else if(params$objective == "lambdarank"){
params$metric <- "ndcg"
}
}
}
}
return(params)
}
LightGBM R Package
==================
Installation
------------
```
cd R-package
R CMD INSTALL --build .
```
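
Examples
--------

A minimal training sketch, assuming the package installed above and its bundled
`agaricus` example data (parameter values are illustrative):

```
library(lightgbm)
data(agaricus.train, package='lightgbm')
dtrain <- lgb.Dataset(agaricus.train$data, label=agaricus.train$label)
model <- lgb.train(list(objective="binary", metric="binary_logloss"), dtrain, 10)
```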
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\docType{data}
\name{agaricus.test}
\alias{agaricus.test}
\title{Test part from Mushroom Data Set}
\format{A list containing a label vector, and a dgCMatrix object with 1611
rows and 126 variables}
\usage{
data(agaricus.test)
}
\description{
This data set is originally from the Mushroom data set,
UCI Machine Learning Repository.
}
\details{
This data set includes the following fields:
\itemize{
\item \code{label} the label for each record
\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
}
}
\references{
https://archive.ics.uci.edu/ml/datasets/Mushroom
Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
}
\keyword{datasets}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lightgbm.R
\docType{data}
\name{agaricus.train}
\alias{agaricus.train}
\title{Training part from Mushroom Data Set}
\format{A list containing a label vector, and a dgCMatrix object with 6513
rows and 127 variables}
\usage{
data(agaricus.train)
}
\description{
This data set is originally from the Mushroom data set,
UCI Machine Learning Repository.
}
\details{
This data set includes the following fields:
\itemize{
\item \code{label} the label for each record
\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
}
}
\references{
https://archive.ics.uci.edu/ml/datasets/Mushroom
Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
}
\keyword{datasets}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{dim.lgb.Dataset}
\alias{dim.lgb.Dataset}
\title{Dimensions of lgb.Dataset}
\usage{
\method{dim}{lgb.Dataset}(x, ...)
}
\arguments{
\item{x}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
}
\value{
a vector of numbers of rows and of columns
}
\description{
Dimensions of lgb.Dataset
}
\details{
Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{lgb.Dataset} object.
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data))
stopifnot(all(dim(dtrain) == dim(train$data)))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{dimnames.lgb.Dataset}
\alias{dimnames.lgb.Dataset}
\alias{dimnames<-.lgb.Dataset}
\title{Handling of column names of \code{lgb.Dataset}}
\usage{
\method{dimnames}{lgb.Dataset}(x)
\method{dimnames}{lgb.Dataset}(x) <- value
}
\arguments{
\item{x}{object of class \code{lgb.Dataset}}
\item{value}{a list of two elements: the first one is ignored
and the second one is column names}
}
\description{
Handling of column names of \code{lgb.Dataset}
}
\details{
Only column names are supported for \code{lgb.Dataset}, thus setting of
row names would have no effect and the returned row names would be NULL.
Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
dimnames(dtrain)
colnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data))
print(dtrain, verbose=TRUE)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{getinfo}
\alias{getinfo}
\alias{getinfo.lgb.Dataset}
\title{Get information of an lgb.Dataset object}
\usage{
getinfo(dataset, ...)
\method{getinfo}{lgb.Dataset}(dataset, name, ...)
}
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
\item{name}{the name of the information field to get (see details)}
}
\value{
info data
}
\description{
Get information of an lgb.Dataset object
}
\details{
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: the label LightGBM learns from;
\item \code{weight}: weights to do a weight rescale;
\item \code{group}: group size;
\item \code{init_score}: the initial score is the base prediction LightGBM will boost from;
}
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label')
stopifnot(all(labels2 == 1-labels))
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset}
\alias{lgb.Dataset}
\title{Construct lgb.Dataset object}
\usage{
lgb.Dataset(data, params = list(), reference = NULL, colnames = NULL,
categorical_feature = NULL, free_raw_data = TRUE, info = list(), ...)
}
\arguments{
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{params}{a list of parameters}
\item{reference}{reference dataset}
\item{colnames}{names of columns}
\item{categorical_feature}{categorical features}
\item{free_raw_data}{TRUE to free the raw data after constructing the dataset}
\item{info}{a list of information of the lgb.Dataset object}
\item{...}{other information to pass to \code{info} or parameters to pass to \code{params}}
}
\value{
constructed dataset
}
\description{
Construct lgb.Dataset object
}
\details{
Construct lgb.Dataset object from a dense matrix, a sparse matrix,
or a local file (that was created previously by saving an \code{lgb.Dataset}).
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.save(dtrain, 'lgb.Dataset.data')
dtrain <- lgb.Dataset('lgb.Dataset.data')
lgb.Dataset.construct(dtrain)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{lgb.Dataset.construct}
\alias{lgb.Dataset.construct}
\title{Construct Dataset explicitly}
\usage{
lgb.Dataset.construct(dataset)
}
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
}
\description{
Construct Dataset explicitly
}
\examples{
data(agaricus.train, package='lightgbm')
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label=train$label)
lgb.Dataset.construct(dtrain)
}