Unverified commit fc991c9d authored by James Lamb, committed by GitHub

[R-package] added R linting and changed R code to comma-first (fixes #2373) (#2437)

parent b4bb38d9
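
Note for readers skimming the diff: comma-first style puts each argument on its own line with the comma leading the line that follows, so adding or removing an argument touches exactly one line. A minimal before/after sketch (illustrative only, not taken from the diff below):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

# comma-last (the style being removed)
model <- lgb.train(params = list(objective = "binary"),
                   data = dtrain,
                   nrounds = 5)

# comma-first (the style this PR enforces)
model <- lgb.train(
  params = list(objective = "binary")
  , data = dtrain
  , nrounds = 5
)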
#' saveRDS for \code{lgb.Booster} models
#'
#' Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides
#' whether to save the raw model or not.
#'
#' @param object R object to serialize.
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default),
#'              a binary one is used. See the comments in the help for save.
#' @param version the workspace format version to use. \code{NULL} specifies the current default
#'                version (2). Versions prior to 2 are not supported, so this will only be relevant
#'                when there are later versions.
#' @param compress a logical specifying whether saving to a named file is to use "gzip" compression,
#'                 or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of
#'                 compression to be used. Ignored if file is a connection.
#' @param refhook a hook function for handling reference objects.
#' @param raw whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}.
#'
@@ -23,10 +29,10 @@
#' params <- list(objective = "regression", metric = "l2")
#' valids <- list(test = dtest)
#' model <- lgb.train(
#'   params = params
#'   , data = dtrain
#'   , nrounds = 10
#'   , valids = valids
#'   , min_data = 1
#'   , learning_rate = 1
#'   , early_stopping_rounds = 5
@@ -48,12 +54,14 @@ saveRDS.lgb.Booster <- function(object,
  object$save()

  # Save RDS
  saveRDS(
    object
    , file = file
    , ascii = ascii
    , version = version
    , compress = compress
    , refhook = refhook
  )

  # Free model from memory
  object$raw <- NA
@@ -61,12 +69,14 @@ saveRDS.lgb.Booster <- function(object,
  } else {

    # Save as usual
    saveRDS(
      object
      , file = file
      , ascii = ascii
      , version = version
      , compress = compress
      , refhook = refhook
    )

  }
...
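
A usage sketch for the method above (a hedged example: it assumes the bundled agaricus data and the companion reader readRDS.lgb.Booster from the same package version):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
model <- lgb.train(
  params = list(objective = "binary")
  , data = dtrain
  , nrounds = 5
)
# raw = TRUE (the recommended default) keeps the raw model bytes in the RDS
# file so the booster can be restored in a fresh R session
saveRDS.lgb.Booster(model, file = "model.rds", raw = TRUE)
model2 <- readRDS.lgb.Booster("model.rds")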
@@ -25,9 +25,20 @@ lgb.call <- function(fun_name, ret, ...) {
  # Check for a ret call
  if (!is.null(ret)) {

    # Call with ret
    call_state <- .Call(
      fun_name
      , ...
      , ret
      , call_state
      , PACKAGE = "lib_lightgbm"
    )

  } else {

    # Call without ret
    call_state <- .Call(
      fun_name
      , ...
      , call_state
      , PACKAGE = "lib_lightgbm"
    )

  }
  call_state <- as.integer(call_state)
  # Check for call state value post call
@@ -37,17 +48,25 @@ lgb.call <- function(fun_name, ret, ...) {
    buf_len <- 200L
    act_len <- 0L
    err_msg <- raw(buf_len)
    err_msg <- .Call(
      "LGBM_GetLastError_R"
      , buf_len
      , act_len
      , err_msg
      , PACKAGE = "lib_lightgbm"
    )

    # Check error buffer
    if (act_len > buf_len) {
      buf_len <- act_len
      err_msg <- raw(buf_len)
      err_msg <- .Call(
        "LGBM_GetLastError_R"
        , buf_len
        , act_len
        , err_msg
        , PACKAGE = "lib_lightgbm"
      )
    }

    # Return error
@@ -97,7 +116,13 @@ lgb.params2str <- function(params, ...) {
  # Check for identical parameters
  if (length(intersect(names(params), names(dot_params))) > 0) {
    stop(
      "Same parameters in "
      , sQuote("params")
      , " and in the call are not allowed. Please check your "
      , sQuote("params")
      , " list"
    )
  }

  # Merge parameters
@@ -160,15 +185,43 @@ lgb.check.params <- function(params) {
lgb.check.obj <- function(params, obj) {

  # List known objectives in a vector
  OBJECTIVES <- c(
    "regression"
    , "regression_l1"
    , "regression_l2"
    , "mean_squared_error"
    , "mse"
    , "l2_root"
    , "root_mean_squared_error"
    , "rmse"
    , "mean_absolute_error"
    , "mae"
    , "quantile"
    , "huber"
    , "fair"
    , "poisson"
    , "binary"
    , "lambdarank"
    , "multiclass"
    , "softmax"
    , "multiclassova"
    , "multiclass_ova"
    , "ova"
    , "ovr"
    , "xentropy"
    , "cross_entropy"
    , "xentlambda"
    , "cross_entropy_lambda"
    , "mean_absolute_percentage_error"
    , "mape"
    , "gamma"
    , "tweedie"
  )

  # Check whether the objective is empty or not, and take it from params if needed
  if (!is.null(obj)) {
    params$objective <- obj
  }

  # Check whether the objective is a character
  if (is.character(params$objective)) {
...
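
For context, the branch shown above goes on to validate a character objective against the OBJECTIVES vector. A minimal sketch of that membership check (illustrative only; the package's exact error wording may differ):

params <- list(objective = "binary")
if (is.character(params$objective) && !(params$objective %in% OBJECTIVES)) {
  # reject anything that is not a known objective name
  stop("objective name error, should be one of: ", paste(OBJECTIVES, collapse = ", "))
}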
@@ -17,62 +17,82 @@ class(train$data)
# Note: we are putting in sparse matrix here, lightgbm naturally handles sparse input
# Use sparse matrix when your feature is sparse (e.g. when you are using one-hot encoding vector)
print("Training lightgbm with sparseMatrix")
bst <- lightgbm(
  data = train$data
  , label = train$label
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , objective = "binary"
)

# Alternatively, you can put in dense matrix, i.e. basic R-matrix
print("Training lightgbm with Matrix")
bst <- lightgbm(
  data = as.matrix(train$data)
  , label = train$label
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , objective = "binary"
)

# You can also put in lgb.Dataset object, which stores label, data and other meta data needed for advanced features
print("Training lightgbm with lgb.Dataset")
dtrain <- lgb.Dataset(
  data = train$data
  , label = train$label
)
bst <- lightgbm(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , objective = "binary"
)

# Verbose = 0,1,2
print("Train lightgbm with verbose 0, no message")
bst <- lightgbm(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , objective = "binary"
  , verbose = 0
)

print("Train lightgbm with verbose 1, print evaluation metric")
bst <- lightgbm(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , nthread = 2
  , objective = "binary"
  , verbose = 1
)

print("Train lightgbm with verbose 2, also print information about tree")
bst <- lightgbm(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , nthread = 2
  , objective = "binary"
  , verbose = 2
)

# You can also specify data as file path to a LibSVM/TSV/CSV format input
# Since we do not have this file with us, the following line is just for illustration
# bst <- lightgbm(
#   data = "agaricus.train.svm"
#   , num_leaves = 4
#   , learning_rate = 1
#   , nrounds = 2
#   , objective = "binary"
# )

#--------------------Basic prediction using lightgbm--------------
# You can do prediction using the following line
@@ -104,37 +124,43 @@ valids <- list(train = dtrain, test = dtest)
# To train with valids, use lgb.train, which contains more advanced features
# valids allows us to monitor the evaluation result on all data in the list
print("Train lightgbm using lgb.train with valids")
bst <- lgb.train(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , valids = valids
  , nthread = 2
  , objective = "binary"
)

# We can change evaluation metrics, or use multiple evaluation metrics
print("Train lightgbm using lgb.train with valids, watch logloss and error")
bst <- lgb.train(
  data = dtrain
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , valids = valids
  , eval = c("binary_error", "binary_logloss")
  , nthread = 2
  , objective = "binary"
)

# lgb.Dataset can also be saved using lgb.Dataset.save
lgb.Dataset.save(dtrain, "dtrain.buffer")

# To load it in, simply call lgb.Dataset
dtrain2 <- lgb.Dataset("dtrain.buffer")
bst <- lgb.train(
  data = dtrain2
  , num_leaves = 4
  , learning_rate = 1
  , nrounds = 2
  , valids = valids
  , nthread = 2
  , objective = "binary"
)

# information can be extracted from lgb.Dataset using getinfo
label <- getinfo(dtest, "label")
...
@@ -13,10 +13,12 @@ valids <- list(eval = dtest, train = dtrain)
print("Start running example to start from an initial prediction")

# Train lightgbm for 1 round
param <- list(
  num_leaves = 4
  , learning_rate = 1
  , nthread = 2
  , objective = "binary"
)
bst <- lgb.train(param, dtrain, 1, valids = valids)

# Note: we need the margin value instead of transformed prediction in set_init_score
@@ -29,7 +31,9 @@ setinfo(dtrain, "init_score", ptrain)
setinfo(dtest, "init_score", ptest)

print("This is result of boost from initial prediction")
bst <- lgb.train(
  params = param
  , data = dtrain
  , nrounds = 5
  , valids = valids
)
@@ -60,21 +60,28 @@ my_data <- as.matrix(bank[, 1:16, with = FALSE])
# Creating the LightGBM dataset with categorical features
# The categorical features must be indexed like in R (1-indexed, not 0-indexed)
lgb_data <- lgb.Dataset(
  data = my_data
  , label = bank$y
  , categorical_feature = c(2, 3, 4, 5, 7, 8, 9, 11, 16)
)

# We can now train a model
params <- list(
  objective = "binary"
  , metric = "l2"
  , min_data = 1
  , learning_rate = 0.1
  , min_data = 0
  , min_hessian = 1
  , max_depth = 2
)
model <- lgb.train(
  params = params
  , data = lgb_data
  , nrounds = 100
  , valids = list(train = lgb_data)
)

# Try to find split_feature: 2
# If you find it, it means it used a categorical feature in the first tree
...
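
One hedged way to look for that split (a sketch; it assumes the model object trained above and uses the package's lgb.model.dt.tree() helper for illustration):

tree_dt <- lgb.model.dt.tree(model)
# inspect which features each split used, tree by tree
head(as.data.frame(tree_dt)[, c("tree_index", "split_feature")])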
@@ -68,24 +68,33 @@ my_data_test <- as.matrix(bank_test[, 1:16, with = FALSE])
# Creating the LightGBM dataset with categorical features
# The categorical features can be passed to lgb.train to not copy and paste a lot
dtrain <- lgb.Dataset(
  data = my_data_train
  , label = bank_train$y
  , categorical_feature = c(2, 3, 4, 5, 7, 8, 9, 11, 16)
)
dtest <- lgb.Dataset.create.valid(
  dtrain
  , data = my_data_test
  , label = bank_test$y
)

# We can now train a model
params <- list(
  objective = "binary"
  , metric = "l2"
  , min_data = 1
  , learning_rate = 0.1
  , min_data = 0
  , min_hessian = 1
  , max_depth = 2
)
model <- lgb.train(
  params = params
  , data = dtrain
  , nrounds = 100
  , valids = list(train = dtrain, valid = dtest)
)

# Try to find split_feature: 11
# If you find it, it means it used a categorical feature in the first tree
...
@@ -6,30 +6,36 @@ dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label)
nrounds <- 2
param <- list(
  num_leaves = 4
  , learning_rate = 1
  , objective = "binary"
)

print("Running cross validation")
# Do cross validation, this will print result out as
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
lgb.cv(
  param
  , dtrain
  , nrounds
  , nfold = 5
  , eval = "binary_error"
)

print("Running cross validation, disable standard deviation display")
# do cross validation, this will print result out as
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
lgb.cv(
  param
  , dtrain
  , nrounds
  , nfold = 5
  , eval = "binary_error"
  , showsd = FALSE
)

# You can also do cross validation with customized loss function
print("Running cross validation, with customized loss function")
@@ -48,9 +54,11 @@ evalerror <- function(preds, dtrain) {
}

# train with customized objective
lgb.cv(
  params = param
  , data = dtrain
  , nrounds = nrounds
  , obj = logregobj
  , eval = evalerror
  , nfold = 5
)
@@ -11,8 +11,10 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = aga
# Note: for customized objective function, we leave objective as default
# Note: what we are getting is margin value in prediction
# You must know what you are doing
param <- list(
  num_leaves = 4
  , learning_rate = 1
)
valids <- list(eval = dtest)
num_round <- 20
@@ -39,10 +41,12 @@ evalerror <- function(preds, dtrain) {
}

print("Start training with early stopping setting")
bst <- lgb.train(
  param
  , dtrain
  , num_round
  , valids
  , objective = logregobj
  , eval = evalerror
  , early_stopping_round = 3
)
@@ -26,9 +26,11 @@ gbm <- list()
for (i in 1:1000) {
  print(i)
  gbm[[i]] <- lgb.train(
    params = list(objective = "regression")
    , data = data
    , 1
    , reset_data = TRUE
  )
  gc(verbose = FALSE)
}
@@ -17,32 +17,47 @@ dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
# Third, we setup parameters and we train a model
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(
  params
  , dtrain
  , 50
  , valids
  , min_data = 1
  , learning_rate = 0.1
  , bagging_fraction = 0.1
  , bagging_freq = 1
  , bagging_seed = 1
)

# We create a data.frame with the following structure:
# X = average leaf of the observation throughout all trees
# Y = prediction probability (clamped to [1e-15, 1-1e-15])
# Z = logloss
# binned = binned quantile of average leaf
new_data <- data.frame(
  X = rowMeans(predict(
    model
    , agaricus.test$data
    , predleaf = TRUE
  ))
  , Y = pmin(
    pmax(
      predict(model, agaricus.test$data)
      , 1e-15
    )
    , 1 - 1e-15
  )
)
new_data$Z <- -1 * (agaricus.test$label * log(new_data$Y) + (1 - agaricus.test$label) * log(1 - new_data$Y))
new_data$binned <- .bincode(
  x = new_data$X
  , breaks = quantile(
    x = new_data$X
    , probs = (1:9) / 10
  )
  , right = TRUE
  , include.lowest = TRUE
)
new_data$binned[is.na(new_data$binned)] <- 0
new_data$binned <- as.factor(new_data$binned)
@@ -52,31 +67,64 @@ table(new_data$binned)
# We can plot the binned content
# On the second plot, we clearly notice the lower the bin (the lower the leaf value), the higher the loss
# On the third plot, it is smooth!
ggplot(
  data = new_data
  , mapping = aes(x = X, y = Y, color = binned)
) + geom_point() +
  theme_bw() +
  labs(title = "Prediction Depth", x = "Leaf Bin", y = "Prediction Probability")
ggplot(
  data = new_data
  , mapping = aes(x = binned, y = Z, fill = binned, group = binned)
) + geom_boxplot() +
  theme_bw() +
  labs(title = "Prediction Depth Spread", x = "Leaf Bin", y = "Logloss")
ggplot(
  data = new_data
  , mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
  theme_bw() +
  labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")

# Now, let's show with other parameters
model2 <- lgb.train(
  params
  , dtrain
  , 100
  , valids
  , min_data = 1
  , learning_rate = 1
)

# We create the data structure, but for model2
new_data2 <- data.frame(
  X = rowMeans(predict(
    model2
    , agaricus.test$data
    , predleaf = TRUE
  ))
  , Y = pmin(
    pmax(
      predict(
        model2
        , agaricus.test$data
      )
      , 1e-15
    )
    , 1 - 1e-15
  )
)
new_data2$Z <- -1 * (agaricus.test$label * log(new_data2$Y) + (1 - agaricus.test$label) * log(1 - new_data2$Y))
new_data2$binned <- .bincode(
  x = new_data2$X
  , breaks = quantile(
    x = new_data2$X
    , probs = (1:9) / 10
  )
  , right = TRUE
  , include.lowest = TRUE
)
new_data2$binned[is.na(new_data2$binned)] <- 0
new_data2$binned <- as.factor(new_data2$binned)
@@ -87,31 +135,64 @@ table(new_data2$binned)
# On the second plot, we clearly notice the lower the bin (the lower the leaf value), the higher the loss
# On the third plot, it is clearly not smooth! We are severely overfitting the data, but the rules are real thus it is not an issue
# However, if the rules were not true, the loss would explode.
ggplot(
  data = new_data2
  , mapping = aes(x = X, y = Y, color = binned)
) + geom_point() +
  theme_bw() +
  labs(title = "Prediction Depth", x = "Leaf Bin", y = "Prediction Probability")
ggplot(
  data = new_data2
  , mapping = aes(x = binned, y = Z, fill = binned, group = binned)
) + geom_boxplot() +
  theme_bw() +
  labs(title = "Prediction Depth Spread", x = "Leaf Bin", y = "Logloss")
ggplot(
  data = new_data2
  , mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
  theme_bw() +
  labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")

# Now, try with very severe overfitting
model3 <- lgb.train(
  params
  , dtrain
  , 1000
  , valids
  , min_data = 1
  , learning_rate = 1
)

# We create the data structure, but for model3
new_data3 <- data.frame(
  X = rowMeans(predict(
    model3
    , agaricus.test$data
    , predleaf = TRUE
  ))
  , Y = pmin(
    pmax(
      predict(
        model3
        , agaricus.test$data
      )
      , 1e-15
    )
    , 1 - 1e-15
  )
)
new_data3$Z <- -1 * (agaricus.test$label * log(new_data3$Y) + (1 - agaricus.test$label) * log(1 - new_data3$Y))
new_data3$binned <- .bincode(
  x = new_data3$X
  , breaks = quantile(
    x = new_data3$X
    , probs = (1:9) / 10
  )
  , right = TRUE
  , include.lowest = TRUE
)
new_data3$binned[is.na(new_data3$binned)] <- 0
new_data3$binned <- as.factor(new_data3$binned)
@@ -119,9 +200,21 @@ new_data3$binned <- as.factor(new_data3$binned)
table(new_data3$binned)

# We can plot the binned content
# On the third plot, it is clearly not smooth! We are severely overfitting the data, but the rules
# are real thus it is not an issue.
# However, if the rules were not true, the loss would explode. See the sudden spikes?
ggplot(
  data = new_data3
  , mapping = aes(x = Y, y = ..count.., fill = binned)
) +
  geom_density(position = "fill") +
  theme_bw() +
  labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")

# Compare with our second model, the difference is severe. This is smooth.
ggplot(
  data = new_data2
  , mapping = aes(x = Y, y = ..count.., fill = binned)
) + geom_density(position = "fill") +
  theme_bw() +
  labs(title = "Depth Density", x = "Prediction Probability", y = "Bin Density")
@@ -19,29 +19,33 @@ valids <- list(test = dtest)
# Method 1 of training
params <- list(objective = "multiclass", metric = "multi_error", num_class = 3)
model <- lgb.train(
  params
  , dtrain
  , 100
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
)

# We can predict on test data, outputs a 90-length vector
# Order: obs1 class1, obs1 class2, obs1 class3, obs2 class1, obs2 class2, obs2 class3...
my_preds <- predict(model, test[, 1:4])

# Method 2 of training, identical
model <- lgb.train(
  list()
  , dtrain
  , 100
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
  , objective = "multiclass"
  , metric = "multi_error"
  , num_class = 3
)

# We can predict on test data, identical
my_preds <- predict(model, test[, 1:4])
...
@@ -20,17 +20,19 @@ valids <- list(train = dtrain, test = dtest)
# Method 1 of training with built-in multiclass objective
# Note: need to turn off boost from average to match custom objective
# (https://github.com/microsoft/LightGBM/issues/1846)
model_builtin <- lgb.train(
  list()
  , dtrain
  , boost_from_average = FALSE
  , 100
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
  , objective = "multiclass"
  , metric = "multi_logloss"
  , num_class = 3
)

preds_builtin <- predict(model_builtin, test[, 1:4], rawscore = TRUE, reshape = TRUE)
probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
@@ -65,21 +67,25 @@ custom_multiclass_metric = function(preds, dtrain) {
  preds = preds - apply(preds, 1, max)
  prob = exp(preds) / rowSums(exp(preds))
  return(list(
    name = "error"
    , value = -mean(log(prob[cbind(1:length(labels), labels + 1)]))
    , higher_better = FALSE
  ))
}

model_custom <- lgb.train(
  list()
  , dtrain
  , 100
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
  , objective = custom_multiclass_obj
  , eval = custom_multiclass_metric
  , num_class = 3
)

preds_custom <- predict(model_custom, test[, 1:4], rawscore = TRUE, reshape = TRUE)
probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
@@ -87,4 +93,3 @@ probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
# compare predictions
stopifnot(identical(probs_builtin, probs_custom))
stopifnot(identical(preds_builtin, preds_custom))
@@ -11,8 +11,8 @@ library(lightgbm)
# - Run 3: sum of weights equal to 6513 (x 1e5) with adjusted regularization (learning)

# Setup small weights
weights1 <- rep(1 / 100000, 6513)
weights2 <- rep(1 / 100000, 1611)

# Load data and create datasets
data(agaricus.train, package = "lightgbm")
@@ -26,40 +26,48 @@ valids <- list(test = dtest)
# Run 1: sum of weights equal to 0.06513 without adjusted regularization (not learning)
# It cannot learn because regularization is too large!
# min_sum_hessian alone is bigger than the sum of weights, thus you will never learn anything
params <- list(
  objective = "regression"
  , metric = "l2"
  , device = "cpu"
  , min_sum_hessian = 10
  , num_leaves = 7
  , max_depth = 3
  , nthread = 1
)
model <- lgb.train(
  params
  , dtrain
  , 50
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
)

weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(weight_loss) # Shows how poor the learning was: a straight line!

# Run 2: sum of weights equal to 0.06513 with adjusted regularization (learning)
# Adjusted regularization just consists in multiplying results by 1e4 (x10000)
# Notice how it learns, there is no issue as we adjusted regularization ourselves
params <- list(
  objective = "regression"
  , metric = "l2"
  , device = "cpu"
  , min_sum_hessian = 1e-4
  , num_leaves = 7
  , max_depth = 3
  , nthread = 1
)
model <- lgb.train(
  params
  , dtrain
  , 50
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
)

small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(small_weight_loss) # It learns!
@@ -78,24 +86,28 @@ dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
valids <- list(test = dtest)

# Setup parameters and run model...
params <- list(
  objective = "regression"
  , metric = "l2"
  , device = "cpu"
  , min_sum_hessian = 10
  , num_leaves = 7
  , max_depth = 3
  , nthread = 1
)
model <- lgb.train(
  params
  , dtrain
  , 50
  , valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 10
)

large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
plot(large_weight_loss) # It learns!

# Do you want to compare the learning? They both converge.
plot(small_weight_loss, large_weight_loss)
curve(1 * x, from = 0, to = 0.02, add = TRUE)
@@ -12,9 +12,9 @@ getinfo(dataset, ...)
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}

\item{name}{the name of the information field to get (see details)}

\item{...}{other parameters}
}
\value{
info data
...
@@ -4,9 +4,16 @@
\alias{lgb.Dataset}
\title{Construct \code{lgb.Dataset} object}
\usage{
lgb.Dataset(
  data,
  params = list(),
  reference = NULL,
  colnames = NULL,
  categorical_feature = NULL,
  free_raw_data = TRUE,
  info = list(),
  ...
)
}
\arguments{
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
...
@@ -4,13 +4,29 @@
\alias{lgb.cv}
\title{Main CV logic for LightGBM}
\usage{
lgb.cv(
  params = list(),
  data,
  nrounds = 10,
  nfold = 3,
  label = NULL,
  weight = NULL,
  obj = NULL,
  eval = NULL,
  verbose = 1,
  record = TRUE,
  eval_freq = 1L,
  showsd = TRUE,
  stratified = TRUE,
  folds = NULL,
  init_model = NULL,
  colnames = NULL,
  categorical_feature = NULL,
  early_stopping_rounds = NULL,
  callbacks = list(),
  reset_data = FALSE,
  ...
)
}
\arguments{
\item{params}{List of parameters}
@@ -27,7 +43,7 @@ lgb.cv(
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}

\item{eval}{evaluation function, can be (list of) character or custom eval function}
@@ -54,17 +70,15 @@ the \code{nfold} and \code{stratified} parameters are ignored.
type int represents index,
type str represents feature names}

\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation data
and one metric. If there's more than one, will check all of them
except the training data. Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have 'best_iter' field.}

\item{callbacks}{List of callback functions that are applied at each iteration.}

\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}

\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
\itemize{
@@ -89,11 +103,13 @@ data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", metric = "l2")
model <- lgb.cv(
  params = params
  , data = dtrain
  , nrounds = 10
  , nfold = 3
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 5
)
}
@@ -27,13 +27,15 @@ test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(
  params = params
  , data = dtrain
  , nrounds = 10
  , valids = valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 5
)
json_model <- lgb.dump(model)
}
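
As an aside (not part of this man page): the JSON string returned by lgb.dump() can be inspected with a JSON parser, assuming a package such as jsonlite is installed:

parsed <- jsonlite::fromJSON(json_model)
# top-level structure of the dumped model
str(parsed, max.level = 1L)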
@@ -4,8 +4,13 @@
\alias{lgb.get.eval.result}
\title{Get record evaluation result from booster}
\usage{
lgb.get.eval.result(
  booster,
  data_name,
  eval_name,
  iters = NULL,
  is_err = FALSE
)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
@@ -34,12 +39,14 @@ test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(
  params = params
  , data = dtrain
  , nrounds = 10
  , valids = valids
  , min_data = 1
  , learning_rate = 1
  , early_stopping_rounds = 5
)
lgb.get.eval.result(model, "test", "l2")
}
@@ -29,9 +29,14 @@ data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)

params <- list(
  objective = "binary"
  , learning_rate = 0.01
  , num_leaves = 63
  , max_depth = -1
  , min_data_in_leaf = 1
  , min_sum_hessian_in_leaf = 1
)
model <- lgb.train(params, dtrain, 10)

tree_imp1 <- lgb.importance(model, percentage = TRUE)
...
@@ -11,7 +11,7 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
\item{data}{a matrix object or a dgCMatrix object.}

\item{idxset}{an integer vector of indices of rows needed.}

\item{num_iteration}{number of iterations you want to predict with. NULL or <= 0 means use best iteration.}
}
...