Unverified Commit fc991c9d authored by James Lamb, committed by GitHub

[R-package] added R linting and changed R code to comma-first (fixes #2373) (#2437)

parent b4bb38d9
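Note for readers: every R call reformatted below follows the comma-first convention named in the commit title. A minimal sketch of the convention, using a params list lifted from the examples in this diff — each argument after the first starts its own line with a leading comma, so adding or removing an argument touches exactly one line and a missing or stray comma is visible at a glance:

params <- list(
  objective = "regression"
  , metric = "l2"
  , min_data = 1
  , learning_rate = 1
)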
...
@@ -29,13 +29,15 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 lgb.save(model, "model.txt")
 load_booster <- lgb.load(filename = "model.txt")
 model_string <- model$save_model_to_string(NULL) # saves best iteration
...
...
@@ -44,9 +44,14 @@ data(agaricus.train, package = "lightgbm")
 train <- agaricus.train
 dtrain <- lgb.Dataset(train$data, label = train$label)
-params <- list(objective = "binary",
-               learning_rate = 0.01, num_leaves = 63, max_depth = -1,
-               min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
+params <- list(
+  objective = "binary"
+  , learning_rate = 0.01
+  , num_leaves = 63
+  , max_depth = -1
+  , min_data_in_leaf = 1
+  , min_sum_hessian_in_leaf = 1
+)
 model <- lgb.train(params, dtrain, 10)
 tree_dt <- lgb.model.dt.tree(model)
...
...
@@ -4,8 +4,13 @@
 \alias{lgb.plot.importance}
 \title{Plot feature importance as a bar graph}
 \usage{
-lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain",
-  left_margin = 10, cex = NULL)
+lgb.plot.importance(
+  tree_imp,
+  top_n = 10,
+  measure = "Gain",
+  left_margin = 10,
+  cex = NULL
+)
 }
 \arguments{
 \item{tree_imp}{a \code{data.table} returned by \code{\link{lgb.importance}}.}
...
...
@@ -4,8 +4,13 @@
 \alias{lgb.plot.interpretation}
 \title{Plot feature contribution as a bar graph}
 \usage{
-lgb.plot.interpretation(tree_interpretation_dt, top_n = 10, cols = 1,
-  left_margin = 10, cex = NULL)
+lgb.plot.interpretation(
+  tree_interpretation_dt,
+  top_n = 10,
+  cols = 1,
+  left_margin = 10,
+  cex = NULL
+)
 }
 \arguments{
 \item{tree_interpretation_dt}{a \code{data.table} returned by \code{\link{lgb.interprete}}.}
...
@@ -25,8 +30,8 @@ The \code{lgb.plot.interpretation} function creates a \code{barplot}.
 Plot previously calculated feature contribution as a bar graph.
 }
 \details{
-The graph represents each feature as a horizontal bar of length proportional to the defined contribution of a feature.
-Features are shown ranked in a decreasing contribution order.
+The graph represents each feature as a horizontal bar of length proportional to the defined
+contribution of a feature. Features are shown ranked in a decreasing contribution order.
 }
 \examples{
 library(lightgbm)
...
@@ -39,9 +44,14 @@ setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
 data(agaricus.test, package = "lightgbm")
 test <- agaricus.test
-params <- list(objective = "binary",
-               learning_rate = 0.01, num_leaves = 63, max_depth = -1,
-               min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
+params <- list(
+  objective = "binary"
+  , learning_rate = 0.01
+  , num_leaves = 63
+  , max_depth = -1
+  , min_data_in_leaf = 1
+  , min_sum_hessian_in_leaf = 1
+)
 model <- lgb.train(params, dtrain, 10)
 tree_interpretation <- lgb.interprete(model, test$data, 1:5)
...
...
@@ -10,10 +10,13 @@ lgb.prepare(data)
 \item{data}{A data.frame or data.table to prepare.}
 }
 \value{
-The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
+The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
+for input in \code{lgb.Dataset}.
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+Factors and characters are converted to numeric without integers. Please use
+\code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
 }
 \examples{
 library(lightgbm)
...
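As a quick illustration of the re-wrapped description above, a minimal sketch using the iris data that the package's own examples rely on (lgb.prepare() is used exactly as documented in this hunk):

library(lightgbm)
data(iris)
# Species is a factor; lgb.prepare() recodes it as numeric so the frame
# can pass through as.matrix() into lgb.Dataset, per the \value note above.
str(lgb.prepare(data = iris))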
...
@@ -10,10 +10,16 @@ lgb.prepare2(data)
 \item{data}{A data.frame or data.table to prepare.}
 }
 \value{
-The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
+The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
+for input in \code{lgb.Dataset}.
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+Factors and characters are converted to numeric (specifically: integer).
+Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets.
+This is useful if you have a specific need for integer dataset instead of numeric dataset.
+Note that there are programs which do not support integer-only input. Consider this as a half
+memory technique which is dangerous, especially for LightGBM.
 }
 \examples{
 library(lightgbm)
...
...
@@ -12,10 +12,14 @@ lgb.prepare_rules(data, rules = NULL)
 \item{rules}{A set of rules from the data preparator, if already used.}
 }
 \value{
-A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
+A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
+The data must be converted to a matrix format (\code{as.matrix}) for input
+in \code{lgb.Dataset}.
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter.
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+Factors and characters are converted to numeric. In addition, keeps rules created
+so you can convert other datasets using this converter.
 }
 \examples{
 library(lightgbm)
...
...
@@ -12,43 +12,15 @@ lgb.prepare_rules2(data, rules = NULL)
 \item{rules}{A set of rules from the data preparator, if already used.}
 }
 \value{
-A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
+A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
+The data must be converted to a matrix format (\code{as.matrix}) for input in
+\code{lgb.Dataset}.
 }
 \description{
-Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
-}
-\examples{
-library(lightgbm)
-data(iris)
-str(iris)
-new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
-str(new_iris$data)
-data(iris) # Erase iris dataset
-iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
-# Use conversion using known rules
-# Unknown factors become 0, excellent for sparse datasets
-newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
-# Unknown factor is now zero, perfect for sparse datasets
-newer_iris$data[1, ] # Species became 0 as it is an unknown factor
-newer_iris$data[1, 5] <- 1 # Put back real initial value
-# Is the newly created dataset equal? YES!
-all.equal(new_iris$data, newer_iris$data)
-# Can we test our own rules?
-data(iris) # Erase iris dataset
-# We remapped values differently
-personal_rules <- list(Species = c("setosa" = 3L,
-                                   "versicolor" = 2L,
-                                   "virginica" = 1L))
-newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
-str(newest_iris$data) # SUCCESS!
+Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
+Factors and characters are converted to numeric (specifically: integer).
+In addition, keeps rules created so you can convert other datasets using this converter.
+This is useful if you have a specific need for integer dataset instead of numeric dataset.
+Note that there are programs which do not support integer-only input.
+Consider this as a half memory technique which is dangerous, especially for LightGBM.
 }
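The example lines deleted above still describe the intended workflow for reusable conversion rules; the following is condensed from those same removed lines (no new API assumed):

library(lightgbm)
data(iris)
new_iris <- lgb.prepare_rules2(data = iris)  # learn integer conversion rules
data(iris)  # fresh copy of iris
iris$Species[1] <- "NEW FACTOR"  # a level the learned rules have never seen
newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
newer_iris$data[1, 5]  # unknown levels map to 0 under existing rules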
...
@@ -29,13 +29,15 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 lgb.save(model, "model.txt")
 }
...
@@ -4,11 +4,24 @@
 \alias{lgb.train}
 \title{Main training logic for LightGBM}
 \usage{
-lgb.train(params = list(), data, nrounds = 10, valids = list(),
-  obj = NULL, eval = NULL, verbose = 1, record = TRUE,
-  eval_freq = 1L, init_model = NULL, colnames = NULL,
-  categorical_feature = NULL, early_stopping_rounds = NULL,
-  callbacks = list(), reset_data = FALSE, ...)
+lgb.train(
+  params = list(),
+  data,
+  nrounds = 10,
+  valids = list(),
+  obj = NULL,
+  eval = NULL,
+  verbose = 1,
+  record = TRUE,
+  eval_freq = 1L,
+  init_model = NULL,
+  colnames = NULL,
+  categorical_feature = NULL,
+  early_stopping_rounds = NULL,
+  callbacks = list(),
+  reset_data = FALSE,
+  ...
+)
 }
 \arguments{
 \item{params}{List of parameters}
...
@@ -39,17 +52,16 @@ lgb.train(params = list(), data, nrounds = 10, valids = list(),
 type int represents index,
 type str represents feature names}

-\item{early_stopping_rounds}{int
-Activates early stopping.
-Requires at least one validation data and one metric
-If there's more than one, will check all of them except the training data
-Returns the model with (best_iter + early_stopping_rounds)
-If early stopping occurs, the model will have 'best_iter' field}
+\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation data
+and one metric. If there's more than one, will check all of them
+except the training data. Returns the model with (best_iter + early_stopping_rounds).
+If early stopping occurs, the model will have 'best_iter' field.}

-\item{callbacks}{list of callback functions
-List of callback functions that are applied at each iteration.}
+\item{callbacks}{List of callback functions that are applied at each iteration.}

-\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets}
+\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the
+booster model into a predictor model which frees up memory and the
+original datasets}

 \item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
 \itemize{
...
@@ -78,11 +90,13 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 }
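The rewrapped early_stopping_rounds entry above is the contract the examples rely on; a minimal sketch reusing dtrain, params, and valids from the example hunk just above, with nrounds deliberately generous so stopping can fire:

model <- lgb.train(
  params = params
  , data = dtrain
  , nrounds = 100
  , valids = valids
  , early_stopping_rounds = 5
)
model$best_iter  # per the docs above, set when early stopping occurs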
...
@@ -7,11 +7,15 @@
 lgb.unloader(restore = TRUE, wipe = FALSE, envir = .GlobalEnv)
 }
 \arguments{
-\item{restore}{Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed.}
+\item{restore}{Whether to reload \code{LightGBM} immediately after detaching from R.
+Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once
+unloading is performed.}

-\item{wipe}{Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global environment. Defaults to \code{FALSE} which means to not remove them.}
+\item{wipe}{Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global
+environment. Defaults to \code{FALSE} which means to not remove them.}

-\item{envir}{The environment to perform wiping on if \code{wipe == TRUE}. Defaults to \code{.GlobalEnv} which is the global environment.}
+\item{envir}{The environment to perform wiping on if \code{wipe == TRUE}. Defaults to
+\code{.GlobalEnv} which is the global environment.}
 }
 \value{
 NULL invisibly.
...
@@ -29,13 +33,15 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 \dontrun{
 lgb.unloader(restore = FALSE, wipe = FALSE, envir = .GlobalEnv)
...
...
@@ -9,12 +9,10 @@ List of callback functions that are applied at each iteration.}
 \item{data}{a \code{lgb.Dataset} object, used for training}

-\item{early_stopping_rounds}{int
-Activates early stopping.
-Requires at least one validation data and one metric
-If there's more than one, will check all of them except the training data
-Returns the model with (best_iter + early_stopping_rounds)
-If early stopping occurs, the model will have 'best_iter' field}
+\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation data
+and one metric. If there's more than one, will check all of them
+except the training data. Returns the model with (best_iter + early_stopping_rounds).
+If early stopping occurs, the model will have 'best_iter' field.}

 \item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
...
...
@@ -4,10 +4,20 @@
 \alias{lightgbm}
 \title{Train a LightGBM model}
 \usage{
-lightgbm(data, label = NULL, weight = NULL, params = list(),
-  nrounds = 10, verbose = 1, eval_freq = 1L,
-  early_stopping_rounds = NULL, save_name = "lightgbm.model",
-  init_model = NULL, callbacks = list(), ...)
+lightgbm(
+  data,
+  label = NULL,
+  weight = NULL,
+  params = list(),
+  nrounds = 10,
+  verbose = 1,
+  eval_freq = 1L,
+  early_stopping_rounds = NULL,
+  save_name = "lightgbm.model",
+  init_model = NULL,
+  callbacks = list(),
+  ...
+)
 }
 \arguments{
 \item{data}{a \code{lgb.Dataset} object, used for training}
...
@@ -24,12 +34,10 @@ lightgbm(data, label = NULL, weight = NULL, params = list(),
 \item{eval_freq}{evaluation output frequency, only effect when verbose > 0}

-\item{early_stopping_rounds}{int
-Activates early stopping.
-Requires at least one validation data and one metric
-If there's more than one, will check all of them except the training data
-Returns the model with (best_iter + early_stopping_rounds)
-If early stopping occurs, the model will have 'best_iter' field}
+\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation data
+and one metric. If there's more than one, will check all of them
+except the training data. Returns the model with (best_iter + early_stopping_rounds).
+If early stopping occurs, the model will have 'best_iter' field.}

 \item{save_name}{File name to use when writing the trained model to disk. Should end in ".model".}
...
...
@@ -4,9 +4,17 @@
 \alias{predict.lgb.Booster}
 \title{Predict method for LightGBM model}
 \usage{
-\method{predict}{lgb.Booster}(object, data, num_iteration = NULL,
-  rawscore = FALSE, predleaf = FALSE, predcontrib = FALSE,
-  header = FALSE, reshape = FALSE, ...)
+\method{predict}{lgb.Booster}(
+  object,
+  data,
+  num_iteration = NULL,
+  rawscore = FALSE,
+  predleaf = FALSE,
+  predcontrib = FALSE,
+  header = FALSE,
+  reshape = FALSE,
+  ...
+)
 }
 \arguments{
 \item{object}{Object of class \code{lgb.Booster}}
...
@@ -16,8 +24,8 @@
 \item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration}

 \item{rawscore}{whether the prediction should be returned in the for of original untransformed
-sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
-logistic regression would result in predictions for log-odds instead of probabilities.}
+sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE}
+for logistic regression would result in predictions for log-odds instead of probabilities.}

 \item{predleaf}{whether predict leaf index instead.}
...
@@ -53,13 +61,15 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 preds <- predict(model, test$data)
 }
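The rawscore rewrap above is clearest with a concrete case. A sketch assuming a booster trained with objective = "binary" (an assumption: the example in this hunk trains a regression model, and bst_binary is a hypothetical name):

raw <- predict(bst_binary, test$data, rawscore = TRUE)  # log-odds
prob <- 1 / (1 + exp(-raw))  # applying the sigmoid recovers the default probabilities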
...
@@ -27,13 +27,15 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
-model <- lgb.train(params,
-                   dtrain,
-                   10,
-                   valids,
-                   min_data = 1,
-                   learning_rate = 1,
-                   early_stopping_rounds = 5)
+model <- lgb.train(
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
+  , min_data = 1
+  , learning_rate = 1
+  , early_stopping_rounds = 5
+)
 saveRDS.lgb.Booster(model, "model.rds")
 new_model <- readRDS.lgb.Booster("model.rds")
...
...
@@ -4,19 +4,31 @@
 \alias{saveRDS.lgb.Booster}
 \title{saveRDS for \code{lgb.Booster} models}
 \usage{
-saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
-  compress = TRUE, refhook = NULL, raw = TRUE)
+saveRDS.lgb.Booster(
+  object,
+  file = "",
+  ascii = FALSE,
+  version = NULL,
+  compress = TRUE,
+  refhook = NULL,
+  raw = TRUE
+)
 }
 \arguments{
 \item{object}{R object to serialize.}

 \item{file}{a connection or the name of the file where the R object is saved to or read from.}

-\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), a binary one is used. See the comments in the help for save.}
+\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default),
+a binary one is used. See the comments in the help for save.}

-\item{version}{the workspace format version to use. \code{NULL} specifies the current default version (2). Versions prior to 2 are not supported, so this will only be relevant when there are later versions.}
+\item{version}{the workspace format version to use. \code{NULL} specifies the current default
+version (2). Versions prior to 2 are not supported, so this will only be relevant
+when there are later versions.}

-\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression, or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of compression to be used. Ignored if file is a connection.}
+\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression,
+or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of
+compression to be used. Ignored if file is a connection.}

 \item{refhook}{a hook function for handling reference objects.}
...
@@ -26,7 +38,8 @@ saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
 NULL invisibly.
 }
 \description{
-Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
+Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides
+whether to save the raw model or not.
 }
 \examples{
 library(lightgbm)
...
@@ -39,10 +52,10 @@ dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 params <- list(objective = "regression", metric = "l2")
 valids <- list(test = dtest)
 model <- lgb.train(
-  params
-  , dtrain
-  , 10
-  , valids
+  params = params
+  , data = dtrain
+  , nrounds = 10
+  , valids = valids
   , min_data = 1
   , learning_rate = 1
   , early_stopping_rounds = 5
...
...
@@ -12,11 +12,11 @@ setinfo(dataset, ...)
 \arguments{
 \item{dataset}{Object of class \code{lgb.Dataset}}

-\item{...}{other parameters}
-
 \item{name}{the name of the field to get}

 \item{info}{the specific field of information to set}
+
+\item{...}{other parameters}
 }
 \value{
 passed object
...
...
@@ -12,9 +12,9 @@ slice(dataset, ...)
 \arguments{
 \item{dataset}{Object of class \code{lgb.Dataset}}

-\item{...}{other parameters (currently not used)}
+\item{idxset}{an integer vector of indices of rows needed}

-\item{idxset}{a integer vector of indices of rows needed}
+\item{...}{other parameters (currently not used)}
 }
 \value{
 constructed sub dataset
...
...
@@ -8,7 +8,7 @@ if (.Machine$sizeof.pointer != 8){
 }

 R_int_UUID <- .Internal(internalsID())
-R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor)/10
+R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10

 if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262"
     || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")){
...
@@ -74,7 +74,7 @@ if (!use_precompile) {
   try_vs <- 0
   local_vs_def <- ""
   vs_versions <- c("Visual Studio 16 2019", "Visual Studio 15 2017", "Visual Studio 14 2015")
-  for(vs in vs_versions){
+  for (vs in vs_versions){
     vs_def <- paste0(" -G \"", vs, "\" -A x64")
     tmp_cmake_cmd <- paste0(cmake_cmd, vs_def)
     try_vs <- system(paste0(tmp_cmake_cmd, " .."))
...
@@ -106,14 +106,29 @@ if (!use_precompile) {

 # Has precompiled package
 lib_folder <- file.path(R_PACKAGE_SOURCE, "../", fsep = "/")
-if (file.exists(file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/"))) {
-  src <- file.path(lib_folder, paste0("lib_lightgbm", SHLIB_EXT), fsep = "/")
-} else if (file.exists(file.path(lib_folder, paste0("Release/lib_lightgbm", SHLIB_EXT), fsep = "/"))) {
-  src <- file.path(lib_folder, paste0("Release/lib_lightgbm", SHLIB_EXT), fsep = "/")
+shared_object_file <- file.path(
+  lib_folder
+  , paste0("lib_lightgbm", SHLIB_EXT)
+  , fsep = "/"
+)
+release_file <- file.path(
+  lib_folder
+  , paste0("Release/lib_lightgbm", SHLIB_EXT)
+  , fsep = "/"
+)
+windows_shared_object_file <- file.path(
+  lib_folder
+  , paste0("/windows/x64/DLL/lib_lightgbm", SHLIB_EXT)
+  , fsep = "/"
+)
+if (file.exists(shared_object_file)) {
+  src <- shared_object_file
+} else if (file.exists(release_file)) {
+  src <- release_file
 } else {
-  src <- file.path(lib_folder, paste0("/windows/x64/DLL/lib_lightgbm", SHLIB_EXT), fsep = "/")
+  # Expected result: installation will fail if it is not here or any other
+  src <- windows_shared_object_file
 }
 }
 # Check installation correctness
...
context("basic functions") context("basic functions")
data(agaricus.train, package='lightgbm') data(agaricus.train, package = 'lightgbm')
data(agaricus.test, package='lightgbm') data(agaricus.test, package = 'lightgbm')
train <- agaricus.train train <- agaricus.train
test <- agaricus.test test <- agaricus.test
...@@ -9,8 +9,14 @@ windows_flag = grepl('Windows', Sys.info()[['sysname']]) ...@@ -9,8 +9,14 @@ windows_flag = grepl('Windows', Sys.info()[['sysname']])
test_that("train and predict binary classification", { test_that("train and predict binary classification", {
nrounds = 10 nrounds = 10
bst <- lightgbm(data = train$data, label = train$label, num_leaves = 5, bst <- lightgbm(
nrounds = nrounds, objective = "binary", metric="binary_error") data = train$data
, label = train$label
, num_leaves = 5
, nrounds = nrounds
, objective = "binary"
, metric = "binary_error"
)
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "binary_error") record_results <- lgb.get.eval.result(bst, "train", "binary_error")
expect_lt(min(record_results), 0.02) expect_lt(min(record_results), 0.02)
...@@ -20,7 +26,7 @@ test_that("train and predict binary classification", { ...@@ -20,7 +26,7 @@ test_that("train and predict binary classification", {
pred1 <- predict(bst, train$data, num_iteration = 1) pred1 <- predict(bst, train$data, num_iteration = 1)
expect_equal(length(pred1), 6513) expect_equal(length(pred1), 6513)
err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label) err_pred1 <- sum( (pred1 > 0.5) != train$label) / length(train$label)
err_log <- record_results[1] err_log <- record_results[1]
expect_lt(abs(err_pred1 - err_log), 10e-6) expect_lt(abs(err_pred1 - err_log), 10e-6)
}) })
...@@ -29,9 +35,18 @@ test_that("train and predict binary classification", { ...@@ -29,9 +35,18 @@ test_that("train and predict binary classification", {
test_that("train and predict softmax", { test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1 lb <- as.numeric(iris$Species) - 1
bst <- lightgbm(data = as.matrix(iris[, -5]), label = lb, bst <- lightgbm(
num_leaves = 4, learning_rate = 0.1, nrounds = 20, min_data=20, min_hess=20, data = as.matrix(iris[, -5])
objective = "multiclass", metric="multi_error", num_class=3) , label = lb
, num_leaves = 4
, learning_rate = 0.1
, nrounds = 20
, min_data = 20
, min_hess = 20
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3
)
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "multi_error") record_results <- lgb.get.eval.result(bst, "train", "multi_error")
...@@ -43,18 +58,33 @@ test_that("train and predict softmax", { ...@@ -43,18 +58,33 @@ test_that("train and predict softmax", {
test_that("use of multiple eval metrics works", { test_that("use of multiple eval metrics works", {
bst <- lightgbm(data = train$data, label = train$label, num_leaves = 4, bst <- lightgbm(
learning_rate=1, nrounds = 10, objective = "binary", data = train$data
metric = list("binary_error","auc","binary_logloss") ) , label = train$label
, num_leaves = 4
, learning_rate = 1
, nrounds = 10
, objective = "binary"
, metric = list("binary_error","auc","binary_logloss")
)
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
}) })
test_that("training continuation works", { test_that("training continuation works", {
testthat::skip("This test is currently broken. See issue #2468 for details.") testthat::skip("This test is currently broken. See issue #2468 for details.")
dtrain <- lgb.Dataset(train$data, label = train$label, free_raw_data=FALSE) dtrain <- lgb.Dataset(
watchlist = list(train=dtrain) train$data
param <- list(objective = "binary", metric="binary_logloss", num_leaves = 5, learning_rate = 1) , label = train$label
, free_raw_data = FALSE
)
watchlist = list(train = dtrain)
param <- list(
objective = "binary"
, metric = "binary_logloss"
, num_leaves = 5
, learning_rate = 1
)
# for the reference, use 10 iterations at once: # for the reference, use 10 iterations at once:
bst <- lgb.train(param, dtrain, nrounds = 10, watchlist) bst <- lgb.train(param, dtrain, nrounds = 10, watchlist)
...@@ -75,8 +105,16 @@ test_that("training continuation works", { ...@@ -75,8 +105,16 @@ test_that("training continuation works", {
test_that("cv works", { test_that("cv works", {
dtrain <- lgb.Dataset(train$data, label=train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective="regression", metric="l2,l1") params <- list(objective = "regression", metric = "l2,l1")
bst <- lgb.cv(params, dtrain, 10, nfold=5, min_data=1, learning_rate=1, early_stopping_rounds=10) bst <- lgb.cv(
params
, dtrain
, 10
, nfold = 5
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 10
)
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
}) })
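For completeness, these specs run under testthat; a hedged local invocation from a source checkout (the file path is an assumption based on the standard R-package layout and is not shown in this diff):

library(testthat)
library(lightgbm)
# run just the basic-functions suite shown in this diff
test_file("R-package/tests/testthat/test_basic.R")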