Commit 029bcc42 authored by James Lamb's avatar James Lamb Committed by Laurae
Browse files

[R-package] updated examples and removed dontrun guards on them in roxygen (#1626)

parent abd73765
......@@ -633,7 +633,6 @@ Booster <- R6::R6Class(
#' number of columns corresponding to the number of trees.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -651,7 +650,6 @@ Booster <- R6::R6Class(
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' preds <- predict(model, test$data)
#' }
#'
#' @rdname predict.lgb.Booster
#' @export
......@@ -692,7 +690,6 @@ predict.lgb.Booster <- function(object,
#' @return lgb.Booster
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -713,7 +710,6 @@ predict.lgb.Booster <- function(object,
#' load_booster <- lgb.load(filename = "model.txt")
#' model_string <- model$save_model_to_string(NULL) # saves best iteration
#' load_booster_from_str <- lgb.load(model_str = model_string)
#' }
#'
#' @rdname lgb.load
#' @export
......@@ -752,7 +748,6 @@ lgb.load <- function(filename = NULL, model_str = NULL){
#' @return lgb.Booster
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -770,7 +765,6 @@ lgb.load <- function(filename = NULL, model_str = NULL){
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' lgb.save(model, "model.txt")
#' }
#'
#' @rdname lgb.save
#' @export
......@@ -801,7 +795,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL){
#' @return json format of model
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -819,7 +812,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL){
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' json_model <- lgb.dump(model)
#' }
#'
#' @rdname lgb.dump
#' @export
......@@ -847,7 +839,6 @@ lgb.dump <- function(booster, num_iteration = NULL){
#' @return vector of evaluation result
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -865,7 +856,6 @@ lgb.dump <- function(booster, num_iteration = NULL){
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' lgb.get.eval.result(model, "test", "l2")
#' }
#'
#' @rdname lgb.get.eval.result
#' @export
......
......@@ -311,6 +311,7 @@ Dataset <- R6::R6Class(
} else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
# Check if dgCMatrix (sparse matrix column compressed)
# NOTE: requires Matrix package
dim(private$raw_data)
} else {
......@@ -392,9 +393,11 @@ Dataset <- R6::R6Class(
# Check for info name and handle
if (is.null(private$info[[name]])) {
if (lgb.is.null.handle(private$handle)){
stop("Cannot perform getinfo before construct Dataset.")
stop("Cannot perform getinfo before constructing Dataset.")
}
# Get field size of info
info_len <- 0L
info_len <- lgb.call("LGBM_DatasetGetFieldSize_R",
......@@ -646,7 +649,6 @@ Dataset <- R6::R6Class(
#' @return constructed dataset
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -654,7 +656,6 @@ Dataset <- R6::R6Class(
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
#' }
#'
#' @export
lgb.Dataset <- function(data,
......@@ -692,7 +693,6 @@ lgb.Dataset <- function(data,
#' @return constructed dataset
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -700,7 +700,6 @@ lgb.Dataset <- function(data,
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#' }
#'
#' @export
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
......@@ -720,13 +719,11 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
#' @param dataset Object of class \code{lgb.Dataset}
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' }
#'
#' @export
lgb.Dataset.construct <- function(dataset) {
......@@ -754,7 +751,6 @@ lgb.Dataset.construct <- function(dataset) {
#' be directly used with an \code{lgb.Dataset} object.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -763,7 +759,6 @@ lgb.Dataset.construct <- function(dataset) {
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#' }
#'
#' @rdname dim
#' @export
......@@ -793,7 +788,6 @@ dim.lgb.Dataset <- function(x, ...) {
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -803,7 +797,6 @@ dim.lgb.Dataset <- function(x, ...) {
#' colnames(dtrain)
#' colnames(dtrain) <- make.names(1:ncol(train$data))
#' print(dtrain, verbose = TRUE)
#' }
#'
#' @rdname dimnames.lgb.Dataset
#' @export
......@@ -864,15 +857,14 @@ dimnames.lgb.Dataset <- function(x) {
#' @return constructed sub dataset
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' dsub <- lightgbm::slice(dtrain, 1:42)
#' lgb.Dataset.construct(dsub)
#' labels <- lightgbm::getinfo(dsub, "label")
#' }
#'
#' @export
slice <- function(dataset, ...) {
......@@ -911,7 +903,6 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
#' }
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -923,7 +914,6 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
#' }
#'
#' @export
getinfo <- function(dataset, ...) {
......@@ -963,7 +953,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' }
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -975,7 +964,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
#' }
#'
#' @export
setinfo <- function(dataset, ...) {
......@@ -1003,7 +991,6 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
#' @return passed dataset
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -1011,7 +998,6 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
#' lgb.Dataset.save(dtrain, "lgb.Dataset.data")
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.set.categorical(dtrain, 1:2)
#' }
#'
#' @rdname lgb.Dataset.set.categorical
#' @export
......@@ -1037,7 +1023,6 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#' @return passed dataset
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package ="lightgbm")
#' train <- agaricus.train
......@@ -1046,7 +1031,6 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#' test <- agaricus.test
#' dtest <- lgb.Dataset(test$data, label = test$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
#' }
#'
#' @rdname lgb.Dataset.set.reference
#' @export
......@@ -1070,13 +1054,11 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#'
#' @examples
#'
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
#' }
#'
#' @rdname lgb.Dataset.save
#' @export
......
......@@ -55,7 +55,6 @@ CVBooster <- R6::R6Class(
#' @return a trained model \code{lgb.CVBooster}.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -68,7 +67,6 @@ CVBooster <- R6::R6Class(
#' min_data = 1,
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' }
#' @export
lgb.cv <- function(params = list(),
data,
......
......@@ -16,13 +16,12 @@
#' }
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' params = list(objective = "binary",
#' params <- list(objective = "binary",
#' learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
......@@ -30,7 +29,6 @@
#'
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
#' }
#'
#' @importFrom magrittr %>% %T>%
#' @importFrom data.table :=
......
......@@ -17,8 +17,6 @@
#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' Sigmoid <- function(x) 1 / (1 + exp(-x))
#' Logit <- function(x) log(x / (1 - x))
#' data(agaricus.train, package = "lightgbm")
......@@ -27,15 +25,18 @@
#' setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#'
#' params = list(objective = "binary",
#' learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
#'
#' params <- list(
#' objective = "binary"
#' , learning_rate = 0.01
#' , num_leaves = 63
#' , max_depth = -1
#' , min_data_in_leaf = 1
#' , min_sum_hessian_in_leaf = 1
#' )
#' model <- lgb.train(params, dtrain, 20)
#'
#'
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
#' }
#'
#' @importFrom magrittr %>% %T>%
#' @export
......
......@@ -30,21 +30,18 @@
#' }
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#'
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' params = list(objective = "binary",
#' params <- list(objective = "binary",
#' learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
#' model <- lgb.train(params, dtrain, 20)
#'
#' tree_dt <- lgb.model.dt.tree(model)
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom data.table := data.table rbindlist
......
......@@ -17,20 +17,23 @@
#' and silently returns a processed data.table with \code{top_n} features sorted by defined importance.
#'
#' @examples
#' \dontrun{
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#' params = list(objective = "binary",
#' learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
#' model <- lgb.train(params, dtrain, 20)
#'
#' tree_imp <- lgb.importance(model, percentage = TRUE)
#' lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain")
#' }
# data(agaricus.train, package = "lightgbm")
# train <- agaricus.train
# dtrain <- lgb.Dataset(train$data, label = train$label)
#
# params <- list(
# objective = "binary"
# , learning_rate = 0.01
# , num_leaves = 63
# , max_depth = -1
# , min_data_in_leaf = 1
# , min_sum_hessian_in_leaf = 1
# )
#
# model <- lgb.train(params, dtrain, 20)
#
# tree_imp <- lgb.importance(model, percentage = TRUE)
# lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain")
#' @importFrom graphics barplot par
#' @export
lgb.plot.importance <- function(tree_imp,
......
......@@ -16,7 +16,6 @@
#' The \code{lgb.plot.interpretation} function creates a \code{barplot}.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' Sigmoid <- function(x) {1 / (1 + exp(-x))}
#' Logit <- function(x) {log(x / (1 - x))}
......@@ -27,7 +26,7 @@
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#'
#' params = list(objective = "binary",
#' params <- list(objective = "binary",
#' learning_rate = 0.01, num_leaves = 63, max_depth = -1,
#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
#' model <- lgb.train(params, dtrain, 20)
......@@ -35,7 +34,6 @@
#'
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
#' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
#' }
#' @importFrom graphics barplot par
#' @export
lgb.plot.interpretation <- function(tree_interpretation_dt,
......
#' Data preparator for LightGBM datasets (numeric)
#'
#' Attempts to prepare a clean dataset to put in a lgb.Dataset. Factors and characters are converted to numeric (double, not integer). Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
#' str(lightgbm::lgb.prepare(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#' }
#'
#' @export
lgb.prepare <- function(data) {

  # Only data.frame-like inputs are supported. A data.table also inherits
  # from "data.frame", so a single check covers both accepted classes.
  if (!inherits(data, "data.frame")) {
    stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
  }

  # Detect character and factor columns with per-column predicates.
  # Comparing the result of sapply(data, class) against a string breaks when a
  # column carries more than one class (e.g. ordered factors), because sapply
  # then returns a list or a matrix instead of a character vector.
  is_char <- vapply(data, is.character, logical(1L))
  is_fact <- vapply(data, is.factor, logical(1L))
  to_convert <- which(is_char | is_fact)

  if (length(to_convert) > 0L) {

    # as.factor() leaves factors untouched and encodes characters, so a single
    # converter handles both column kinds and yields the level codes as doubles
    to_numeric <- function(x) {
      as.numeric(as.factor(x))
    }

    if (inherits(data, "data.table")) {
      # data.table does not behave like data.frame: update columns by reference
      data[, (to_convert) := lapply(.SD, to_numeric), .SDcols = to_convert]
    } else {
      # Default routine (data.frame)
      data[to_convert] <- lapply(data[to_convert], to_numeric)
    }

  }

  return(data)

}
#' Data preparator for LightGBM datasets (numeric)
#'
#' Attempts to prepare a clean dataset to put in a lgb.Dataset. Factors and characters are converted to numeric (double, not integer). Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
#' str(lightgbm::lgb.prepare(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' @export
lgb.prepare <- function(data) {

  # Only data.frame-like inputs are supported. A data.table also inherits
  # from "data.frame", so a single check covers both accepted classes.
  if (!inherits(data, "data.frame")) {
    stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
  }

  # Detect character and factor columns with per-column predicates.
  # Comparing the result of sapply(data, class) against a string breaks when a
  # column carries more than one class (e.g. ordered factors), because sapply
  # then returns a list or a matrix instead of a character vector.
  is_char <- vapply(data, is.character, logical(1L))
  is_fact <- vapply(data, is.factor, logical(1L))
  to_convert <- which(is_char | is_fact)

  if (length(to_convert) > 0L) {

    # as.factor() leaves factors untouched and encodes characters, so a single
    # converter handles both column kinds and yields the level codes as doubles
    to_numeric <- function(x) {
      as.numeric(as.factor(x))
    }

    if (inherits(data, "data.table")) {
      # data.table does not behave like data.frame: update columns by reference
      data[, (to_convert) := lapply(.SD, to_numeric), .SDcols = to_convert]
    } else {
      # Default routine (data.frame)
      data[to_convert] <- lapply(data[to_convert], to_numeric)
    }

  }

  return(data)

}
#' Data preparator for LightGBM datasets (integer)
#'
#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
#' str(lightgbm::lgb.prepare2(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' }
#'
#' @export
lgb.prepare2 <- function(data) {

  # Only data.frame-like inputs are supported. A data.table also inherits
  # from "data.frame", so a single check covers both accepted classes.
  if (!inherits(data, "data.frame")) {
    stop("lgb.prepare2: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
  }

  # Detect character and factor columns with per-column predicates.
  # vapply(data, class, character(1)) throws as soon as one column carries
  # more than one class (e.g. ordered factors or POSIXct), so the class
  # vector itself is never materialised here.
  is_char <- vapply(data, is.character, logical(1L))
  is_fact <- vapply(data, is.factor, logical(1L))
  to_convert <- which(is_char | is_fact)

  if (length(to_convert) > 0L) {

    # as.factor() leaves factors untouched and encodes characters, so a single
    # converter handles both column kinds and yields integer level codes
    to_integer <- function(x) {
      as.integer(as.factor(x))
    }

    if (inherits(data, "data.table")) {
      # data.table does not behave like data.frame: update columns by reference
      data[, (to_convert) := lapply(.SD, to_integer), .SDcols = to_convert]
    } else {
      # Default routine (data.frame)
      data[to_convert] <- lapply(data[to_convert], to_integer)
    }

  }

  return(data)

}
#' Data preparator for LightGBM datasets (integer)
#'
#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' # Convert all factors/chars to integer
#' str(lgb.prepare2(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
#' str(lightgbm::lgb.prepare2(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' @export
lgb.prepare2 <- function(data) {

  # Only data.frame-like inputs are supported. A data.table also inherits
  # from "data.frame", so a single check covers both accepted classes.
  if (!inherits(data, "data.frame")) {
    stop("lgb.prepare2: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
  }

  # Detect character and factor columns with per-column predicates.
  # vapply(data, class, character(1)) throws as soon as one column carries
  # more than one class (e.g. ordered factors or POSIXct), so the class
  # vector itself is never materialised here.
  is_char <- vapply(data, is.character, logical(1L))
  is_fact <- vapply(data, is.factor, logical(1L))
  to_convert <- which(is_char | is_fact)

  if (length(to_convert) > 0L) {

    # as.factor() leaves factors untouched and encodes characters, so a single
    # converter handles both column kinds and yields integer level codes
    to_integer <- function(x) {
      as.integer(as.factor(x))
    }

    if (inherits(data, "data.table")) {
      # data.table does not behave like data.frame: update columns by reference
      data[, (to_convert) := lapply(.SD, to_integer), .SDcols = to_convert]
    } else {
      # Default routine (data.frame)
      data[to_convert] <- lapply(data[to_convert], to_integer)
    }

  }

  return(data)

}
......@@ -8,7 +8,6 @@
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(iris)
#'
......@@ -66,8 +65,6 @@
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 3 3 3 3 3 3 3 3 3 3 ...
#'
#' }
#'
#' @importFrom data.table set
#' @export
lgb.prepare_rules <- function(data, rules = NULL) {
......
......@@ -8,7 +8,6 @@
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(iris)
#'
......@@ -66,8 +65,6 @@
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 3 3 3 3 3 3 3 3 3 3 ...
#'
#' }
#'
#' @importFrom data.table set
#' @export
lgb.prepare_rules2 <- function(data, rules = NULL) {
......
......@@ -26,7 +26,6 @@
#' @return a trained booster model \code{lgb.Booster}.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -43,7 +42,6 @@
#' min_data = 1,
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' }
#'
#' @export
lgb.train <- function(params = list(),
......
......@@ -9,7 +9,6 @@
#' @return NULL invisibly.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -32,7 +31,6 @@
#'
#' library(lightgbm)
#' # Do whatever you want again with LightGBM without object clashing
#' }
#'
#' @export
lgb.unloader <- function(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) {
......
......@@ -8,7 +8,6 @@
#' @return lgb.Booster.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -27,7 +26,6 @@
#' early_stopping_rounds = 10)
#' saveRDS.lgb.Booster(model, "model.rds")
#' new_model <- readRDS.lgb.Booster("model.rds")
#' }
#'
#' @export
readRDS.lgb.Booster <- function(file = "", refhook = NULL) {
......
......@@ -13,7 +13,6 @@
#' @return NULL invisibly.
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
......@@ -23,16 +22,16 @@
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#' params <- list(objective = "regression", metric = "l2")
#' valids <- list(test = dtest)
#' model <- lgb.train(params,
#' dtrain,
#' 100,
#' valids,
#' min_data = 1,
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' model <- lgb.train(
#' params
#' , dtrain
#' , 100
#' , valids
#' , min_data = 1
#' , learning_rate = 1
#' , early_stopping_rounds = 10
#' )
#' saveRDS.lgb.Booster(model, "model.rds")
#' }
#'
#' @export
saveRDS.lgb.Booster <- function(object,
file = "",
......
......@@ -22,7 +22,6 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{lgb.Dataset} object.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
......@@ -31,6 +30,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data))
stopifnot(all(dim(dtrain) == dim(train$data)))
}
}
......@@ -24,7 +24,6 @@ Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
......@@ -34,6 +33,5 @@ dimnames(dtrain)
colnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data))
print(dtrain, verbose = TRUE)
}
}
......@@ -33,7 +33,6 @@ The \code{name} field can be one of the following:
}
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
......@@ -45,6 +44,5 @@ lightgbm::setinfo(dtrain, "label", 1 - labels)
labels2 <- lightgbm::getinfo(dtrain, "label")
stopifnot(all(labels2 == 1 - labels))
}
}
......@@ -32,7 +32,6 @@ Construct lgb.Dataset object from dense matrix, sparse matrix
or local file (that was created previously by saving an \code{lgb.Dataset}).
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
......@@ -40,6 +39,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
lgb.Dataset.save(dtrain, "lgb.Dataset.data")
dtrain <- lgb.Dataset("lgb.Dataset.data")
lgb.Dataset.construct(dtrain)
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment