Unverified Commit 4c8418ea authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[docs] R docs cleanup (#2375)

* R docs cleanup

* regenerated R documentation files
parent df26b65d
......@@ -40,10 +40,9 @@ data(agaricus.test, package = "lightgbm")
test <- agaricus.test
params <- list(objective = "binary",
learning_rate = 0.01, num_leaves = 63, max_depth = -1,
min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
model <- lgb.train(params, dtrain, 20)
model <- lgb.train(params, dtrain, 20)
learning_rate = 0.01, num_leaves = 63, max_depth = -1,
min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
model <- lgb.train(params, dtrain, 10)
tree_interpretation <- lgb.interprete(model, test$data, 1:5)
lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
......
......@@ -10,31 +10,20 @@ lgb.prepare(data)
\item{data}{A data.frame or data.table to prepare.}
}
\value{
The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
}
\description{
Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
}
\examples{
library(lightgbm)
data(iris)
str(iris)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
\dontrun{
# When lightgbm package is installed, and you do not want to load it
# You can still use the function!
lgb.unloader()
......@@ -45,5 +34,6 @@ str(lightgbm::lgb.prepare(data = iris))
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
}
}
......@@ -10,32 +10,21 @@ lgb.prepare2(data)
\item{data}{A data.frame or data.table to prepare.}
}
\value{
The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
}
\description{
Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
}
\examples{
library(lightgbm)
data(iris)
str(iris)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
# Convert all factors/chars to integer
str(lgb.prepare2(data = iris))
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
\dontrun{
# When lightgbm package is installed, and you do not want to load it
# You can still use the function!
lgb.unloader()
......@@ -46,5 +35,6 @@ str(lightgbm::lgb.prepare2(data = iris))
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
}
}
......@@ -12,37 +12,22 @@ lgb.prepare_rules(data, rules = NULL)
\item{rules}{A set of rules from the data preparator, if already used.}
}
\value{
A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
}
\description{
Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter.
Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter.
}
\examples{
library(lightgbm)
data(iris)
str(iris)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
str(new_iris$data)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
data(iris) # Erase iris dataset
iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
# Warning message:
# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
# invalid factor level, NA generated
# Use conversion using known rules
# Unknown factors become 0, excellent for sparse datasets
......@@ -50,14 +35,11 @@ newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
# Unknown factor is now zero, perfect for sparse datasets
newer_iris$data[1, ] # Species became 0 as it is an unknown factor
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 5.1 3.5 1.4 0.2 0
newer_iris$data[1, 5] <- 1 # Put back real initial value
# Is the newly created dataset equal? YES!
all.equal(new_iris$data, newer_iris$data)
# [1] TRUE
# Can we test our own rules?
data(iris) # Erase iris dataset
......@@ -68,11 +50,5 @@ personal_rules <- list(Species = c("setosa" = 3,
"virginica" = 1))
newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
str(newest_iris$data) # SUCCESS!
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : num 3 3 3 3 3 3 3 3 3 3 ...
}
......@@ -12,37 +12,22 @@ lgb.prepare_rules2(data, rules = NULL)
\item{rules}{A set of rules from the data preparator, if already used.}
}
\value{
A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in \code{lgb.Dataset}.
}
\description{
Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
}
\examples{
library(lightgbm)
data(iris)
str(iris)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
str(new_iris$data)
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
data(iris) # Erase iris dataset
iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
# Warning message:
# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
# invalid factor level, NA generated
# Use conversion using known rules
# Unknown factors become 0, excellent for sparse datasets
......@@ -50,14 +35,11 @@ newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
# Unknown factor is now zero, perfect for sparse datasets
newer_iris$data[1, ] # Species became 0 as it is an unknown factor
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 5.1 3.5 1.4 0.2 0
newer_iris$data[1, 5] <- 1 # Put back real initial value
# Is the newly created dataset equal? YES!
all.equal(new_iris$data, newer_iris$data)
# [1] TRUE
# Can we test our own rules?
data(iris) # Erase iris dataset
......@@ -68,11 +50,5 @@ personal_rules <- list(Species = c("setosa" = 3L,
"virginica" = 1L))
newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
str(newest_iris$data) # SUCCESS!
# 'data.frame': 150 obs. of 5 variables:
# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
# $ Species : int 3 3 3 3 3 3 3 3 3 3 ...
}
......@@ -31,11 +31,11 @@ params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
10,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
early_stopping_rounds = 5)
lgb.save(model, "model.txt")
}
......@@ -80,10 +80,9 @@ params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
10,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
early_stopping_rounds = 5)
}
......@@ -31,16 +31,19 @@ params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
10,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
early_stopping_rounds = 5)
\dontrun{
lgb.unloader(restore = FALSE, wipe = FALSE, envir = .GlobalEnv)
rm(model, dtrain, dtest) # Not needed if wipe = TRUE
gc() # Not needed if wipe = TRUE
library(lightgbm)
# Do whatever you want again with LightGBM without object clashing
}
}
......@@ -55,11 +55,11 @@ params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
10,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
early_stopping_rounds = 5)
preds <- predict(model, test$data)
}
......@@ -2,7 +2,7 @@
% Please edit documentation in R/readRDS.lgb.Booster.R
\name{readRDS.lgb.Booster}
\alias{readRDS.lgb.Booster}
\title{readRDS for lgb.Booster models}
\title{readRDS for \code{lgb.Booster} models}
\usage{
readRDS.lgb.Booster(file = "", refhook = NULL)
}
......@@ -12,7 +12,7 @@ readRDS.lgb.Booster(file = "", refhook = NULL)
\item{refhook}{a hook function for handling reference objects.}
}
\value{
lgb.Booster.
\code{lgb.Booster}.
}
\description{
Attempts to load a model using RDS.
......@@ -29,11 +29,11 @@ params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
10,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
early_stopping_rounds = 5)
saveRDS.lgb.Booster(model, "model.rds")
new_model <- readRDS.lgb.Booster("model.rds")
......
......@@ -2,7 +2,7 @@
% Please edit documentation in R/saveRDS.lgb.Booster.R
\name{saveRDS.lgb.Booster}
\alias{saveRDS.lgb.Booster}
\title{saveRDS for lgb.Booster models}
\title{saveRDS for \code{lgb.Booster} models}
\usage{
saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
compress = TRUE, refhook = NULL, raw = TRUE)
......@@ -41,11 +41,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params
, dtrain
, 100
, 10
, valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 10
, early_stopping_rounds = 5
)
saveRDS.lgb.Booster(model, "model.rds")
}
......@@ -3,14 +3,14 @@
\name{setinfo}
\alias{setinfo}
\alias{setinfo.lgb.Dataset}
\title{Set information of an lgb.Dataset object}
\title{Set information of an \code{lgb.Dataset} object}
\usage{
setinfo(dataset, ...)
\method{setinfo}{lgb.Dataset}(dataset, name, info, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
......@@ -22,7 +22,7 @@ setinfo(dataset, ...)
passed object
}
\description{
Set information of an lgb.Dataset object
Set information of an \code{lgb.Dataset} object
}
\details{
The \code{name} field can be one of the following:
......
......@@ -10,7 +10,7 @@ slice(dataset, ...)
\method{slice}{lgb.Dataset}(dataset, idxset, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters (currently not used)}
......@@ -21,7 +21,7 @@ constructed sub dataset
}
\description{
Get a new \code{lgb.Dataset} containing the specified rows of
original lgb.Dataset object
original \code{lgb.Dataset} object
}
\examples{
library(lightgbm)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment