Commit 60412658 authored by Guolin Ke's avatar Guolin Ke
Browse files

Add R's document back.

parent d2a6eb0e
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{lgb.save}
\alias{lgb.save}
\title{Save LightGBM model}
\usage{
lgb.save(booster, filename, num_iteration = NULL)
}
\arguments{
\item{booster}{Object of class \code{lgb.Booster}}
\item{filename}{saved filename}
\item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use the best iteration}
}
\value{
booster
}
\description{
Save LightGBM model
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
lgb.save(model, "model.txt")
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.cv.R, R/lgb.train.R, R/lightgbm.R
\name{lgb.cv}
\alias{lgb.cv}
\alias{lgb.train}
\alias{lightgbm}
\title{Main CV logic for LightGBM}
\usage{
lgb.cv(params = list(), data, nrounds = 10, nfold = 3, label = NULL,
weight = NULL, obj = NULL, eval = NULL, verbose = 1, record = TRUE,
eval_freq = 1L, showsd = TRUE, stratified = TRUE, folds = NULL,
init_model = NULL, colnames = NULL, categorical_feature = NULL,
early_stopping_rounds = NULL, callbacks = list(), ...)
lgb.train(params = list(), data, nrounds = 10, valids = list(),
obj = NULL, eval = NULL, verbose = 1, record = TRUE, eval_freq = 1L,
init_model = NULL, colnames = NULL, categorical_feature = NULL,
early_stopping_rounds = NULL, callbacks = list(), ...)
lightgbm(data, label = NULL, weight = NULL, params = list(),
nrounds = 10, verbose = 1, eval_freq = 1L,
early_stopping_rounds = NULL, save_name = "lightgbm.model",
init_model = NULL, callbacks = list(), ...)
}
\arguments{
\item{params}{List of parameters}
\item{data}{a \code{lgb.Dataset} object, used for CV}
\item{nrounds}{number of CV rounds}
\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{label}{vector of response values. Should be provided only when data is an R-matrix.}
\item{weight}{vector of weights. If not NULL, will be set on the dataset}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
\item{eval}{evaluation function, can be (list of) character or custom eval function}
\item{verbose}{verbosity for output; if <= 0, the printing of evaluation results during training is also disabled}
\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}
\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
(each element must be a vector of test fold's indices). When folds are supplied,
the \code{nfold} and \code{stratified} parameters are ignored.}
\item{init_model}{path of model file or \code{lgb.Booster} object; training will continue from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{categorical_feature}{list of str or int
type int represents index,
type str represents feature names}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation data and one metric
If there's more than one, will check all of them
Returns the model with (best_iter + early_stopping_rounds)
If early stopping occurs, the model will have 'best_iter' field}
\item{callbacks}{list of callback functions
List of callback functions that are applied at each iteration.}
\item{...}{other parameters, see parameters.md for more information}
\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
\item{boosting}{boosting type. \code{gbdt}, \code{dart}}
\item{num_leaves}{number of leaves in one tree. defaults to 127}
\item{max_depth}{Limit the max depth for tree model. This is used to deal with overfitting when #data is small.
Trees still grow leaf-wise.}
\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPUs use hyper-threading to generate 2 threads per CPU core).}
\item{params}{List of parameters}
\item{data}{a \code{lgb.Dataset} object, used for training}
\item{nrounds}{number of training rounds}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
\item{boosting}{boosting type. \code{gbdt}, \code{dart}}
\item{num_leaves}{number of leaves in one tree. defaults to 127}
\item{max_depth}{Limit the max depth for tree model. This is used to deal with overfitting when #data is small.
Trees still grow leaf-wise.}
\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPUs use hyper-threading to generate 2 threads per CPU core).}
\item{eval}{evaluation function, can be (a list of) character or custom eval function}
\item{verbose}{verbosity for output; if <= 0, the printing of evaluation results during training is also disabled}
\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
\item{init_model}{path of model file or \code{lgb.Booster} object; training will continue from this model}
\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
\item{categorical_feature}{list of str or int
type int represents index,
type str represents feature names}
\item{early_stopping_rounds}{int
Activates early stopping.
Requires at least one validation data and one metric
If there's more than one, will check all of them
Returns the model with (best_iter + early_stopping_rounds)
If early stopping occurs, the model will have 'best_iter' field}
\item{callbacks}{list of callback functions
List of callback functions that are applied at each iteration.}
\item{...}{other parameters, see parameters.md for more information}
}
\value{
a trained model \code{lgb.CVBooster}.
a trained booster model \code{lgb.Booster}.
}
\description{
Main CV logic for LightGBM
Main training logic for LightGBM
Simple interface for training a lightgbm model.
Its documentation is combined with lgb.train.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", metric = "l2")
model <- lgb.cv(params,
dtrain,
10,
nfold = 5,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
}
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.unloader.R
\name{lgb.unloader}
\alias{lgb.unloader}
\title{LightGBM unloading error fix}
\usage{
lgb.unloader(restore = TRUE, wipe = FALSE, envir = .GlobalEnv)
}
\arguments{
\item{wipe}{Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global environment. Defaults to \code{FALSE} which means to not remove them.}
\item{envir}{The environment to perform wiping on if \code{wipe == TRUE}. Defaults to \code{.GlobalEnv} which is the global environment.}
\item{restore}{Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed.}
}
\value{
NULL invisibly.
}
\description{
Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
lgb.unloader(restore = FALSE, wipe = FALSE, envir = .GlobalEnv)
rm(model, dtrain, dtest) # Not needed if wipe = TRUE
gc() # Not needed if wipe = TRUE
library(lightgbm)
# Do whatever you want again with LightGBM without object clashing
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Booster.R
\name{predict.lgb.Booster}
\alias{predict.lgb.Booster}
\title{Predict method for LightGBM model}
\usage{
\method{predict}{lgb.Booster}(object, data, num_iteration = NULL,
rawscore = FALSE, predleaf = FALSE, header = FALSE, reshape = FALSE)
}
\arguments{
\item{object}{Object of class \code{lgb.Booster}}
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use the best iteration}
\item{rawscore}{whether the prediction should be returned in the form of original untransformed
sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
logistic regression would result in predictions for log-odds instead of probabilities.}
\item{predleaf}{whether to predict leaf indices instead.}
\item{header}{only used for prediction for text file. True if text file has header}
\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
prediction outputs per case.}
}
\value{
For regression or binary classification, it returns a vector of length \code{nrow(data)}.
For multiclass classification, either a \code{num_class * nrow(data)} vector or
a \code{(nrow(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees.
}
\description{
Predicted values based on class \code{lgb.Booster}
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
preds <- predict(model, test$data)
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/readRDS.lgb.Booster.R
\name{readRDS.lgb.Booster}
\alias{readRDS.lgb.Booster}
\title{readRDS for lgb.Booster models}
\usage{
readRDS.lgb.Booster(file = "", refhook = NULL)
}
\arguments{
\item{file}{a connection or the name of the file where the R object is saved to or read from.}
\item{refhook}{a hook function for handling reference objects.}
}
\value{
an R object.
}
\description{
Attempts to load a model using RDS.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
saveRDS.lgb.Booster(model, "model.rds")
new_model <- readRDS.lgb.Booster("model.rds")
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/saveRDS.lgb.Booster.R
\name{saveRDS.lgb.Booster}
\alias{saveRDS.lgb.Booster}
\title{saveRDS for lgb.Booster models}
\usage{
saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
compress = TRUE, refhook = NULL, raw = TRUE)
}
\arguments{
\item{object}{R object to serialize.}
\item{file}{a connection or the name of the file where the R object is saved to or read from.}
\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), a binary one is used. See the comments in the help for save.}
\item{version}{the workspace format version to use. \code{NULL} specifies the current default version (2). Versions prior to 2 are not supported, so this will only be relevant when there are later versions.}
\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression, or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of compression to be used. Ignored if file is a connection.}
\item{refhook}{a hook function for handling reference objects.}
\item{raw}{whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}.}
}
\value{
NULL invisibly.
}
\description{
Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
dtrain,
100,
valids,
min_data = 1,
learning_rate = 1,
early_stopping_rounds = 10)
saveRDS.lgb.Booster(model, "model.rds")
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{setinfo}
\alias{setinfo}
\alias{setinfo.lgb.Dataset}
\title{Set information of an lgb.Dataset object}
\usage{
setinfo(dataset, ...)
\method{setinfo}{lgb.Dataset}(dataset, name, info, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{...}{other parameters}
\item{name}{the name of the field to set}
\item{info}{the specific field of information to set}
}
\value{
passed object
}
\description{
Set information of an lgb.Dataset object
}
\details{
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: label lightgbm learn from ;
\item \code{weight}: to do a weight rescale ;
\item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
\item \code{group}.
}
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
lgb.Dataset.construct(dtrain)
labels <- lightgbm::getinfo(dtrain, "label")
lightgbm::setinfo(dtrain, "label", 1 - labels)
labels2 <- lightgbm::getinfo(dtrain, "label")
stopifnot(all.equal(labels2, 1 - labels))
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.Dataset.R
\name{slice}
\alias{slice}
\alias{slice.lgb.Dataset}
\title{Slice a dataset}
\usage{
slice(dataset, ...)
\method{slice}{lgb.Dataset}(dataset, idxset, ...)
}
\arguments{
\item{dataset}{Object of class "lgb.Dataset"}
\item{...}{other parameters (currently not used)}
\item{idxset}{an integer vector of indices of rows needed}
}
\value{
constructed sub dataset
}
\description{
Get a new \code{lgb.Dataset} containing the specified rows of
original lgb.Dataset object
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
dsub <- lightgbm::slice(dtrain, 1:42)
labels <- lightgbm::getinfo(dsub, "label")
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment