Commit f2afb2cd authored by James Lamb's avatar James Lamb Committed by Nikita Titov
Browse files

[R-package][docs] made roxygen2 tags explicit and cleaned up documentation (#2688)



* [R-package] made roxygen2 tags explicit and cleaned up documentation

* Apply suggestions from code review
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* Apply suggestions from code review
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* Update R-package/man/lightgbm.Rd
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* [R-package] moved @name to the top of roxygen blocks and removed some inaccurate information in documentation on parameters
Co-authored-by: default avatarNikita Titov <nekit94-08@mail.ru>
parent c7ae833e
......@@ -35,5 +35,6 @@ Imports:
graphics,
jsonlite (>= 1.0),
Matrix (>= 1.1-0),
methods
methods,
utils
RoxygenNote: 7.0.2
......@@ -50,4 +50,5 @@ importFrom(graphics,par)
importFrom(jsonlite,fromJSON)
importFrom(methods,is)
importFrom(stats,quantile)
importFrom(utils,read.delim)
useDynLib(lib_lightgbm , .registration = TRUE)
......@@ -662,11 +662,9 @@ Booster <- R6::R6Class(
)
)
#' Predict method for LightGBM model
#'
#' Predicted values based on class \code{lgb.Booster}
#'
#' @name predict.lgb.Booster
#' @title Predict method for LightGBM model
#' @description Predicted values based on class \code{lgb.Booster}
#' @param object Object of class \code{lgb.Booster}
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
......@@ -708,8 +706,6 @@ Booster <- R6::R6Class(
#' , early_stopping_rounds = 5L
#' )
#' preds <- predict(model, test$data)
#'
#' @rdname predict.lgb.Booster
#' @export
predict.lgb.Booster <- function(object,
data,
......@@ -739,12 +735,10 @@ predict.lgb.Booster <- function(object,
)
}
#' Load LightGBM model
#'
#' Load LightGBM model from saved model file or string
#' Load LightGBM takes in either a file path or model string
#' @name lgb.load
#' @title Load LightGBM model
#' @description Load LightGBM takes in either a file path or model string.
#' If both are provided, Load will default to loading from file
#'
#' @param filename path of model file
#' @param model_str a str containing the model
#'
......@@ -774,7 +768,6 @@ predict.lgb.Booster <- function(object,
#' model_string <- model$save_model_to_string(NULL) # saves best iteration
#' load_booster_from_str <- lgb.load(model_str = model_string)
#'
#' @rdname lgb.load
#' @export
lgb.load <- function(filename = NULL, model_str = NULL) {
......@@ -800,10 +793,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
}
#' Save LightGBM model
#'
#' Save LightGBM model
#'
#' @name lgb.save
#' @title Save LightGBM model
#' @description Save LightGBM model
#' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
......@@ -830,8 +822,6 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' , early_stopping_rounds = 5L
#' )
#' lgb.save(model, "model.txt")
#'
#' @rdname lgb.save
#' @export
lgb.save <- function(booster, filename, num_iteration = NULL) {
......@@ -850,10 +840,9 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
}
#' Dump LightGBM model to json
#'
#' Dump LightGBM model to json
#'
#' @name lgb.dump
#' @title Dump LightGBM model to json
#' @description Dump LightGBM model to json
#' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
#'
......@@ -880,7 +869,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' )
#' json_model <- lgb.dump(model)
#'
#' @rdname lgb.dump
#' @export
lgb.dump <- function(booster, num_iteration = NULL) {
......@@ -894,9 +882,9 @@ lgb.dump <- function(booster, num_iteration = NULL) {
}
#' Get record evaluation result from booster
#'
#' Get record evaluation result from booster
#' @name lgb.get.eval.result
#' @title Get record evaluation result from booster
#' @description Get record evaluation result from booster
#' @param booster Object of class \code{lgb.Booster}
#' @param data_name name of dataset
#' @param eval_name name of evaluation
......@@ -925,7 +913,6 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#' , early_stopping_rounds = 5L
#' )
#' lgb.get.eval.result(model, "test", "l2")
#' @rdname lgb.get.eval.result
#' @export
lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) {
......
......@@ -690,11 +690,9 @@ Dataset <- R6::R6Class(
)
)
#' Construct \code{lgb.Dataset} object
#'
#' Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' @title Construct \code{lgb.Dataset} object
#' @description Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters
#' @param reference reference dataset
......@@ -741,10 +739,9 @@ lgb.Dataset <- function(data,
}
#' Construct validation data
#'
#' Construct validation data according to training data
#'
#' @name lgb.Dataset.create.valid
#' @title Construct validation data
#' @description Construct validation data according to training data
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the \code{lgb.Dataset} object
......@@ -774,8 +771,9 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
}
#' Construct Dataset explicitly
#'
#' @name lgb.Dataset.construct
#' @title Construct Dataset explicitly
#' @description Construct Dataset explicitly
#' @param dataset Object of class \code{lgb.Dataset}
#'
#' @examples
......@@ -798,9 +796,8 @@ lgb.Dataset.construct <- function(dataset) {
}
#' Dimensions of an \code{lgb.Dataset}
#'
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @title Dimensions of an \code{lgb.Dataset}
#' @description Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters
#'
......@@ -834,11 +831,9 @@ dim.lgb.Dataset <- function(x, ...) {
}
#' Handling of column names of \code{lgb.Dataset}
#'
#' Only column names are supported for \code{lgb.Dataset}, thus setting of
#' @title Handling of column names of \code{lgb.Dataset}
#' @description Only column names are supported for \code{lgb.Dataset}, thus setting of
#' row names would have no effect and returned row names would be NULL.
#'
#' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one is ignored
#' and the second one is column names
......@@ -912,11 +907,9 @@ dimnames.lgb.Dataset <- function(x) {
}
#' Slice a dataset
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' @title Slice a dataset
#' @description Get a new \code{lgb.Dataset} containing the specified rows of
#' original \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
......@@ -951,8 +944,9 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
}
#' Get information of an \code{lgb.Dataset} object
#'
#' @name getinfo
#' @title Get information of an \code{lgb.Dataset} object
#' @description Get one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
......@@ -1000,8 +994,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
}
#' Set information of an \code{lgb.Dataset} object
#'
#' @name setinfo
#' @title Set information of an \code{lgb.Dataset} object
#' @description Set one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the field to get
#' @param info the specific field of information to set
......@@ -1012,10 +1007,13 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#' \item \code{label}: label lightgbm learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{init_score}: initial score is the base prediction lightgbm will boost from ;
#' \item \code{group}.
#' \item{\code{label}: vector of labels to use as the target variable}
#' \item{\code{weight}: to do a weight rescale}
#' \item{\code{init_score}: initial score is the base prediction lightgbm will boost from}
#' \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
#' group rows together as ordered results from the same set of candidate results to be ranked.
#' For example, if you have a 1000-row dataset that contains 250 4-document query results,
#' set this to \code{rep(4L, 250L)}}
#' }
#'
#' @examples
......@@ -1049,11 +1047,14 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
invisible(dataset$setinfo(name, info))
}
#' Set categorical feature of \code{lgb.Dataset}
#'
#' @name lgb.Dataset.set.categorical
#' @title Set categorical feature of \code{lgb.Dataset}
#' @description Set the categorical features of an \code{lgb.Dataset} object. Use this function
#' to tell LightGBM which features should be treated as categorical.
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
#'
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @return passed dataset
#'
#' @examples
......@@ -1079,10 +1080,9 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
}
#' Set reference of \code{lgb.Dataset}
#'
#' If you want to use validation data, you should set reference to training data
#'
#' @name lgb.Dataset.set.reference
#' @title Set reference of \code{lgb.Dataset}
#' @description If you want to use validation data, you should set reference to training data
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
......@@ -1111,11 +1111,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
invisible(dataset$set_reference(reference))
}
#' Save \code{lgb.Dataset} to a binary file
#'
#' Please note that \code{init_score} is not saved in binary file.
#' @name lgb.Dataset.save
#' @title Save \code{lgb.Dataset} to a binary file
#' @description Please note that \code{init_score} is not saved in binary file.
#' If you need it, please set it again after loading Dataset.
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file
#'
......@@ -1127,8 +1126,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
#'
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {
......
#' @importFrom methods is
#' @importFrom R6 R6Class
#' @importFrom utils read.delim
Predictor <- R6::R6Class(
classname = "lgb.Predictor",
......@@ -113,7 +114,7 @@ Predictor <- R6::R6Class(
)
# Get predictions from file
preds <- read.delim(tmp_filename, header = FALSE, sep = "\t")
preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
num_row <- nrow(preds)
preds <- as.vector(t(preds))
......
......@@ -17,9 +17,9 @@ CVBooster <- R6::R6Class(
)
)
#' @name lgb.cv
#' @title Main CV logic for LightGBM
#' @description Cross validation logic used by LightGBM
#' @name lgb.cv
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
......@@ -36,19 +36,19 @@ CVBooster <- R6::R6Class(
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int
#' type int represents index,
#' type str represents feature names
#' @param categorical_feature categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#'                        overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
......
#' Compute feature importance in a model
#'
#' Creates a \code{data.table} of feature importances in a model.
#'
#' @name lgb.importance
#' @title Compute feature importance in a model
#' @description Creates a \code{data.table} of feature importances in a model.
#' @param model object of class \code{lgb.Booster}.
#' @param percentage whether to show importance in relative percentage.
#'
#' @return
#'
#' For a tree model, a \code{data.table} with the following columns:
#' @return For a tree model, a \code{data.table} with the following columns:
#' \itemize{
#' \item \code{Feature} Feature names in the model.
#' \item \code{Gain} The total gain of this feature's splits.
#' \item \code{Cover} The number of observation related to this feature.
#' \item \code{Frequency} The number of times a feature splited in trees.
#' \item{\code{Feature}: Feature names in the model.}
#' \item{\code{Gain}: The total gain of this feature's splits.}
#'   \item{\code{Cover}: The number of observations related to this feature.}
#'   \item{\code{Frequency}: The number of times a feature is used in splits across all trees.}
#' }
#'
#' @examples
......
#' Compute feature contribution of prediction
#'
#' Computes feature contribution components of rawscore prediction.
#'
#' @name lgb.interprete
#' @title Compute feature contribution of prediction
#' @description Computes feature contribution components of rawscore prediction.
#' @param model object of class \code{lgb.Booster}.
#' @param data a matrix object or a dgCMatrix object.
#' @param idxset an integer vector of indices of rows needed.
......@@ -10,8 +9,8 @@
#' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table}
#' with the following columns:
#' \itemize{
#' \item \code{Feature} Feature names in the model.
#' \item \code{Contribution} The total contribution of this feature's splits.
#' \item{\code{Feature}: Feature names in the model.}
#' \item{\code{Contribution}: The total contribution of this feature's splits.}
#' }
#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and
#' Contribution columns to each class.
......
#' Parse a LightGBM model json dump
#'
#' Parse a LightGBM model json dump into a \code{data.table} structure.
#'
#' @name lgb.model.dt.tree
#' @title Parse a LightGBM model json dump
#' @description Parse a LightGBM model json dump into a \code{data.table} structure.
#' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration
#'
#' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs.
#'
#' The columns of the \code{data.table} are:
#'
#' \itemize{
#' \item \code{tree_index}: ID of a tree in a model (integer)
#' \item \code{split_index}: ID of a node in a tree (integer)
#' \item \code{split_feature}: for a node, it's a feature name (character);
#' for a leaf, it simply labels it as \code{"NA"}
#' \item \code{node_parent}: ID of the parent node for current node (integer)
#' \item \code{leaf_index}: ID of a leaf in a tree (integer)
#' \item \code{leaf_parent}: ID of the parent node for current leaf (integer)
#' \item \code{split_gain}: Split gain of a node
#' \item \code{threshold}: Splitting threshold value of a node
#' \item \code{decision_type}: Decision type of a node
#' \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right
#' \item \code{internal_value}: Node value
#' \item \code{internal_count}: The number of observation collected by a node
#' \item \code{leaf_value}: Leaf value
#' \item \code{leaf_count}: The number of observation collected by a leaf
#' \item{\code{tree_index}: ID of a tree in a model (integer)}
#' \item{\code{split_index}: ID of a node in a tree (integer)}
#' \item{\code{split_feature}: for a node, it's a feature name (character);
#' for a leaf, it simply labels it as \code{"NA"}}
#' \item{\code{node_parent}: ID of the parent node for current node (integer)}
#' \item{\code{leaf_index}: ID of a leaf in a tree (integer)}
#' \item{\code{leaf_parent}: ID of the parent node for current leaf (integer)}
#' \item{\code{split_gain}: Split gain of a node}
#' \item{\code{threshold}: Splitting threshold value of a node}
#' \item{\code{decision_type}: Decision type of a node}
#' \item{\code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right}
#' \item{\code{internal_value}: Node value}
#'  \item{\code{internal_count}: The number of observations collected by a node}
#'  \item{\code{leaf_value}: Leaf value}
#'  \item{\code{leaf_count}: The number of observations collected by a leaf}
#' }
#'
#' @examples
......@@ -47,7 +45,7 @@
#'
#' tree_dt <- lgb.model.dt.tree(model)
#'
#' @importFrom data.table := data.table rbindlist
#' @importFrom data.table := rbindlist
#' @importFrom jsonlite fromJSON
#' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) {
......@@ -87,7 +85,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
}
#' @importFrom data.table data.table rbindlist
#' @importFrom data.table := data.table rbindlist
single.tree.parse <- function(lgb_tree) {
# Traverse tree function
......
#' Plot feature importance as a bar graph
#'
#' Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph.
#'
#' @name lgb.plot.importance
#' @title Plot feature importance as a bar graph
#' @description Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph.
#' @param tree_imp a \code{data.table} returned by \code{\link{lgb.importance}}.
#' @param top_n maximal number of top features to include into the plot.
#' @param measure the name of importance measure to plot, can be "Gain", "Cover" or "Frequency".
......
#' Plot feature contribution as a bar graph
#'
#' Plot previously calculated feature contribution as a bar graph.
#'
#' @name lgb.plot.interpretation
#' @title Plot feature contribution as a bar graph
#' @description Plot previously calculated feature contribution as a bar graph.
#' @param tree_interpretation_dt a \code{data.table} returned by \code{\link{lgb.interprete}}.
#' @param top_n maximal number of top features to include into the plot.
#' @param cols the column numbers of layout, will be used only for multiclass classification feature contribution.
......
#' Data preparator for LightGBM datasets (numeric)
#'
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' @name lgb.prepare
#' @title Data preparator for LightGBM datasets (numeric)
#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric without integers. Please use
#' \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
#'
#' \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets.
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}.
#'
......
#' Data preparator for LightGBM datasets (integer)
#'
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' @name lgb.prepare2
#' @title Data preparator for LightGBM datasets (integer)
#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer).
#' Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets.
#' This is useful if you have a specific need for integer dataset instead of numeric dataset.
#' Note that there are programs which do not support integer-only input. Consider this as a half
#' memory technique which is dangerous, especially for LightGBM.
#'
#' Please use \code{\link{lgb.prepare_rules2}} if you want to apply this transformation to
#' other datasets. This is useful if you have a specific need for integer dataset instead
#' of numeric dataset. Note that there are programs which do not support integer-only
#' input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}.
#'
......
#' Data preparator for LightGBM datasets with rules (numeric)
#'
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' @name lgb.prepare_rules
#' @title Data preparator for LightGBM datasets with rules (numeric)
#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric. In addition, keeps rules created
#' so you can convert other datasets using this converter.
#'
#' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input
#' in \code{lgb.Dataset}.
......
#' Data preparator for LightGBM datasets with rules (integer)
#'
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' @name lgb.prepare_rules2
#' @title Data preparator for LightGBM datasets with rules (integer)
#' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer).
#' In addition, keeps rules created so you can convert other datasets using this converter.
#' This is useful if you have a specific need for integer dataset instead of numeric dataset.
#' Note that there are programs which do not support integer-only input.
#' Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input in
#' \code{lgb.Dataset}.
......
#' @title Main training logic for LightGBM
#' @name lgb.train
#' @title Main training logic for LightGBM
#' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
......@@ -18,11 +18,11 @@
#' original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#'                        overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
......
#' LightGBM unloading error fix
#'
#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R.
#' This is useful for instance if an object becomes stuck for no apparent reason and you do not want
#' to restart R to fix the lost object.
#'
#' @name lgb.unloader
#' @title LightGBM unloading error fix
#' @description Attempts to unload LightGBM packages so you can remove objects cleanly without
#' having to restart R. This is useful for instance if an object becomes stuck for no
#' apparent reason and you do not want to restart R to fix the lost object.
#' @param restore Whether to reload \code{LightGBM} immediately after detaching from R.
#' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once
#' unloading is performed.
......
#' @name lgb_shared_params
#' @title Shared parameter docs
#' @description Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
#' @param callbacks list of callback functions
#' List of callback functions that are applied at each iteration.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param data a \code{lgb.Dataset} object, used for training
#' @param early_stopping_rounds int. Activates early stopping. Requires at least one validation data
#' and one metric. If there's more than one, will check all of them
......@@ -15,9 +14,8 @@
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
NULL
#' @title Train a LightGBM model
#' @name lightgbm
#' @title Train a LightGBM model
#' @description Simple interface for training a LightGBM model.
#' @inheritParams lgb_shared_params
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
......@@ -25,21 +23,23 @@ NULL
#' @param save_name File name to use when writing the trained model to disk. Should end in ".model".
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{
#' \item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
#' \item{obj}{objective function, can be character or custom objective function. Examples include
#' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
#' \item{\code{obj}: objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#'                \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclassova}}
#' \item{eval}{evaluation function, can be (a list of) character or custom eval function}
#' \item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
#' \item{categorical_feature}{list of str or int. type int represents index, type str represents feature names}
#' \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model
#' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#'                        overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
......@@ -94,16 +94,15 @@ lightgbm <- function(data,
return(bst)
}
#' Training part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' @name agaricus.train
#' @title Training part from Mushroom Data Set
#' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' \item{\code{label}: the label for each record}
#' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' }
#'
#' @references
......@@ -115,24 +114,21 @@ lightgbm <- function(data,
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.train
#' @usage data(agaricus.train)
#' @format A list containing a label vector, and a dgCMatrix object with 6513
#' rows and 127 variables
NULL
#' Test part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
#' @name agaricus.test
#' @title Test part from Mushroom Data Set
#' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository.
#'
#' This data set includes the following fields:
#'
#' \itemize{
#' \item \code{label} the label for each record
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
#' \item{\code{label}: the label for each record}
#' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' }
#'
#' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
#'
......@@ -142,15 +138,14 @@ NULL
#'
#' @docType data
#' @keywords datasets
#' @name agaricus.test
#' @usage data(agaricus.test)
#' @format A list containing a label vector, and a dgCMatrix object with 1611
#' rows and 126 variables
NULL
#' Bank Marketing Data Set
#'
#' This data set is originally from the Bank Marketing data set,
#' @name bank
#' @title Bank Marketing Data Set
#' @description This data set is originally from the Bank Marketing data set,
#' UCI Machine Learning Repository.
#'
#' It contains only the following: bank.csv with 10% of the examples and 17 inputs,
......@@ -164,7 +159,6 @@ NULL
#'
#' @docType data
#' @keywords datasets
#' @name bank
#' @usage data(bank)
#' @format A data.table with 4521 rows and 17 variables
NULL
......
#' readRDS for \code{lgb.Booster} models
#'
#' Attempts to load a model using RDS.
#'
#' @name readRDS.lgb.Booster
#' @title readRDS for \code{lgb.Booster} models
#' @description Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}}
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param refhook a hook function for handling reference objects.
#'
......
#' saveRDS for \code{lgb.Booster} models
#'
#' Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides
#' whether to save the raw model or not.
#'
#' @name saveRDS.lgb.Booster
#' @title saveRDS for \code{lgb.Booster} models
#' @description Attempts to save a model using RDS. Has an additional parameter (\code{raw})
#' which decides whether to save the raw model or not.
#' @param object R object to serialize.
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment