Commit f2afb2cd authored by James Lamb, committed by Nikita Titov
Browse files

[R-package][docs] made roxygen2 tags explicit and cleaned up documentation (#2688)



* [R-package] made roxygen2 tags explicit and cleaned up documentation

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* Update R-package/man/lightgbm.Rd
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* [R-package] moved @name to the top of roxygen blocks and removed some inaccurate information in documentation on parameters
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent c7ae833e
...@@ -35,5 +35,6 @@ Imports: ...@@ -35,5 +35,6 @@ Imports:
graphics, graphics,
jsonlite (>= 1.0), jsonlite (>= 1.0),
Matrix (>= 1.1-0), Matrix (>= 1.1-0),
methods methods,
utils
RoxygenNote: 7.0.2 RoxygenNote: 7.0.2
...@@ -50,4 +50,5 @@ importFrom(graphics,par) ...@@ -50,4 +50,5 @@ importFrom(graphics,par)
importFrom(jsonlite,fromJSON) importFrom(jsonlite,fromJSON)
importFrom(methods,is) importFrom(methods,is)
importFrom(stats,quantile) importFrom(stats,quantile)
importFrom(utils,read.delim)
useDynLib(lib_lightgbm , .registration = TRUE) useDynLib(lib_lightgbm , .registration = TRUE)
...@@ -662,11 +662,9 @@ Booster <- R6::R6Class( ...@@ -662,11 +662,9 @@ Booster <- R6::R6Class(
) )
) )
#' @name predict.lgb.Booster
#' Predict method for LightGBM model #' @title Predict method for LightGBM model
#' #' @description Predicted values based on class \code{lgb.Booster}
#' Predicted values based on class \code{lgb.Booster}
#'
#' @param object Object of class \code{lgb.Booster} #' @param object Object of class \code{lgb.Booster}
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
...@@ -708,8 +706,6 @@ Booster <- R6::R6Class( ...@@ -708,8 +706,6 @@ Booster <- R6::R6Class(
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' preds <- predict(model, test$data) #' preds <- predict(model, test$data)
#'
#' @rdname predict.lgb.Booster
#' @export #' @export
predict.lgb.Booster <- function(object, predict.lgb.Booster <- function(object,
data, data,
...@@ -739,12 +735,10 @@ predict.lgb.Booster <- function(object, ...@@ -739,12 +735,10 @@ predict.lgb.Booster <- function(object,
) )
} }
#' Load LightGBM model #' @name lgb.load
#' #' @title Load LightGBM model
#' Load LightGBM model from saved model file or string #' @description Load LightGBM takes in either a file path or model string.
#' Load LightGBM takes in either a file path or model string
#' If both are provided, Load will default to loading from file #' If both are provided, Load will default to loading from file
#'
#' @param filename path of model file #' @param filename path of model file
#' @param model_str a str containing the model #' @param model_str a str containing the model
#' #'
...@@ -774,7 +768,6 @@ predict.lgb.Booster <- function(object, ...@@ -774,7 +768,6 @@ predict.lgb.Booster <- function(object,
#' model_string <- model$save_model_to_string(NULL) # saves best iteration #' model_string <- model$save_model_to_string(NULL) # saves best iteration
#' load_booster_from_str <- lgb.load(model_str = model_string) #' load_booster_from_str <- lgb.load(model_str = model_string)
#' #'
#' @rdname lgb.load
#' @export #' @export
lgb.load <- function(filename = NULL, model_str = NULL) { lgb.load <- function(filename = NULL, model_str = NULL) {
...@@ -800,10 +793,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) { ...@@ -800,10 +793,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
} }
#' Save LightGBM model #' @name lgb.save
#' #' @title Save LightGBM model
#' Save LightGBM model #' @description Save LightGBM model
#'
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename #' @param filename saved filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
...@@ -830,8 +822,6 @@ lgb.load <- function(filename = NULL, model_str = NULL) { ...@@ -830,8 +822,6 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' lgb.save(model, "model.txt") #' lgb.save(model, "model.txt")
#'
#' @rdname lgb.save
#' @export #' @export
lgb.save <- function(booster, filename, num_iteration = NULL) { lgb.save <- function(booster, filename, num_iteration = NULL) {
...@@ -850,10 +840,9 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { ...@@ -850,10 +840,9 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
} }
#' Dump LightGBM model to json #' @name lgb.dump
#' #' @title Dump LightGBM model to json
#' Dump LightGBM model to json #' @description Dump LightGBM model to json
#'
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
#' #'
...@@ -880,7 +869,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { ...@@ -880,7 +869,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' ) #' )
#' json_model <- lgb.dump(model) #' json_model <- lgb.dump(model)
#' #'
#' @rdname lgb.dump
#' @export #' @export
lgb.dump <- function(booster, num_iteration = NULL) { lgb.dump <- function(booster, num_iteration = NULL) {
...@@ -894,9 +882,9 @@ lgb.dump <- function(booster, num_iteration = NULL) { ...@@ -894,9 +882,9 @@ lgb.dump <- function(booster, num_iteration = NULL) {
} }
#' Get record evaluation result from booster #' @name lgb.get.eval.result
#' #' @title Get record evaluation result from booster
#' Get record evaluation result from booster #' @description Get record evaluation result from booster
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param data_name name of dataset #' @param data_name name of dataset
#' @param eval_name name of evaluation #' @param eval_name name of evaluation
...@@ -925,7 +913,6 @@ lgb.dump <- function(booster, num_iteration = NULL) { ...@@ -925,7 +913,6 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' lgb.get.eval.result(model, "test", "l2") #' lgb.get.eval.result(model, "test", "l2")
#' @rdname lgb.get.eval.result
#' @export #' @export
lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) { lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) {
......
...@@ -690,11 +690,9 @@ Dataset <- R6::R6Class( ...@@ -690,11 +690,9 @@ Dataset <- R6::R6Class(
) )
) )
#' Construct \code{lgb.Dataset} object #' @title Construct \code{lgb.Dataset} object
#' #' @description Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{lgb.Dataset}). #' or local file (that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters #' @param params a list of parameters
#' @param reference reference dataset #' @param reference reference dataset
...@@ -741,10 +739,9 @@ lgb.Dataset <- function(data, ...@@ -741,10 +739,9 @@ lgb.Dataset <- function(data,
} }
#' Construct validation data #' @name lgb.Dataset.create.valid
#' #' @title Construct validation data
#' Construct validation data according to training data #' @description Construct validation data according to training data
#'
#' @param dataset \code{lgb.Dataset} object, training data #' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the \code{lgb.Dataset} object #' @param info a list of information of the \code{lgb.Dataset} object
...@@ -774,8 +771,9 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { ...@@ -774,8 +771,9 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
} }
#' Construct Dataset explicitly #' @name lgb.Dataset.construct
#' #' @title Construct Dataset explicitly
#' @description Construct Dataset explicitly
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' #'
#' @examples #' @examples
...@@ -798,9 +796,8 @@ lgb.Dataset.construct <- function(dataset) { ...@@ -798,9 +796,8 @@ lgb.Dataset.construct <- function(dataset) {
} }
#' Dimensions of an \code{lgb.Dataset} #' @title Dimensions of an \code{lgb.Dataset}
#' #' @description Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset} #' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters #' @param ... other parameters
#' #'
...@@ -834,11 +831,9 @@ dim.lgb.Dataset <- function(x, ...) { ...@@ -834,11 +831,9 @@ dim.lgb.Dataset <- function(x, ...) {
} }
#' Handling of column names of \code{lgb.Dataset} #' @title Handling of column names of \code{lgb.Dataset}
#' #' @description Only column names are supported for \code{lgb.Dataset}, thus setting of
#' Only column names are supported for \code{lgb.Dataset}, thus setting of
#' row names would have no effect and returned row names would be NULL. #' row names would have no effect and returned row names would be NULL.
#'
#' @param x object of class \code{lgb.Dataset} #' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one is ignored #' @param value a list of two elements: the first one is ignored
#' and the second one is column names #' and the second one is column names
...@@ -912,11 +907,9 @@ dimnames.lgb.Dataset <- function(x) { ...@@ -912,11 +907,9 @@ dimnames.lgb.Dataset <- function(x) {
} }
#' Slice a dataset #' @title Slice a dataset
#' #' @description Get a new \code{lgb.Dataset} containing the specified rows of
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' original \code{lgb.Dataset} object #' original \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed #' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used) #' @param ... other parameters (currently not used)
...@@ -951,8 +944,9 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { ...@@ -951,8 +944,9 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
} }
#' Get information of an \code{lgb.Dataset} object #' @name getinfo
#' #' @title Get information of an \code{lgb.Dataset} object
#' @description Get one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details) #' @param name the name of the information field to get (see details)
#' @param ... other parameters #' @param ... other parameters
...@@ -1000,8 +994,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { ...@@ -1000,8 +994,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
} }
#' Set information of an \code{lgb.Dataset} object #' @name setinfo
#' #' @title Set information of an \code{lgb.Dataset} object
#' @description Set one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the field to get #' @param name the name of the field to get
#' @param info the specific field of information to set #' @param info the specific field of information to set
...@@ -1012,10 +1007,13 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { ...@@ -1012,10 +1007,13 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' The \code{name} field can be one of the following: #' The \code{name} field can be one of the following:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label}: label lightgbm learn from ; #' \item{\code{label}: vector of labels to use as the target variable}
#' \item \code{weight}: to do a weight rescale ; #' \item{\code{weight}: to do a weight rescale}
#' \item \code{init_score}: initial score is the base prediction lightgbm will boost from ; #' \item{\code{init_score}: initial score is the base prediction lightgbm will boost from}
#' \item \code{group}. #' \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
#' group rows together as ordered results from the same set of candidate results to be ranked.
#' For example, if you have a 1000-row dataset that contains 250 4-document query results,
#' set this to \code{rep(4L, 250L)}}
#' } #' }
#' #'
#' @examples #' @examples
...@@ -1049,11 +1047,14 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { ...@@ -1049,11 +1047,14 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
invisible(dataset$setinfo(name, info)) invisible(dataset$setinfo(name, info))
} }
#' Set categorical feature of \code{lgb.Dataset} #' @name lgb.Dataset.set.categorical
#' #' @title Set categorical feature of \code{lgb.Dataset}
#' @description Set the categorical features of an \code{lgb.Dataset} object. Use this function
#' to tell LightGBM which features should be treated as categorical.
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features #' @param categorical_feature categorical features. This can either be a character vector of feature
#' #' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @return passed dataset #' @return passed dataset
#' #'
#' @examples #' @examples
...@@ -1079,10 +1080,9 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { ...@@ -1079,10 +1080,9 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
} }
#' Set reference of \code{lgb.Dataset} #' @name lgb.Dataset.set.reference
#' #' @title Set reference of \code{lgb.Dataset}
#' If you want to use validation data, you should set reference to training data #' @description If you want to use validation data, you should set reference to training data
#'
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset} #' @param reference object of class \code{lgb.Dataset}
#' #'
...@@ -1111,11 +1111,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) { ...@@ -1111,11 +1111,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
invisible(dataset$set_reference(reference)) invisible(dataset$set_reference(reference))
} }
#' Save \code{lgb.Dataset} to a binary file #' @name lgb.Dataset.save
#' #' @title Save \code{lgb.Dataset} to a binary file
#' Please note that \code{init_score} is not saved in binary file. #' @description Please note that \code{init_score} is not saved in binary file.
#' If you need it, please set it again after loading Dataset. #' If you need it, please set it again after loading Dataset.
#'
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file #' @param fname object filename of output file
#' #'
...@@ -1127,8 +1126,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) { ...@@ -1127,8 +1126,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' train <- agaricus.train #' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label) #' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin") #' lgb.Dataset.save(dtrain, "data.bin")
#'
#' @rdname lgb.Dataset.save
#' @export #' @export
lgb.Dataset.save <- function(dataset, fname) { lgb.Dataset.save <- function(dataset, fname) {
......
#' @importFrom methods is #' @importFrom methods is
#' @importFrom R6 R6Class #' @importFrom R6 R6Class
#' @importFrom utils read.delim
Predictor <- R6::R6Class( Predictor <- R6::R6Class(
classname = "lgb.Predictor", classname = "lgb.Predictor",
...@@ -113,7 +114,7 @@ Predictor <- R6::R6Class( ...@@ -113,7 +114,7 @@ Predictor <- R6::R6Class(
) )
# Get predictions from file # Get predictions from file
preds <- read.delim(tmp_filename, header = FALSE, sep = "\t") preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
num_row <- nrow(preds) num_row <- nrow(preds)
preds <- as.vector(t(preds)) preds <- as.vector(t(preds))
......
...@@ -17,9 +17,9 @@ CVBooster <- R6::R6Class( ...@@ -17,9 +17,9 @@ CVBooster <- R6::R6Class(
) )
) )
#' @name lgb.cv
#' @title Main CV logic for LightGBM #' @title Main CV logic for LightGBM
#' @description Cross validation logic used by LightGBM #' @description Cross validation logic used by LightGBM
#' @name lgb.cv
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix. #' @param label vector of response values. Should be provided only when data is an R-matrix.
...@@ -36,19 +36,19 @@ CVBooster <- R6::R6Class( ...@@ -36,19 +36,19 @@ CVBooster <- R6::R6Class(
#' (each element must be a vector of test fold's indices). When folds are supplied, #' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored. #' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int #' @param categorical_feature categorical features. This can either be a character vector of feature
#' type int represents index, #' names or an integer vector with the indices of the features (e.g.
#' type str represents feature names #' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param callbacks List of callback functions that are applied at each iteration. #' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets #' into a predictor model which frees up memory and the original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters: #' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{ #' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Trees still grow leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPU using hyper-threading to generate 2 threads per CPU core).}
#' } #' }
......
#' Compute feature importance in a model #' @name lgb.importance
#' #' @title Compute feature importance in a model
#' Creates a \code{data.table} of feature importances in a model. #' @description Creates a \code{data.table} of feature importances in a model.
#'
#' @param model object of class \code{lgb.Booster}. #' @param model object of class \code{lgb.Booster}.
#' @param percentage whether to show importance in relative percentage. #' @param percentage whether to show importance in relative percentage.
#' #'
#' @return #' @return For a tree model, a \code{data.table} with the following columns:
#'
#' For a tree model, a \code{data.table} with the following columns:
#' \itemize{ #' \itemize{
#' \item \code{Feature} Feature names in the model. #' \item{\code{Feature}: Feature names in the model.}
#' \item \code{Gain} The total gain of this feature's splits. #' \item{\code{Gain}: The total gain of this feature's splits.}
#' \item \code{Cover} The number of observations related to this feature. #' \item{\code{Cover}: The number of observations related to this feature.}
#' \item \code{Frequency} The number of times a feature split in trees. #' \item{\code{Frequency}: The number of times a feature split in trees.}
#' } #' }
#' #'
#' @examples #' @examples
......
#' Compute feature contribution of prediction #' @name lgb.interprete
#' #' @title Compute feature contribution of prediction
#' Computes feature contribution components of rawscore prediction. #' @description Computes feature contribution components of rawscore prediction.
#'
#' @param model object of class \code{lgb.Booster}. #' @param model object of class \code{lgb.Booster}.
#' @param data a matrix object or a dgCMatrix object. #' @param data a matrix object or a dgCMatrix object.
#' @param idxset an integer vector of indices of rows needed. #' @param idxset an integer vector of indices of rows needed.
...@@ -10,8 +9,8 @@ ...@@ -10,8 +9,8 @@
#' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} #' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table}
#' with the following columns: #' with the following columns:
#' \itemize{ #' \itemize{
#' \item \code{Feature} Feature names in the model. #' \item{\code{Feature}: Feature names in the model.}
#' \item \code{Contribution} The total contribution of this feature's splits. #' \item{\code{Contribution}: The total contribution of this feature's splits.}
#' } #' }
#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and #' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and
#' Contribution columns to each class. #' Contribution columns to each class.
......
#' Parse a LightGBM model json dump #' @name lgb.model.dt.tree
#' #' @title Parse a LightGBM model json dump
#' Parse a LightGBM model json dump into a \code{data.table} structure. #' @description Parse a LightGBM model json dump into a \code{data.table} structure.
#'
#' @param model object of class \code{lgb.Booster} #' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or #' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration #' <= 0 means use best iteration
#'
#' @return #' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs. #' A \code{data.table} with detailed information about model trees' nodes and leafs.
#' #'
#' The columns of the \code{data.table} are: #' The columns of the \code{data.table} are:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{tree_index}: ID of a tree in a model (integer) #' \item{\code{tree_index}: ID of a tree in a model (integer)}
#' \item \code{split_index}: ID of a node in a tree (integer) #' \item{\code{split_index}: ID of a node in a tree (integer)}
#' \item \code{split_feature}: for a node, it's a feature name (character); #' \item{\code{split_feature}: for a node, it's a feature name (character);
#' for a leaf, it simply labels it as \code{"NA"} #' for a leaf, it simply labels it as \code{"NA"}}
#' \item \code{node_parent}: ID of the parent node for current node (integer) #' \item{\code{node_parent}: ID of the parent node for current node (integer)}
#' \item \code{leaf_index}: ID of a leaf in a tree (integer) #' \item{\code{leaf_index}: ID of a leaf in a tree (integer)}
#' \item \code{leaf_parent}: ID of the parent node for current leaf (integer) #' \item{\code{leaf_parent}: ID of the parent node for current leaf (integer)}
#' \item \code{split_gain}: Split gain of a node #' \item{\code{split_gain}: Split gain of a node}
#' \item \code{threshold}: Splitting threshold value of a node #' \item{\code{threshold}: Splitting threshold value of a node}
#' \item \code{decision_type}: Decision type of a node #' \item{\code{decision_type}: Decision type of a node}
#' \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right #' \item{\code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right}
#' \item \code{internal_value}: Node value #' \item{\code{internal_value}: Node value}
#' \item \code{internal_count}: The number of observation collected by a node #' \item{\code{internal_count}: The number of observation collected by a node}
#' \item \code{leaf_value}: Leaf value #' \item{\code{leaf_value}: Leaf value}
#' \item \code{leaf_count}: The number of observations collected by a leaf #' \item{\code{leaf_count}: The number of observations collected by a leaf}
#' } #' }
#' #'
#' @examples #' @examples
...@@ -47,7 +45,7 @@ ...@@ -47,7 +45,7 @@
#' #'
#' tree_dt <- lgb.model.dt.tree(model) #' tree_dt <- lgb.model.dt.tree(model)
#' #'
#' @importFrom data.table := data.table rbindlist #' @importFrom data.table := rbindlist
#' @importFrom jsonlite fromJSON #' @importFrom jsonlite fromJSON
#' @export #' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) { lgb.model.dt.tree <- function(model, num_iteration = NULL) {
...@@ -87,7 +85,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) { ...@@ -87,7 +85,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
} }
#' @importFrom data.table data.table rbindlist #' @importFrom data.table := data.table rbindlist
single.tree.parse <- function(lgb_tree) { single.tree.parse <- function(lgb_tree) {
# Traverse tree function # Traverse tree function
......
#' Plot feature importance as a bar graph #' @name lgb.plot.importance
#' #' @title Plot feature importance as a bar graph
#' Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph. #' @description Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph.
#'
#' @param tree_imp a \code{data.table} returned by \code{\link{lgb.importance}}. #' @param tree_imp a \code{data.table} returned by \code{\link{lgb.importance}}.
#' @param top_n maximal number of top features to include into the plot. #' @param top_n maximal number of top features to include into the plot.
#' @param measure the name of importance measure to plot, can be "Gain", "Cover" or "Frequency". #' @param measure the name of importance measure to plot, can be "Gain", "Cover" or "Frequency".
......
#' Plot feature contribution as a bar graph #' @name lgb.plot.interpretation
#' #' @title Plot feature contribution as a bar graph
#' Plot previously calculated feature contribution as a bar graph. #' @description Plot previously calculated feature contribution as a bar graph.
#'
#' @param tree_interpretation_dt a \code{data.table} returned by \code{\link{lgb.interprete}}. #' @param tree_interpretation_dt a \code{data.table} returned by \code{\link{lgb.interprete}}.
#' @param top_n maximal number of top features to include into the plot. #' @param top_n maximal number of top features to include into the plot.
#' @param cols the column numbers of layout, will be used only for multiclass classification feature contribution. #' @param cols the column numbers of layout, will be used only for multiclass classification feature contribution.
......
#' Data preparator for LightGBM datasets (numeric) #' @name lgb.prepare
#' #' @title Data preparator for LightGBM datasets (numeric)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric without integers. Please use #' Factors and characters are converted to numeric without integers. Please use
#' \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. #' \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) #' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}. #' for input in \code{lgb.Dataset}.
#' #'
......
#' Data preparator for LightGBM datasets (integer) #' @name lgb.prepare2
#' #' @title Data preparator for LightGBM datasets (integer)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer). #' Factors and characters are converted to numeric (specifically: integer).
#' Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. #' Please use \code{\link{lgb.prepare_rules2}} if you want to apply this transformation to
#' This is useful if you have a specific need for integer dataset instead of numeric dataset. #' other datasets. This is useful if you have a specific need for integer dataset instead
#' Note that there are programs which do not support integer-only input. Consider this as a half #' of numeric dataset. Note that there are programs which do not support integer-only
#' memory technique which is dangerous, especially for LightGBM. #' input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) #' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}. #' for input in \code{lgb.Dataset}.
#' #'
......
#' Data preparator for LightGBM datasets with rules (numeric) #' @name lgb.prepare_rules
#' #' @title Data preparator for LightGBM datasets with rules (numeric)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric. In addition, keeps rules created #' Factors and characters are converted to numeric. In addition, keeps rules created
#' so you can convert other datasets using this converter. #' so you can convert other datasets using this converter.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used. #' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input #' The data must be converted to a matrix format (\code{as.matrix}) for input
#' in \code{lgb.Dataset}. #' in \code{lgb.Dataset}.
......
#' Data preparator for LightGBM datasets with rules (integer) #' @name lgb.prepare_rules2
#' #' @title Data preparator for LightGBM datasets with rules (integer)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer). #' Factors and characters are converted to numeric (specifically: integer).
#' In addition, keeps rules created so you can convert other datasets using this converter. #' In addition, keeps rules created so you can convert other datasets using this converter.
#' This is useful if you have a specific need for integer dataset instead of numeric dataset. #' This is useful if you have a specific need for integer dataset instead of numeric dataset.
#' Note that there are programs which do not support integer-only input. #' Note that there are programs which do not support integer-only input.
#' Consider this as a half memory technique which is dangerous, especially for LightGBM. #' Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used. #' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input in #' The data must be converted to a matrix format (\code{as.matrix}) for input in
#' \code{lgb.Dataset}. #' \code{lgb.Dataset}.
......
#' @title Main training logic for LightGBM
#' @name lgb.train #' @name lgb.train
#' @title Main training logic for LightGBM
#' @description Logic to train with LightGBM #' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation #' @param valids a list of \code{lgb.Dataset} objects, used for validation
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
#' original datasets #' original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters: #' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{ #' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' } #' }
......
#' LightGBM unloading error fix #' @name lgb.unloader
#' #' @title LightGBM unloading error fix
#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. #' @description Attempts to unload LightGBM packages so you can remove objects cleanly without
#' This is useful for instance if an object becomes stuck for no apparent reason and you do not want #' having to restart R. This is useful for instance if an object becomes stuck for no
#' to restart R to fix the lost object. #' apparent reason and you do not want to restart R to fix the lost object.
#'
#' @param restore Whether to reload \code{LightGBM} immediately after detaching from R. #' @param restore Whether to reload \code{LightGBM} immediately after detaching from R.
#' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once #' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once
#' unloading is performed. #' unloading is performed.
......
#' @name lgb_shared_params #' @name lgb_shared_params
#' @title Shared parameter docs #' @title Shared parameter docs
#' @description Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} #' @description Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
#' @param callbacks list of callback functions #' @param callbacks List of callback functions that are applied at each iteration.
#' List of callback functions that are applied at each iteration.
#' @param data a \code{lgb.Dataset} object, used for training #' @param data a \code{lgb.Dataset} object, used for training
#' @param early_stopping_rounds int. Activates early stopping. Requires at least one validation data #' @param early_stopping_rounds int. Activates early stopping. Requires at least one validation data
#' and one metric. If there's more than one, will check all of them #' and one metric. If there's more than one, will check all of them
...@@ -15,9 +14,8 @@ ...@@ -15,9 +14,8 @@
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
NULL NULL
#' @title Train a LightGBM model
#' @name lightgbm #' @name lightgbm
#' @title Train a LightGBM model
#' @description Simple interface for training a LightGBM model. #' @description Simple interface for training a LightGBM model.
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
...@@ -25,21 +23,23 @@ NULL ...@@ -25,21 +23,23 @@ NULL
#' @param save_name File name to use when writing the trained model to disk. Should end in ".model". #' @param save_name File name to use when writing the trained model to disk. Should end in ".model".
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{ #' \itemize{
#' \item{valids}{a list of \code{lgb.Dataset} objects, used for validation} #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
#' \item{obj}{objective function, can be character or custom objective function. Examples include #' \item{\code{obj}: objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber}, #' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} #' \code{binary}, \code{lambdarank}, \code{multiclass}}
#' \item{eval}{evaluation function, can be (a list of) character or custom eval function} #' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}} #' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{colnames}{feature names, if not null, will use this to overwrite the names in dataset} #' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{categorical_feature}{list of str or int. type int represents index, type str represents feature names} #' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model #' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets} #' into a predictor model which frees up memory and the original datasets}
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' } #' }
...@@ -94,16 +94,15 @@ lightgbm <- function(data, ...@@ -94,16 +94,15 @@ lightgbm <- function(data,
return(bst) return(bst)
} }
#' Training part from Mushroom Data Set #' @name agaricus.train
#' #' @title Training part from Mushroom Data Set
#' This data set is originally from the Mushroom data set, #' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#'
#' This data set includes the following fields: #' This data set includes the following fields:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label} the label for each record #' \item{\code{label}: the label for each record}
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. #' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' } #' }
#' #'
#' @references #' @references
...@@ -115,24 +114,21 @@ lightgbm <- function(data, ...@@ -115,24 +114,21 @@ lightgbm <- function(data,
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name agaricus.train
#' @usage data(agaricus.train) #' @usage data(agaricus.train)
#' @format A list containing a label vector, and a dgCMatrix object with 6513 #' @format A list containing a label vector, and a dgCMatrix object with 6513
#' rows and 127 variables #' rows and 127 variables
NULL NULL
#' Test part from Mushroom Data Set #' @name agaricus.test
#' #' @title Test part from Mushroom Data Set
#' This data set is originally from the Mushroom data set, #' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#'
#' This data set includes the following fields: #' This data set includes the following fields:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label} the label for each record #' \item{\code{label}: the label for each record}
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. #' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' } #' }
#'
#' @references #' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom #' https://archive.ics.uci.edu/ml/datasets/Mushroom
#' #'
...@@ -142,15 +138,14 @@ NULL ...@@ -142,15 +138,14 @@ NULL
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name agaricus.test
#' @usage data(agaricus.test) #' @usage data(agaricus.test)
#' @format A list containing a label vector, and a dgCMatrix object with 1611 #' @format A list containing a label vector, and a dgCMatrix object with 1611
#' rows and 126 variables #' rows and 126 variables
NULL NULL
#' Bank Marketing Data Set #' @name bank
#' #' @title Bank Marketing Data Set
#' This data set is originally from the Bank Marketing data set, #' @description This data set is originally from the Bank Marketing data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#' #'
#' It contains only the following: bank.csv with 10% of the examples and 17 inputs, #' It contains only the following: bank.csv with 10% of the examples and 17 inputs,
...@@ -164,7 +159,6 @@ NULL ...@@ -164,7 +159,6 @@ NULL
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name bank
#' @usage data(bank) #' @usage data(bank)
#' @format A data.table with 4521 rows and 17 variables #' @format A data.table with 4521 rows and 17 variables
NULL NULL
......
#' readRDS for \code{lgb.Booster} models #' @name readRDS.lgb.Booster
#' #' @title readRDS for \code{lgb.Booster} models
#' Attempts to load a model using RDS. #' @description Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}}
#'
#' @param file a connection or the name of the file where the R object is saved to or read from. #' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param refhook a hook function for handling reference objects. #' @param refhook a hook function for handling reference objects.
#' #'
......
#' saveRDS for \code{lgb.Booster} models #' @name saveRDS.lgb.Booster
#' #' @title saveRDS for \code{lgb.Booster} models
#' Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides #' @description Attempts to save a model using RDS. Has an additional parameter (\code{raw})
#' whether to save the raw model or not. #' which decides whether to save the raw model or not.
#'
#' @param object R object to serialize. #' @param object R object to serialize.
#' @param file a connection or the name of the file where the R object is saved to or read from. #' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), #' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment