Commit f2afb2cd authored by James Lamb, committed by Nikita Titov
Browse files

[R-package][docs] made roxygen2 tags explicit and cleaned up documentation (#2688)



* [R-package] made roxygen2 tags explicit and cleaned up documentation

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* Apply suggestions from code review
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* Update R-package/man/lightgbm.Rd
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* [R-package] moved @name to the top of roxygen blocks and removed some inaccurate information in documentation on parameters
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent c7ae833e
...@@ -35,5 +35,6 @@ Imports: ...@@ -35,5 +35,6 @@ Imports:
graphics, graphics,
jsonlite (>= 1.0), jsonlite (>= 1.0),
Matrix (>= 1.1-0), Matrix (>= 1.1-0),
methods methods,
utils
RoxygenNote: 7.0.2 RoxygenNote: 7.0.2
...@@ -50,4 +50,5 @@ importFrom(graphics,par) ...@@ -50,4 +50,5 @@ importFrom(graphics,par)
importFrom(jsonlite,fromJSON) importFrom(jsonlite,fromJSON)
importFrom(methods,is) importFrom(methods,is)
importFrom(stats,quantile) importFrom(stats,quantile)
importFrom(utils,read.delim)
useDynLib(lib_lightgbm , .registration = TRUE) useDynLib(lib_lightgbm , .registration = TRUE)
...@@ -662,11 +662,9 @@ Booster <- R6::R6Class( ...@@ -662,11 +662,9 @@ Booster <- R6::R6Class(
) )
) )
#' @name predict.lgb.Booster
#' Predict method for LightGBM model #' @title Predict method for LightGBM model
#' #' @description Predicted values based on class \code{lgb.Booster}
#' Predicted values based on class \code{lgb.Booster}
#'
#' @param object Object of class \code{lgb.Booster} #' @param object Object of class \code{lgb.Booster}
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
...@@ -708,8 +706,6 @@ Booster <- R6::R6Class( ...@@ -708,8 +706,6 @@ Booster <- R6::R6Class(
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' preds <- predict(model, test$data) #' preds <- predict(model, test$data)
#'
#' @rdname predict.lgb.Booster
#' @export #' @export
predict.lgb.Booster <- function(object, predict.lgb.Booster <- function(object,
data, data,
...@@ -739,12 +735,10 @@ predict.lgb.Booster <- function(object, ...@@ -739,12 +735,10 @@ predict.lgb.Booster <- function(object,
) )
} }
#' Load LightGBM model #' @name lgb.load
#' #' @title Load LightGBM model
#' Load LightGBM model from saved model file or string #' @description Load LightGBM takes in either a file path or model string.
#' Load LightGBM takes in either a file path or model string
#' If both are provided, Load will default to loading from file #' If both are provided, Load will default to loading from file
#'
#' @param filename path of model file #' @param filename path of model file
#' @param model_str a str containing the model #' @param model_str a str containing the model
#' #'
...@@ -774,7 +768,6 @@ predict.lgb.Booster <- function(object, ...@@ -774,7 +768,6 @@ predict.lgb.Booster <- function(object,
#' model_string <- model$save_model_to_string(NULL) # saves best iteration #' model_string <- model$save_model_to_string(NULL) # saves best iteration
#' load_booster_from_str <- lgb.load(model_str = model_string) #' load_booster_from_str <- lgb.load(model_str = model_string)
#' #'
#' @rdname lgb.load
#' @export #' @export
lgb.load <- function(filename = NULL, model_str = NULL) { lgb.load <- function(filename = NULL, model_str = NULL) {
...@@ -800,10 +793,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) { ...@@ -800,10 +793,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
} }
#' Save LightGBM model #' @name lgb.save
#' #' @title Save LightGBM model
#' Save LightGBM model #' @description Save LightGBM model
#'
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param filename saved filename #' @param filename saved filename
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
...@@ -830,8 +822,6 @@ lgb.load <- function(filename = NULL, model_str = NULL) { ...@@ -830,8 +822,6 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' lgb.save(model, "model.txt") #' lgb.save(model, "model.txt")
#'
#' @rdname lgb.save
#' @export #' @export
lgb.save <- function(booster, filename, num_iteration = NULL) { lgb.save <- function(booster, filename, num_iteration = NULL) {
...@@ -850,10 +840,9 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { ...@@ -850,10 +840,9 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
} }
#' Dump LightGBM model to json #' @name lgb.dump
#' #' @title Dump LightGBM model to json
#' Dump LightGBM model to json #' @description Dump LightGBM model to json
#'
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration #' @param num_iteration number of iteration want to predict with, NULL or <= 0 means use best iteration
#' #'
...@@ -880,7 +869,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { ...@@ -880,7 +869,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' ) #' )
#' json_model <- lgb.dump(model) #' json_model <- lgb.dump(model)
#' #'
#' @rdname lgb.dump
#' @export #' @export
lgb.dump <- function(booster, num_iteration = NULL) { lgb.dump <- function(booster, num_iteration = NULL) {
...@@ -894,9 +882,9 @@ lgb.dump <- function(booster, num_iteration = NULL) { ...@@ -894,9 +882,9 @@ lgb.dump <- function(booster, num_iteration = NULL) {
} }
#' Get record evaluation result from booster #' @name lgb.get.eval.result
#' #' @title Get record evaluation result from booster
#' Get record evaluation result from booster #' @description Get record evaluation result from booster
#' @param booster Object of class \code{lgb.Booster} #' @param booster Object of class \code{lgb.Booster}
#' @param data_name name of dataset #' @param data_name name of dataset
#' @param eval_name name of evaluation #' @param eval_name name of evaluation
...@@ -925,7 +913,6 @@ lgb.dump <- function(booster, num_iteration = NULL) { ...@@ -925,7 +913,6 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#' , early_stopping_rounds = 5L #' , early_stopping_rounds = 5L
#' ) #' )
#' lgb.get.eval.result(model, "test", "l2") #' lgb.get.eval.result(model, "test", "l2")
#' @rdname lgb.get.eval.result
#' @export #' @export
lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) { lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) {
......
...@@ -690,11 +690,9 @@ Dataset <- R6::R6Class( ...@@ -690,11 +690,9 @@ Dataset <- R6::R6Class(
) )
) )
#' Construct \code{lgb.Dataset} object #' @title Construct \code{lgb.Dataset} object
#' #' @description Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' Construct \code{lgb.Dataset} object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{lgb.Dataset}). #' or local file (that was created previously by saving an \code{lgb.Dataset}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param params a list of parameters #' @param params a list of parameters
#' @param reference reference dataset #' @param reference reference dataset
...@@ -741,10 +739,9 @@ lgb.Dataset <- function(data, ...@@ -741,10 +739,9 @@ lgb.Dataset <- function(data,
} }
#' Construct validation data #' @name lgb.Dataset.create.valid
#' #' @title Construct validation data
#' Construct validation data according to training data #' @description Construct validation data according to training data
#'
#' @param dataset \code{lgb.Dataset} object, training data #' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the \code{lgb.Dataset} object #' @param info a list of information of the \code{lgb.Dataset} object
...@@ -774,8 +771,9 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { ...@@ -774,8 +771,9 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
} }
#' Construct Dataset explicitly #' @name lgb.Dataset.construct
#' #' @title Construct Dataset explicitly
#' @description Construct Dataset explicitly
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' #'
#' @examples #' @examples
...@@ -798,9 +796,8 @@ lgb.Dataset.construct <- function(dataset) { ...@@ -798,9 +796,8 @@ lgb.Dataset.construct <- function(dataset) {
} }
#' Dimensions of an \code{lgb.Dataset} #' @title Dimensions of an \code{lgb.Dataset}
#' #' @description Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset} #' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters #' @param ... other parameters
#' #'
...@@ -834,11 +831,9 @@ dim.lgb.Dataset <- function(x, ...) { ...@@ -834,11 +831,9 @@ dim.lgb.Dataset <- function(x, ...) {
} }
#' Handling of column names of \code{lgb.Dataset} #' @title Handling of column names of \code{lgb.Dataset}
#' #' @description Only column names are supported for \code{lgb.Dataset}, thus setting of
#' Only column names are supported for \code{lgb.Dataset}, thus setting of
#' row names would have no effect and returned row names would be NULL. #' row names would have no effect and returned row names would be NULL.
#'
#' @param x object of class \code{lgb.Dataset} #' @param x object of class \code{lgb.Dataset}
#' @param value a list of two elements: the first one is ignored #' @param value a list of two elements: the first one is ignored
#' and the second one is column names #' and the second one is column names
...@@ -912,11 +907,9 @@ dimnames.lgb.Dataset <- function(x) { ...@@ -912,11 +907,9 @@ dimnames.lgb.Dataset <- function(x) {
} }
#' Slice a dataset #' @title Slice a dataset
#' #' @description Get a new \code{lgb.Dataset} containing the specified rows of
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' original \code{lgb.Dataset} object #' original \code{lgb.Dataset} object
#'
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param idxset an integer vector of indices of rows needed #' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used) #' @param ... other parameters (currently not used)
...@@ -951,8 +944,9 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { ...@@ -951,8 +944,9 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
} }
#' Get information of an \code{lgb.Dataset} object #' @name getinfo
#' #' @title Get information of an \code{lgb.Dataset} object
#' @description Get one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details) #' @param name the name of the information field to get (see details)
#' @param ... other parameters #' @param ... other parameters
...@@ -1000,8 +994,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { ...@@ -1000,8 +994,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
} }
#' Set information of an \code{lgb.Dataset} object #' @name setinfo
#' #' @title Set information of an \code{lgb.Dataset} object
#' @description Set one attribute of a \code{lgb.Dataset}
#' @param dataset Object of class \code{lgb.Dataset} #' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the field to get #' @param name the name of the field to get
#' @param info the specific field of information to set #' @param info the specific field of information to set
...@@ -1012,10 +1007,13 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { ...@@ -1012,10 +1007,13 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' The \code{name} field can be one of the following: #' The \code{name} field can be one of the following:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label}: label lightgbm learn from ; #' \item{\code{label}: vector of labels to use as the target variable}
#' \item \code{weight}: to do a weight rescale ; #' \item{\code{weight}: to do a weight rescale}
#' \item \code{init_score}: initial score is the base prediction lightgbm will boost from ; #' \item{\code{init_score}: initial score is the base prediction lightgbm will boost from}
#' \item \code{group}. #' \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
#' group rows together as ordered results from the same set of candidate results to be ranked.
#' For example, if you have a 1000-row dataset that contains 250 4-document query results,
#' set this to \code{rep(4L, 250L)}}
#' } #' }
#' #'
#' @examples #' @examples
...@@ -1049,11 +1047,14 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { ...@@ -1049,11 +1047,14 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
invisible(dataset$setinfo(name, info)) invisible(dataset$setinfo(name, info))
} }
#' Set categorical feature of \code{lgb.Dataset} #' @name lgb.Dataset.set.categorical
#' #' @title Set categorical feature of \code{lgb.Dataset}
#' @description Set the categorical features of an \code{lgb.Dataset} object. Use this function
#' to tell LightGBM which features should be treated as categorical.
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features #' @param categorical_feature categorical features. This can either be a character vector of feature
#' #' names or an integer vector with the indices of the features (e.g.
#' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @return passed dataset #' @return passed dataset
#' #'
#' @examples #' @examples
...@@ -1079,10 +1080,9 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { ...@@ -1079,10 +1080,9 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
} }
#' Set reference of \code{lgb.Dataset} #' @name lgb.Dataset.set.reference
#' #' @title Set reference of \code{lgb.Dataset}
#' If you want to use validation data, you should set reference to training data #' @description If you want to use validation data, you should set reference to training data
#'
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset} #' @param reference object of class \code{lgb.Dataset}
#' #'
...@@ -1111,11 +1111,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) { ...@@ -1111,11 +1111,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
invisible(dataset$set_reference(reference)) invisible(dataset$set_reference(reference))
} }
#' Save \code{lgb.Dataset} to a binary file #' @name lgb.Dataset.save
#' #' @title Save \code{lgb.Dataset} to a binary file
#' Please note that \code{init_score} is not saved in binary file. #' @description Please note that \code{init_score} is not saved in binary file.
#' If you need it, please set it again after loading Dataset. #' If you need it, please set it again after loading Dataset.
#'
#' @param dataset object of class \code{lgb.Dataset} #' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file #' @param fname object filename of output file
#' #'
...@@ -1127,8 +1126,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) { ...@@ -1127,8 +1126,6 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' train <- agaricus.train #' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label) #' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin") #' lgb.Dataset.save(dtrain, "data.bin")
#'
#' @rdname lgb.Dataset.save
#' @export #' @export
lgb.Dataset.save <- function(dataset, fname) { lgb.Dataset.save <- function(dataset, fname) {
......
#' @importFrom methods is #' @importFrom methods is
#' @importFrom R6 R6Class #' @importFrom R6 R6Class
#' @importFrom utils read.delim
Predictor <- R6::R6Class( Predictor <- R6::R6Class(
classname = "lgb.Predictor", classname = "lgb.Predictor",
...@@ -113,7 +114,7 @@ Predictor <- R6::R6Class( ...@@ -113,7 +114,7 @@ Predictor <- R6::R6Class(
) )
# Get predictions from file # Get predictions from file
preds <- read.delim(tmp_filename, header = FALSE, sep = "\t") preds <- utils::read.delim(tmp_filename, header = FALSE, sep = "\t")
num_row <- nrow(preds) num_row <- nrow(preds)
preds <- as.vector(t(preds)) preds <- as.vector(t(preds))
......
...@@ -17,9 +17,9 @@ CVBooster <- R6::R6Class( ...@@ -17,9 +17,9 @@ CVBooster <- R6::R6Class(
) )
) )
#' @name lgb.cv
#' @title Main CV logic for LightGBM #' @title Main CV logic for LightGBM
#' @description Cross validation logic used by LightGBM #' @description Cross validation logic used by LightGBM
#' @name lgb.cv
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix. #' @param label vector of response values. Should be provided only when data is an R-matrix.
...@@ -36,19 +36,19 @@ CVBooster <- R6::R6Class( ...@@ -36,19 +36,19 @@ CVBooster <- R6::R6Class(
#' (each element must be a vector of test fold's indices). When folds are supplied, #' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored. #' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int #' @param categorical_feature categorical features. This can either be a character vector of feature
#' type int represents index, #' names or an integer vector with the indices of the features (e.g.
#' type str represents feature names #' \code{c(1L, 10L)} to say "the first and tenth columns").
#' @param callbacks List of callback functions that are applied at each iteration. #' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets #' into a predictor model which frees up memory and the original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters: #' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{ #' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Trees still grow leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPU using hyper-threading to generate 2 threads per CPU core).}
#' } #' }
......
#' Compute feature importance in a model #' @name lgb.importance
#' #' @title Compute feature importance in a model
#' Creates a \code{data.table} of feature importances in a model. #' @description Creates a \code{data.table} of feature importances in a model.
#'
#' @param model object of class \code{lgb.Booster}. #' @param model object of class \code{lgb.Booster}.
#' @param percentage whether to show importance in relative percentage. #' @param percentage whether to show importance in relative percentage.
#' #'
#' @return #' @return For a tree model, a \code{data.table} with the following columns:
#'
#' For a tree model, a \code{data.table} with the following columns:
#' \itemize{ #' \itemize{
#' \item \code{Feature} Feature names in the model. #' \item{\code{Feature}: Feature names in the model.}
#' \item \code{Gain} The total gain of this feature's splits. #' \item{\code{Gain}: The total gain of this feature's splits.}
#' \item \code{Cover} The number of observations related to this feature. #' \item{\code{Cover}: The number of observations related to this feature.}
#' \item \code{Frequency} The number of times a feature split in trees. #' \item{\code{Frequency}: The number of times a feature split in trees.}
#' } #' }
#' #'
#' @examples #' @examples
......
#' Compute feature contribution of prediction #' @name lgb.interprete
#' #' @title Compute feature contribution of prediction
#' Computes feature contribution components of rawscore prediction. #' @description Computes feature contribution components of rawscore prediction.
#'
#' @param model object of class \code{lgb.Booster}. #' @param model object of class \code{lgb.Booster}.
#' @param data a matrix object or a dgCMatrix object. #' @param data a matrix object or a dgCMatrix object.
#' @param idxset an integer vector of indices of rows needed. #' @param idxset an integer vector of indices of rows needed.
...@@ -10,8 +9,8 @@ ...@@ -10,8 +9,8 @@
#' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} #' @return For regression, binary classification and lambdarank model, a \code{list} of \code{data.table}
#' with the following columns: #' with the following columns:
#' \itemize{ #' \itemize{
#' \item \code{Feature} Feature names in the model. #' \item{\code{Feature}: Feature names in the model.}
#' \item \code{Contribution} The total contribution of this feature's splits. #' \item{\code{Contribution}: The total contribution of this feature's splits.}
#' } #' }
#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and #' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and
#' Contribution columns to each class. #' Contribution columns to each class.
......
#' Parse a LightGBM model json dump #' @name lgb.model.dt.tree
#' #' @title Parse a LightGBM model json dump
#' Parse a LightGBM model json dump into a \code{data.table} structure. #' @description Parse a LightGBM model json dump into a \code{data.table} structure.
#'
#' @param model object of class \code{lgb.Booster} #' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or #' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration #' <= 0 means use best iteration
#'
#' @return #' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs. #' A \code{data.table} with detailed information about model trees' nodes and leafs.
#' #'
#' The columns of the \code{data.table} are: #' The columns of the \code{data.table} are:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{tree_index}: ID of a tree in a model (integer) #' \item{\code{tree_index}: ID of a tree in a model (integer)}
#' \item \code{split_index}: ID of a node in a tree (integer) #' \item{\code{split_index}: ID of a node in a tree (integer)}
#' \item \code{split_feature}: for a node, it's a feature name (character); #' \item{\code{split_feature}: for a node, it's a feature name (character);
#' for a leaf, it simply labels it as \code{"NA"} #' for a leaf, it simply labels it as \code{"NA"}}
#' \item \code{node_parent}: ID of the parent node for current node (integer) #' \item{\code{node_parent}: ID of the parent node for current node (integer)}
#' \item \code{leaf_index}: ID of a leaf in a tree (integer) #' \item{\code{leaf_index}: ID of a leaf in a tree (integer)}
#' \item \code{leaf_parent}: ID of the parent node for current leaf (integer) #' \item{\code{leaf_parent}: ID of the parent node for current leaf (integer)}
#' \item \code{split_gain}: Split gain of a node #' \item{\code{split_gain}: Split gain of a node}
#' \item \code{threshold}: Splitting threshold value of a node #' \item{\code{threshold}: Splitting threshold value of a node}
#' \item \code{decision_type}: Decision type of a node #' \item{\code{decision_type}: Decision type of a node}
#' \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right #' \item{\code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right}
#' \item \code{internal_value}: Node value #' \item{\code{internal_value}: Node value}
#' \item \code{internal_count}: The number of observation collected by a node #' \item{\code{internal_count}: The number of observation collected by a node}
#' \item \code{leaf_value}: Leaf value #' \item{\code{leaf_value}: Leaf value}
#' \item \code{leaf_count}: The number of observations collected by a leaf #' \item{\code{leaf_count}: The number of observations collected by a leaf}
#' } #' }
#' #'
#' @examples #' @examples
...@@ -47,7 +45,7 @@ ...@@ -47,7 +45,7 @@
#' #'
#' tree_dt <- lgb.model.dt.tree(model) #' tree_dt <- lgb.model.dt.tree(model)
#' #'
#' @importFrom data.table := data.table rbindlist #' @importFrom data.table := rbindlist
#' @importFrom jsonlite fromJSON #' @importFrom jsonlite fromJSON
#' @export #' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) { lgb.model.dt.tree <- function(model, num_iteration = NULL) {
...@@ -87,7 +85,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) { ...@@ -87,7 +85,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {
} }
#' @importFrom data.table data.table rbindlist #' @importFrom data.table := data.table rbindlist
single.tree.parse <- function(lgb_tree) { single.tree.parse <- function(lgb_tree) {
# Traverse tree function # Traverse tree function
......
#' Plot feature importance as a bar graph #' @name lgb.plot.importance
#' #' @title Plot feature importance as a bar graph
#' Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph. #' @description Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph.
#'
#' @param tree_imp a \code{data.table} returned by \code{\link{lgb.importance}}. #' @param tree_imp a \code{data.table} returned by \code{\link{lgb.importance}}.
#' @param top_n maximal number of top features to include into the plot. #' @param top_n maximal number of top features to include into the plot.
#' @param measure the name of importance measure to plot, can be "Gain", "Cover" or "Frequency". #' @param measure the name of importance measure to plot, can be "Gain", "Cover" or "Frequency".
......
#' Plot feature contribution as a bar graph #' @name lgb.plot.interpretation
#' #' @title Plot feature contribution as a bar graph
#' Plot previously calculated feature contribution as a bar graph. #' @description Plot previously calculated feature contribution as a bar graph.
#'
#' @param tree_interpretation_dt a \code{data.table} returned by \code{\link{lgb.interprete}}. #' @param tree_interpretation_dt a \code{data.table} returned by \code{\link{lgb.interprete}}.
#' @param top_n maximal number of top features to include into the plot. #' @param top_n maximal number of top features to include into the plot.
#' @param cols the column numbers of layout, will be used only for multiclass classification feature contribution. #' @param cols the column numbers of layout, will be used only for multiclass classification feature contribution.
......
#' Data preparator for LightGBM datasets (numeric) #' @name lgb.prepare
#' #' @title Data preparator for LightGBM datasets (numeric)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric without integers. Please use #' Factors and characters are converted to numeric without integers. Please use
#' \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. #' \code{\link{lgb.prepare_rules}} if you want to apply this transformation to other datasets.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) #' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}. #' for input in \code{lgb.Dataset}.
#' #'
......
#' Data preparator for LightGBM datasets (integer) #' @name lgb.prepare2
#' #' @title Data preparator for LightGBM datasets (integer)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer). #' Factors and characters are converted to numeric (specifically: integer).
#' Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. #' Please use \code{\link{lgb.prepare_rules2}} if you want to apply this transformation to
#' This is useful if you have a specific need for integer dataset instead of numeric dataset. #' other datasets. This is useful if you have a specific need for integer dataset instead
#' Note that there are programs which do not support integer-only input. Consider this as a half #' of numeric dataset. Note that there are programs which do not support integer-only
#' memory technique which is dangerous, especially for LightGBM. #' input. Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) #' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix})
#' for input in \code{lgb.Dataset}. #' for input in \code{lgb.Dataset}.
#' #'
......
#' Data preparator for LightGBM datasets with rules (numeric) #' @name lgb.prepare_rules
#' #' @title Data preparator for LightGBM datasets with rules (numeric)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric. In addition, keeps rules created #' Factors and characters are converted to numeric. In addition, keeps rules created
#' so you can convert other datasets using this converter. #' so you can convert other datasets using this converter.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used. #' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input #' The data must be converted to a matrix format (\code{as.matrix}) for input
#' in \code{lgb.Dataset}. #' in \code{lgb.Dataset}.
......
#' Data preparator for LightGBM datasets with rules (integer) #' @name lgb.prepare_rules2
#' #' @title Data preparator for LightGBM datasets with rules (integer)
#' Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. #' @description Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}.
#' Factors and characters are converted to numeric (specifically: integer). #' Factors and characters are converted to numeric (specifically: integer).
#' In addition, keeps rules created so you can convert other datasets using this converter. #' In addition, keeps rules created so you can convert other datasets using this converter.
#' This is useful if you have a specific need for integer dataset instead of numeric dataset. #' This is useful if you have a specific need for integer dataset instead of numeric dataset.
#' Note that there are programs which do not support integer-only input. #' Note that there are programs which do not support integer-only input.
#' Consider this as a half memory technique which is dangerous, especially for LightGBM. #' Consider this as a half memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare. #' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used. #' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}).
#' The data must be converted to a matrix format (\code{as.matrix}) for input in #' The data must be converted to a matrix format (\code{as.matrix}) for input in
#' \code{lgb.Dataset}. #' \code{lgb.Dataset}.
......
#' @title Main training logic for LightGBM
#' @name lgb.train #' @name lgb.train
#' @title Main training logic for LightGBM
#' @description Logic to train with LightGBM #' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation #' @param valids a list of \code{lgb.Dataset} objects, used for validation
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
#' original datasets #' original datasets
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters: #' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{ #' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' } #' }
......
#' LightGBM unloading error fix #' @name lgb.unloader
#' #' @title LightGBM unloading error fix
#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. #' @description Attempts to unload LightGBM packages so you can remove objects cleanly without
#' This is useful for instance if an object becomes stuck for no apparent reason and you do not want #' having to restart R. This is useful for instance if an object becomes stuck for no
#' to restart R to fix the lost object. #' apparent reason and you do not want to restart R to fix the lost object.
#'
#' @param restore Whether to reload \code{LightGBM} immediately after detaching from R. #' @param restore Whether to reload \code{LightGBM} immediately after detaching from R.
#' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once #' Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once
#' unloading is performed. #' unloading is performed.
......
#' @name lgb_shared_params #' @name lgb_shared_params
#' @title Shared parameter docs #' @title Shared parameter docs
#' @description Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} #' @description Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
#' @param callbacks list of callback functions #' @param callbacks List of callback functions that are applied at each iteration.
#' List of callback functions that are applied at each iteration.
#' @param data a \code{lgb.Dataset} object, used for training #' @param data a \code{lgb.Dataset} object, used for training
#' @param early_stopping_rounds int. Activates early stopping. Requires at least one validation data #' @param early_stopping_rounds int. Activates early stopping. Requires at least one validation data
#' and one metric. If there's more than one, will check all of them #' and one metric. If there's more than one, will check all of them
...@@ -15,9 +14,8 @@ ...@@ -15,9 +14,8 @@
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
NULL NULL
#' @title Train a LightGBM model
#' @name lightgbm #' @name lightgbm
#' @title Train a LightGBM model
#' @description Simple interface for training a LightGBM model. #' @description Simple interface for training a LightGBM model.
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
...@@ -25,21 +23,23 @@ NULL ...@@ -25,21 +23,23 @@ NULL
#' @param save_name File name to use when writing the trained model to disk. Should end in ".model". #' @param save_name File name to use when writing the trained model to disk. Should end in ".model".
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{ #' \itemize{
#' \item{valids}{a list of \code{lgb.Dataset} objects, used for validation} #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
#' \item{obj}{objective function, can be character or custom objective function. Examples include #' \item{\code{obj}: objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber}, #' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} #' \code{binary}, \code{lambdarank}, \code{multiclass}}
#' \item{eval}{evaluation function, can be (a list of) character or custom eval function} #' \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
#' \item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}} #' \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{colnames}{feature names, if not null, will use this to overwrite the names in dataset} #' \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
#' \item{categorical_feature}{list of str or int. type int represents index, type str represents feature names} #' \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
#' \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model #' names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets} #' into a predictor model which frees up memory and the original datasets}
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}} #' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127} #' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with #' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.} #' overfit when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to #' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' } #' }
...@@ -94,16 +94,15 @@ lightgbm <- function(data, ...@@ -94,16 +94,15 @@ lightgbm <- function(data,
return(bst) return(bst)
} }
#' Training part from Mushroom Data Set #' @name agaricus.train
#' #' @title Training part from Mushroom Data Set
#' This data set is originally from the Mushroom data set, #' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#'
#' This data set includes the following fields: #' This data set includes the following fields:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label} the label for each record #' \item{\code{label}: the label for each record}
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. #' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' } #' }
#' #'
#' @references #' @references
...@@ -115,24 +114,21 @@ lightgbm <- function(data, ...@@ -115,24 +114,21 @@ lightgbm <- function(data,
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name agaricus.train
#' @usage data(agaricus.train) #' @usage data(agaricus.train)
#' @format A list containing a label vector, and a dgCMatrix object with 6513 #' @format A list containing a label vector, and a dgCMatrix object with 6513
#' rows and 127 variables #' rows and 127 variables
NULL NULL
#' Test part from Mushroom Data Set #' @name agaricus.test
#' #' @title Test part from Mushroom Data Set
#' This data set is originally from the Mushroom data set, #' @description This data set is originally from the Mushroom data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#'
#' This data set includes the following fields: #' This data set includes the following fields:
#' #'
#' \itemize{ #' \itemize{
#' \item \code{label} the label for each record #' \item{\code{label}: the label for each record}
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. #' \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.}
#' } #' }
#'
#' @references #' @references
#' https://archive.ics.uci.edu/ml/datasets/Mushroom #' https://archive.ics.uci.edu/ml/datasets/Mushroom
#' #'
...@@ -142,15 +138,14 @@ NULL ...@@ -142,15 +138,14 @@ NULL
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name agaricus.test
#' @usage data(agaricus.test) #' @usage data(agaricus.test)
#' @format A list containing a label vector, and a dgCMatrix object with 1611 #' @format A list containing a label vector, and a dgCMatrix object with 1611
#' rows and 126 variables #' rows and 126 variables
NULL NULL
#' Bank Marketing Data Set #' @name bank
#' #' @title Bank Marketing Data Set
#' This data set is originally from the Bank Marketing data set, #' @description This data set is originally from the Bank Marketing data set,
#' UCI Machine Learning Repository. #' UCI Machine Learning Repository.
#' #'
#' It contains only the following: bank.csv with 10% of the examples and 17 inputs, #' It contains only the following: bank.csv with 10% of the examples and 17 inputs,
...@@ -164,7 +159,6 @@ NULL ...@@ -164,7 +159,6 @@ NULL
#' #'
#' @docType data #' @docType data
#' @keywords datasets #' @keywords datasets
#' @name bank
#' @usage data(bank) #' @usage data(bank)
#' @format A data.table with 4521 rows and 17 variables #' @format A data.table with 4521 rows and 17 variables
NULL NULL
......
#' readRDS for \code{lgb.Booster} models #' @name readRDS.lgb.Booster
#' #' @title readRDS for \code{lgb.Booster} models
#' Attempts to load a model using RDS. #' @description Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}}
#'
#' @param file a connection or the name of the file where the R object is saved to or read from. #' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param refhook a hook function for handling reference objects. #' @param refhook a hook function for handling reference objects.
#' #'
......
#' saveRDS for \code{lgb.Booster} models #' @name saveRDS.lgb.Booster
#' #' @title saveRDS for \code{lgb.Booster} models
#' Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides #' @description Attempts to save a model using RDS. Has an additional parameter (\code{raw})
#' whether to save the raw model or not. #' which decides whether to save the raw model or not.
#'
#' @param object R object to serialize. #' @param object R object to serialize.
#' @param file a connection or the name of the file where the R object is saved to or read from. #' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), #' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment