lgb.cv.Rd 7.33 KB
Newer Older
James Lamb's avatar
James Lamb committed
1
2
3
4
5
6
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.cv.R
\name{lgb.cv}
\alias{lgb.cv}
\title{Main CV logic for LightGBM}
\usage{
7
8
9
lgb.cv(
  params = list(),
  data,
10
  nrounds = 100L,
11
  nfold = 3L,
12
13
14
15
  label = NULL,
  weight = NULL,
  obj = NULL,
  eval = NULL,
16
  verbose = 1L,
17
18
  record = TRUE,
  eval_freq = 1L,
19
  showsd = TRUE,
20
21
22
23
24
25
26
27
28
29
  stratified = TRUE,
  folds = NULL,
  init_model = NULL,
  colnames = NULL,
  categorical_feature = NULL,
  early_stopping_rounds = NULL,
  callbacks = list(),
  reset_data = FALSE,
  ...
)
James Lamb's avatar
James Lamb committed
30
31
}
\arguments{
32
33
\item{params}{a list of parameters. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
the "Parameters" section of the documentation} for a list of parameters and valid values.}
James Lamb's avatar
James Lamb committed
34

35
36
37
\item{data}{a \code{lgb.Dataset} object, used for training. Some functions, such as \code{\link{lgb.cv}},
may allow you to pass other types of data like \code{matrix} and then separately supply
\code{label} as a keyword argument.}
James Lamb's avatar
James Lamb committed
38
39
40
41
42

\item{nrounds}{number of training rounds}

\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}

43
\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
James Lamb's avatar
James Lamb committed
44
45
46

\item{weight}{vector of sample weights. If not NULL, will be set on the dataset}

47
\item{obj}{objective function, can be character or custom objective function. Examples include
James Lamb's avatar
James Lamb committed
48
\code{regression}, \code{regression_l1}, \code{huber},
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
\code{binary}, \code{lambdarank}, \code{multiclass}}

\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
            strings and functions.

            \itemize{
                \item{\bold{a. character vector}:
                    If you provide a character vector to this argument, it should contain strings with valid
                    evaluation metrics.
                    See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
                    The "metric" section of the documentation}
                    for a list of valid metrics.
                }
                \item{\bold{b. function}:
                     You can provide a custom evaluation function. This
                     should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
                     list with three elements:
                     \itemize{
                         \item{\code{name}: A string with the name of the metric, used for printing
                             and storing results.
                         }
                         \item{\code{value}: A single number indicating the value of the metric for the
                             given predictions and true values
                         }
                         \item{
                             \code{higher_better}: A boolean indicating whether higher values indicate a better fit.
                             For example, this would be \code{FALSE} for metrics like MAE or RMSE.
                         }
                     }
                }
                \item{\bold{c. list}:
                    If a list is given, it should only contain character vectors and functions.
                    These should follow the requirements from the descriptions above.
                }
            }}
James Lamb's avatar
James Lamb committed
84
85
86
87
88
89
90

\item{verbose}{verbosity for output. If <= 0, this will also disable the printing of evaluation results during training}

\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}

\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}

91
92
93
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation.
This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
slight speedup by avoiding unnecessary computation.}
94

James Lamb's avatar
James Lamb committed
95
96
97
98
99
100
101
102
103
104
105
\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
by the values of outcome labels.}

\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
(each element must be a vector of test fold's indices). When folds are supplied,
the \code{nfold} and \code{stratified} parameters are ignored.}

\item{init_model}{path of model file or \code{lgb.Booster} object, will continue training from this model}

\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}

106
107
108
\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}
James Lamb's avatar
James Lamb committed
109

110
111
112
113
114
\item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
fails to improve for \code{early_stopping_rounds} consecutive boosting rounds.
If training stops early, the returned model will have attribute \code{best_iter}
set to the iteration number of the best iteration.}
James Lamb's avatar
James Lamb committed
115

116
\item{callbacks}{List of callback functions that are applied at each iteration.}
James Lamb's avatar
James Lamb committed
117

118
119
\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model
into a predictor model which frees up memory and the original datasets}
120

James Lamb's avatar
James Lamb committed
121
122
\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
\itemize{
123
124
125
    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
    \item{\code{num_leaves}: Maximum number of leaves in one tree.}
    \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
James Lamb's avatar
James Lamb committed
126
                     overfitting when #data is small. Trees still grow leaf-wise.}
127
    \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
128
129
130
                 the number of real CPU cores (\code{parallel::detectCores(logical = FALSE)}),
                 not the number of threads (most CPUs use hyper-threading to generate 2 threads
                 per CPU core).}
131
132
}
NOTE: As of v3.3.0, use of \code{...} is deprecated. Add parameters to \code{params} directly.}
James Lamb's avatar
James Lamb committed
133
134
135
136
137
138
139
}
\value{
a trained model \code{lgb.CVBooster}.
}
\description{
Cross validation logic used by LightGBM
}
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
\section{Early Stopping}{


         "early stopping" refers to stopping the training process if the model's performance on a given
         validation set does not improve for several consecutive iterations.

         If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
         early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
         metrics will be considered for early stopping.

         If you want to only consider the first metric for early stopping, pass
         \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
         in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
         a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
         or \code{objective} (passed into \code{params}).
}

James Lamb's avatar
James Lamb committed
157
\examples{
158
\donttest{
James Lamb's avatar
James Lamb committed
159
160
161
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
162
163
164
165
166
167
params <- list(
  objective = "regression"
  , metric = "l2"
  , min_data = 1L
  , learning_rate = 1.0
)
168
169
170
model <- lgb.cv(
  params = params
  , data = dtrain
171
  , nrounds = 5L
172
  , nfold = 3L
173
)
James Lamb's avatar
James Lamb committed
174
}
175
}