Unverified Commit d4325c5a authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] added support for first_metric_only (fixes #2368) (#2912)

* [R-package] started implementing first_metric_only

* trying stuff

* more changes

* fixed handling of multiple metrics

* fixed tests

* remove duplicate tests

* get training tests

* fixes for lgb.cv()

* fixes for lgb.cv()

* fix linting
parent 636e4eee
...@@ -108,3 +108,17 @@ ...@@ -108,3 +108,17 @@
) )
return(c(learning_params, .DATASET_PARAMETERS())) return(c(learning_params, .DATASET_PARAMETERS()))
} }
# [description]
# Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric,
# a few different strings can be used to indicate "no metrics".
# [returns]
# A character vector
.NO_METRIC_STRINGS <- function() {
return(c(
"na"
, "None"
, "null"
, "custom"
))
}
...@@ -268,7 +268,7 @@ cb.record.evaluation <- function() { ...@@ -268,7 +268,7 @@ cb.record.evaluation <- function() {
} }
cb.early.stop <- function(stopping_rounds, verbose = TRUE) { cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = TRUE) {
# Initialize variables # Initialize variables
factor_to_bigger_better <- NULL factor_to_bigger_better <- NULL
...@@ -325,8 +325,16 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) { ...@@ -325,8 +325,16 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) {
# Store iteration # Store iteration
cur_iter <- env$iteration cur_iter <- env$iteration
# By default, any metric can trigger early stopping. This can be disabled
# with 'first_metric_only = TRUE'
if (isTRUE(first_metric_only)) {
evals_to_check <- 1L
} else {
evals_to_check <- seq_len(eval_len)
}
# Loop through evaluation # Loop through evaluation
for (i in seq_len(eval_len)) { for (i in evals_to_check) {
# Store score # Store score
score <- env$eval_list[[i]]$value * factor_to_bigger_better[i] score <- env$eval_list[[i]]$value * factor_to_bigger_better[i]
......
...@@ -24,10 +24,6 @@ CVBooster <- R6::R6Class( ...@@ -24,10 +24,6 @@ CVBooster <- R6::R6Class(
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
#' @param weight vector of response values. If not NULL, will set to dataset #' @param weight vector of response values. If not NULL, will set to dataset
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation #' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
...@@ -52,7 +48,7 @@ CVBooster <- R6::R6Class( ...@@ -52,7 +48,7 @@ CVBooster <- R6::R6Class(
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPU using hyper-threading to generate 2 threads per CPU core).}
#' } #' }
#' #' @inheritSection lgb_shared_params Early Stopping
#' @return a trained model \code{lgb.CVBooster}. #' @return a trained model \code{lgb.CVBooster}.
#' #'
#' @examples #' @examples
...@@ -114,7 +110,7 @@ lgb.cv <- function(params = list() ...@@ -114,7 +110,7 @@ lgb.cv <- function(params = list()
params <- lgb.check.obj(params, obj) params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval) params <- lgb.check.eval(params, eval)
fobj <- NULL fobj <- NULL
feval <- NULL eval_functions <- list(NULL)
# Check for objective (function or not) # Check for objective (function or not)
if (is.function(params$objective)) { if (is.function(params$objective)) {
...@@ -122,9 +118,17 @@ lgb.cv <- function(params = list() ...@@ -122,9 +118,17 @@ lgb.cv <- function(params = list()
params$objective <- "NONE" params$objective <- "NONE"
} }
# Check for loss (function or not) # If loss is a single function, store it as a 1-element list
# (for backwards compatibility). If it is a list of functions, store
# all of them
if (is.function(eval)) { if (is.function(eval)) {
feval <- eval eval_functions <- list(eval)
}
if (methods::is(eval, "list")) {
eval_functions <- Filter(
f = is.function
, x = eval
)
} }
# Init predictor to empty # Init predictor to empty
...@@ -266,6 +270,7 @@ lgb.cv <- function(params = list() ...@@ -266,6 +270,7 @@ lgb.cv <- function(params = list()
callbacks callbacks
, cb.early.stop( , cb.early.stop(
stopping_rounds = early_stopping_rounds stopping_rounds = early_stopping_rounds
, first_metric_only = isTRUE(params[["first_metric_only"]])
, verbose = verbose , verbose = verbose
) )
) )
...@@ -357,7 +362,11 @@ lgb.cv <- function(params = list() ...@@ -357,7 +362,11 @@ lgb.cv <- function(params = list()
# Update one boosting iteration # Update one boosting iteration
msg <- lapply(cv_booster$boosters, function(fd) { msg <- lapply(cv_booster$boosters, function(fd) {
fd$booster$update(fobj = fobj) fd$booster$update(fobj = fobj)
fd$booster$eval_valid(feval = feval) out <- list()
for (eval_function in eval_functions) {
out <- append(out, fd$booster$eval_valid(feval = eval_function))
}
return(out)
}) })
# Prepare collection of evaluation results # Prepare collection of evaluation results
...@@ -384,7 +393,13 @@ lgb.cv <- function(params = list() ...@@ -384,7 +393,13 @@ lgb.cv <- function(params = list()
# When early stopping is not activated, we compute the best iteration / score ourselves # When early stopping is not activated, we compute the best iteration / score ourselves
# based on the first first metric # based on the first first metric
if (record && is.na(env$best_score)) { if (record && is.na(env$best_score)) {
first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L] # when using a custom eval function, the metric name is returned from the
# function, so figure it out from record_evals
if (!is.null(eval_functions[1L])) {
first_metric <- names(cv_booster$record_evals[["valid"]])[1L]
} else {
first_metric <- cv_booster$.__enclos_env__$private$eval_names[1L]
}
.find_best <- which.min .find_best <- which.min
if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) { if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) {
.find_best <- which.max .find_best <- which.max
...@@ -576,7 +591,8 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { ...@@ -576,7 +591,8 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) {
msg[[i]][[j]]$value })) msg[[i]][[j]]$value }))
}) })
# Get evaluation # Get evaluation. Just taking the first element here to
# get structture (name, higher_bettter, data_name)
ret_eval <- msg[[1L]] ret_eval <- msg[[1L]]
# Go through evaluation length items # Go through evaluation length items
......
...@@ -3,10 +3,6 @@ ...@@ -3,10 +3,6 @@
#' @description Logic to train with LightGBM #' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params #' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation #' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param eval evaluation function, can be (a list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int #' @param categorical_feature list of str or int
...@@ -26,6 +22,7 @@ ...@@ -26,6 +22,7 @@
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPU using hyper-threading to generate 2 threads per CPU core).}
#' } #' }
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained booster model \code{lgb.Booster}. #' @return a trained booster model \code{lgb.Booster}.
#' #'
#' @examples #' @examples
...@@ -90,7 +87,7 @@ lgb.train <- function(params = list(), ...@@ -90,7 +87,7 @@ lgb.train <- function(params = list(),
params <- lgb.check.obj(params, obj) params <- lgb.check.obj(params, obj)
params <- lgb.check.eval(params, eval) params <- lgb.check.eval(params, eval)
fobj <- NULL fobj <- NULL
feval <- NULL eval_functions <- list(NULL)
# Check for objective (function or not) # Check for objective (function or not)
if (is.function(params$objective)) { if (is.function(params$objective)) {
...@@ -98,9 +95,17 @@ lgb.train <- function(params = list(), ...@@ -98,9 +95,17 @@ lgb.train <- function(params = list(),
params$objective <- "NONE" params$objective <- "NONE"
} }
# Check for loss (function or not) # If loss is a single function, store it as a 1-element list
# (for backwards compatibility). If it is a list of functions, store
# all of them
if (is.function(eval)) { if (is.function(eval)) {
feval <- eval eval_functions <- list(eval)
}
if (methods::is(eval, "list")) {
eval_functions <- Filter(
f = is.function
, x = eval
)
} }
# Init predictor to empty # Init predictor to empty
...@@ -235,6 +240,7 @@ lgb.train <- function(params = list(), ...@@ -235,6 +240,7 @@ lgb.train <- function(params = list(),
callbacks callbacks
, cb.early.stop( , cb.early.stop(
stopping_rounds = early_stopping_rounds stopping_rounds = early_stopping_rounds
, first_metric_only = isTRUE(params[["first_metric_only"]])
, verbose = verbose , verbose = verbose
) )
) )
...@@ -280,13 +286,28 @@ lgb.train <- function(params = list(), ...@@ -280,13 +286,28 @@ lgb.train <- function(params = list(),
# Collection: Has validation dataset? # Collection: Has validation dataset?
if (length(valids) > 0L) { if (length(valids) > 0L) {
# Validation has training dataset? # Get evaluation results with passed-in functions
if (valid_contain_train) { for (eval_function in eval_functions) {
eval_list <- append(eval_list, booster$eval_train(feval = feval))
# Validation has training dataset?
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = eval_function))
}
eval_list <- append(eval_list, booster$eval_valid(feval = eval_function))
}
# Calling booster$eval_valid() will get
# evaluation results with the metrics in params$metric by calling LGBM_BoosterGetEval_R",
# so need to be sure that gets called, which it wouldn't be above if no functions
# were passed in
if (length(eval_functions) == 0L) {
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = eval_function))
}
eval_list <- append(eval_list, booster$eval_valid(feval = eval_function))
} }
# Has no validation dataset
eval_list <- append(eval_list, booster$eval_valid(feval = feval))
} }
# Write evaluation result in environment # Write evaluation result in environment
...@@ -312,7 +333,7 @@ lgb.train <- function(params = list(), ...@@ -312,7 +333,7 @@ lgb.train <- function(params = list(),
# when using a custom eval function, the metric name is returned from the # when using a custom eval function, the metric name is returned from the
# function, so figure it out from record_evals # function, so figure it out from record_evals
if (!is.null(feval)) { if (!is.null(eval_functions[1L])) {
first_metric <- names(booster$record_evals[[first_valid_name]])[1L] first_metric <- names(booster$record_evals[[first_valid_name]])[1L]
} else { } else {
first_metric <- booster$.__enclos_env__$private$eval_names[1L] first_metric <- booster$.__enclos_env__$private$eval_names[1L]
......
...@@ -10,11 +10,61 @@ ...@@ -10,11 +10,61 @@
#' and one metric. If there's more than one, will check all of them #' and one metric. If there's more than one, will check all of them
#' except the training data. Returns the model with (best_iter + early_stopping_rounds). #' except the training data. Returns the model with (best_iter + early_stopping_rounds).
#' If early stopping occurs, the model will have 'best_iter' field. #' If early stopping occurs, the model will have 'best_iter' field.
#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of
#' strings and functions.
#'
#' \itemize{
#' \item{\bold{a. character vector}:
#' If you provide a character vector to this argument, it should contain strings with valid
#' evaluation metrics.
#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
#' The "metric" section of the documentation}
#' for a list of valid metrics.
#' }
#' \item{\bold{b. function}:
#' You can provide a custom evaluation function. This
#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
#' list with three elements:
#' \itemize{
#' \item{\code{name}: A string with the name of the metric, used for printing
#' and storing results.
#' }
#' \item{\code{value}: A single number indicating the value of the metric for the
#' given predictions and true values
#' }
#' \item{
#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit.
#' For example, this would be \code{FALSE} for metrics like MAE or RMSE.
#' }
#' }
#' }
#' \item{\bold{c. list}:
#' If a list is given, it should only contain character vectors and functions.
#' These should follow the requirements from the descriptions above.
#' }
#' }
#' @param eval_freq evaluation output frequency, only effect when verbose > 0 #' @param eval_freq evaluation output frequency, only effect when verbose > 0
#' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model #' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model
#' @param nrounds number of training rounds #' @param nrounds number of training rounds
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param params List of parameters #' @param params List of parameters
#' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training
#' @section Early Stopping:
#'
#' "early stopping" refers to stopping the training process if the model's performance on a given
#' validation set does not improve for several consecutive iterations.
#'
#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
#' metrics will be considered for early stopping.
#'
#' If you want to only consider the first metric for early stopping, pass
#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
#' or \code{objective} (passed into \code{params}).
#' @keywords internal #' @keywords internal
NULL NULL
...@@ -47,6 +97,7 @@ NULL ...@@ -47,6 +97,7 @@ NULL
#' the number of real CPU cores, not the number of threads (most #' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).} #' CPU using hyper-threading to generate 2 threads per CPU core).}
#' } #' }
#' @inheritSection lgb_shared_params Early Stopping
#' @export #' @export
lightgbm <- function(data, lightgbm <- function(data,
label = NULL, label = NULL,
......
...@@ -318,10 +318,10 @@ lgb.check.obj <- function(params, obj) { ...@@ -318,10 +318,10 @@ lgb.check.obj <- function(params, obj) {
} }
# [description] # [description]
# make sure that "metric" is populated on params, # Take any character values from eval and store them in params$metric.
# and add any eval values to it # This has to account for the fact that `eval` could be a character vector,
# [return] # a function, a list of functions, or a list with a mix of strings and
# params, where "metric" is a list # functions
lgb.check.eval <- function(params, eval) { lgb.check.eval <- function(params, eval) {
if (is.null(params$metric)) { if (is.null(params$metric)) {
...@@ -330,13 +330,30 @@ lgb.check.eval <- function(params, eval) { ...@@ -330,13 +330,30 @@ lgb.check.eval <- function(params, eval) {
params$metric <- as.list(params$metric) params$metric <- as.list(params$metric)
} }
if (is.character(eval)) { # if 'eval' is a character vector or list, find the character
params$metric <- append(params$metric, eval) # elements and add them to 'metric'
if (!is.function(eval)) {
for (i in seq_along(eval)) {
element <- eval[[i]]
if (is.character(element)) {
params$metric <- append(params$metric, element)
}
}
} }
if (identical(class(eval), "list")) { # If more than one character metric was given, then "None" should
params$metric <- append(params$metric, unlist(eval)) # not be included
if (length(params$metric) > 1L) {
params$metric <- Filter(
f = function(metric) {
!(metric %in% .NO_METRIC_STRINGS())
}
, x = params$metric
)
} }
# duplicate metrics should be filtered out
params$metric <- as.list(unique(unlist(params$metric)))
return(params) return(params)
} }
...@@ -45,9 +45,41 @@ may allow you to pass other types of data like \code{matrix} and then separately ...@@ -45,9 +45,41 @@ may allow you to pass other types of data like \code{matrix} and then separately
\item{obj}{objective function, can be character or custom objective function. Examples include \item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber}, \code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{eval}{evaluation function, can be (list of) character or custom eval function} \item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
...@@ -99,6 +131,23 @@ a trained model \code{lgb.CVBooster}. ...@@ -99,6 +131,23 @@ a trained model \code{lgb.CVBooster}.
\description{ \description{
Cross validation logic used by LightGBM Cross validation logic used by LightGBM
} }
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
......
...@@ -38,7 +38,39 @@ may allow you to pass other types of data like \code{matrix} and then separately ...@@ -38,7 +38,39 @@ may allow you to pass other types of data like \code{matrix} and then separately
\code{regression}, \code{regression_l1}, \code{huber}, \code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{eval}{evaluation function, can be (a list of) character or custom eval function} \item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
...@@ -82,6 +114,23 @@ a trained booster model \code{lgb.Booster}. ...@@ -82,6 +114,23 @@ a trained booster model \code{lgb.Booster}.
\description{ \description{
Logic to train with LightGBM Logic to train with LightGBM
} }
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\examples{ \examples{
\dontrun{ \dontrun{
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
......
...@@ -16,12 +16,50 @@ and one metric. If there's more than one, will check all of them ...@@ -16,12 +16,50 @@ and one metric. If there's more than one, will check all of them
except the training data. Returns the model with (best_iter + early_stopping_rounds). except the training data. Returns the model with (best_iter + early_stopping_rounds).
If early stopping occurs, the model will have 'best_iter' field.} If early stopping occurs, the model will have 'best_iter' field.}
\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
strings and functions.
\itemize{
\item{\bold{a. character vector}:
If you provide a character vector to this argument, it should contain strings with valid
evaluation metrics.
See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{
The "metric" section of the documentation}
for a list of valid metrics.
}
\item{\bold{b. function}:
You can provide a custom evaluation function. This
should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
list with three elements:
\itemize{
\item{\code{name}: A string with the name of the metric, used for printing
and storing results.
}
\item{\code{value}: A single number indicating the value of the metric for the
given predictions and true values
}
\item{
\code{higher_better}: A boolean indicating whether higher values indicate a better fit.
For example, this would be \code{FALSE} for metrics like MAE or RMSE.
}
}
}
\item{\bold{c. list}:
If a list is given, it should only contain character vectors and functions.
These should follow the requirements from the descriptions above.
}
}}
\item{eval_freq}{evaluation output frequency, only effect when verbose > 0} \item{eval_freq}{evaluation output frequency, only effect when verbose > 0}
\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model}
\item{nrounds}{number of training rounds} \item{nrounds}{number of training rounds}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}}
\item{params}{List of parameters} \item{params}{List of parameters}
\item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training}
...@@ -29,4 +67,21 @@ If early stopping occurs, the model will have 'best_iter' field.} ...@@ -29,4 +67,21 @@ If early stopping occurs, the model will have 'best_iter' field.}
\description{ \description{
Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
} }
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
\keyword{internal} \keyword{internal}
...@@ -74,3 +74,20 @@ List of callback functions that are applied at each iteration.} ...@@ -74,3 +74,20 @@ List of callback functions that are applied at each iteration.}
\description{ \description{
Simple interface for training a LightGBM model. Simple interface for training a LightGBM model.
} }
\section{Early Stopping}{
"early stopping" refers to stopping the training process if the model's performance on a given
validation set does not improve for several consecutive iterations.
If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
metrics will be considered for early stopping.
If you want to only consider the first metric for early stopping, pass
\code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
or \code{objective} (passed into \code{params}).
}
...@@ -6,6 +6,59 @@ train <- agaricus.train ...@@ -6,6 +6,59 @@ train <- agaricus.train
test <- agaricus.test test <- agaricus.test
TOLERANCE <- 1e-6 TOLERANCE <- 1e-6
set.seed(708L)
# [description] Every time this function is called, it adds 0.1
# to an accumulator then returns the current value.
# This is used to mock the situation where an evaluation
# metric increases every iteration
ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR"
assign(x = "INCREASING_METRIC_ACUMULATOR", value = 0.0, envir = .GlobalEnv)
.increasing_metric <- function(preds, dtrain) {
if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) {
assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv)
}
assign(
x = ACCUMULATOR_NAME
, value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + 0.1
, envir = .GlobalEnv
)
return(list(
name = "increasing_metric"
, value = get(ACCUMULATOR_NAME, envir = .GlobalEnv)
, higher_better = TRUE
))
}
# [description] Evaluation function that always returns the
# same value
CONSTANT_METRIC_VALUE <- 0.2
.constant_metric <- function(preds, dtrain) {
return(list(
name = "constant_metric"
, value = CONSTANT_METRIC_VALUE
, higher_better = FALSE
))
}
# sample datasets to test early stopping
DTRAIN_RANDOM_REGRESSION <- lgb.Dataset(
data = as.matrix(rnorm(100L), ncol = 1L, drop = FALSE)
, label = rnorm(100L)
)
DVALID_RANDOM_REGRESSION <- lgb.Dataset(
data = as.matrix(rnorm(50L), ncol = 1L, drop = FALSE)
, label = rnorm(50L)
)
DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset(
data = as.matrix(rnorm(120L), ncol = 1L, drop = FALSE)
, label = sample(c(0L, 1L), size = 120L, replace = TRUE)
)
DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset(
data = as.matrix(rnorm(37L), ncol = 1L, drop = FALSE)
, label = sample(c(0L, 1L), size = 37L, replace = TRUE)
)
test_that("train and predict binary classification", { test_that("train and predict binary classification", {
nrounds <- 10L nrounds <- 10L
...@@ -707,7 +760,6 @@ test_that("lgb.train() works with early stopping for regression", { ...@@ -707,7 +760,6 @@ test_that("lgb.train() works with early stopping for regression", {
params = list( params = list(
objective = "regression" objective = "regression"
, metric = "rmse" , metric = "rmse"
, min_data_in_bin = 5L
) )
, data = dtrain , data = dtrain
, nrounds = nrounds , nrounds = nrounds
...@@ -730,7 +782,6 @@ test_that("lgb.train() works with early stopping for regression", { ...@@ -730,7 +782,6 @@ test_that("lgb.train() works with early stopping for regression", {
params = list( params = list(
objective = "regression" objective = "regression"
, metric = "rmse" , metric = "rmse"
, min_data_in_bin = 5L
, early_stopping_rounds = early_stopping_rounds , early_stopping_rounds = early_stopping_rounds
) )
, data = dtrain , data = dtrain
...@@ -750,6 +801,314 @@ test_that("lgb.train() works with early stopping for regression", { ...@@ -750,6 +801,314 @@ test_that("lgb.train() works with early stopping for regression", {
) )
}) })
test_that("lgb.train() does not stop early if early_stopping_rounds is not given", {
set.seed(708L)
increasing_metric_starting_value <- get(
ACCUMULATOR_NAME
, envir = .GlobalEnv
)
nrounds <- 10L
metrics <- list(
.constant_metric
, .increasing_metric
)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list("valid1" = DVALID_RANDOM_REGRESSION)
, eval = metrics
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# all 10 iterations should have happen, and the best_iter should be
# the first one (based on constant_metric)
best_iter <- 1L
expect_equal(bst$best_iter, best_iter)
# best_score should be taken from the first metric
expect_equal(
bst$best_score
, bst$record_evals[["valid1"]][["constant_metric"]][["eval"]][[best_iter]]
)
# early stopping should not have happened. Even though constant_metric
# had 9 consecutive iterations with no improvement, it is ignored because of
# first_metric_only = TRUE
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to stop early based on all metrics", {
set.seed(708L)
early_stopping_rounds <- 3L
param_variations <- list(
list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
)
, list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = FALSE
)
)
for (params in param_variations) {
nrounds <- 10L
bst <- lgb.train(
params = params
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# early stopping should have happened, and should have stopped early_stopping_rounds + 1 rounds in
# because constant_metric never improves
#
# the best iteration should be the last one, because increasing_metric was first
# and gets better every iteration
best_iter <- early_stopping_rounds + 1L
expect_equal(bst$best_iter, best_iter)
# best_score should be taken from "increasing_metric" because it was first
expect_equal(
bst$best_score
, bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]][[best_iter]]
)
# early stopping should not have happened. even though increasing_metric kept
# getting better, early stopping should have happened because "constant_metric"
# did not improve
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
}
})
test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based on only the first metric", {
set.seed(708L)
nrounds <- 10L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_equal(length(bst$record_evals[["valid1"]]), 2L)
# all 10 iterations should happen, and the best_iter should be the final one
expect_equal(bst$best_iter, nrounds)
# best_score should be taken from "increasing_metric"
expect_equal(
bst$best_score
, increasing_metric_starting_value + 0.1 * nrounds
)
# early stopping should not have happened. Even though constant_metric
# had 9 consecutive iterations with no improvement, it is ignored because of
# first_metric_only = TRUE
expect_equal(
length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("lgb.train() works when a mixture of functions and strings are passed to eval", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "None"
)
, data = DTRAIN_RANDOM_REGRESSION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, "rmse"
, .constant_metric
, "l2"
)
)
# all 4 metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = c("rmse", "l2", "increasing_metric", "constant_metric")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 1.105012) < TOLERANCE)
expect_true(abs(results[["l2"]][["eval"]][[1L]] - 1.221051) < TOLERANCE)
expected_increasing_metric <- increasing_metric_starting_value + 0.1
expect_true(
abs(
results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric
) < TOLERANCE
)
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
})
test_that("lgb.train() works when a list of strings or a character vector is passed to eval", {
# testing list and character vector, as well as length-1 and length-2
eval_variations <- list(
c("binary_error", "binary_logloss")
, "binary_logloss"
, list("binary_error", "binary_logloss")
, list("binary_logloss")
)
for (eval_variation in eval_variations) {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = eval_variation
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = unlist(eval_variation)
, ignore.order = TRUE
, ignore.case = FALSE
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
if ("binary_error" %in% unlist(eval_variation)) {
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
}
if ("binary_logloss" %in% unlist(eval_variation)) {
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
}
}
})
test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "binary_error"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = "binary_logloss"
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid1"]]
, expected = c("binary_error", "binary_logloss")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE)
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE)
})
test_that("lgb.train() works when you give a function for eval", {
set.seed(708L)
nrounds <- 10L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_CLASSIFICATION
)
, eval = .constant_metric
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid1"]]
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
})
test_that("lgb.train() works with early stopping for regression with a metric that should be minimized", { test_that("lgb.train() works with early stopping for regression with a metric that should be minimized", {
set.seed(708L) set.seed(708L)
trainDF <- data.frame( trainDF <- data.frame(
...@@ -1099,6 +1458,172 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com ...@@ -1099,6 +1458,172 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com
expect_identical(bst$best_score, auc_scores[which.max(auc_scores)]) expect_identical(bst$best_score, auc_scores[which.max(auc_scores)])
}) })
test_that("lgb.cv() works when you specify both 'metric' and 'eval' with strings", {
set.seed(708L)
nrounds <- 10L
nfolds <- 4L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "binary"
, metric = "binary_error"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nrounds = nrounds
, nfold = nfolds
, eval = "binary_logloss"
)
# both metrics should have been used
expect_named(
bst$record_evals[["valid"]]
, expected = c("binary_error", "binary_logloss")
, ignore.order = TRUE
, ignore.case = FALSE
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid"]]
expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5005654) < TOLERANCE)
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.7011232) < TOLERANCE)
# all boosters should have been created
expect_length(bst$boosters, nfolds)
})
test_that("lgb.cv() works when you give a function for eval", {
set.seed(708L)
nrounds <- 10L
nfolds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "binary"
, metric = "None"
)
, data = DTRAIN_RANDOM_CLASSIFICATION
, nfold = nfolds
, nrounds = nrounds
, eval = .constant_metric
)
# the difference metrics shouldn't have been mixed up with each other
results <- bst$record_evals[["valid"]]
expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE)
expect_named(results, "constant_metric")
})
test_that("If first_metric_only is TRUE, lgb.cv() decides to stop early based on only the first metric", {
set.seed(708L)
nrounds <- 10L
nfolds <- 5L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nfold = nfolds
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.increasing_metric
, .constant_metric
)
)
# Only the two functions provided to "eval" should have been evaluated
expect_named(bst$record_evals[["valid"]], c("increasing_metric", "constant_metric"))
# all 10 iterations should happen, and the best_iter should be the final one
expect_equal(bst$best_iter, nrounds)
# best_score should be taken from "increasing_metric"
#
# this expected value looks magical and confusing, but it's because
# evaluation metrics are averaged over all folds.
#
# consider 5-fold CV with a metric that adds 0.1 to a global accumulator
# each time it's called
#
# * iter 1: [0.1, 0.2, 0.3, 0.4, 0.5] (mean = 0.3)
# * iter 2: [0.6, 0.7, 0.8, 0.9, 1.0] (mean = 1.3)
# * iter 3: [1.1, 1.2, 1.3, 1.4, 1.5] (mean = 1.8)
#
cv_value <- increasing_metric_starting_value + mean(seq_len(nfolds) / 10.0) + (nrounds - 1L) * 0.1 * nfolds
expect_equal(bst$best_score, cv_value)
# early stopping should not have happened. Even though constant_metric
# had 9 consecutive iterations with no improvement, it is ignored because of
# first_metric_only = TRUE
expect_equal(
length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
, nrounds
)
expect_equal(
length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
, nrounds
)
})
test_that("early stopping works with lgb.cv()", {
set.seed(708L)
nrounds <- 10L
nfolds <- 5L
early_stopping_rounds <- 3L
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv)
bst <- lgb.cv(
params = list(
objective = "regression"
, metric = "None"
, early_stopping_rounds = early_stopping_rounds
, first_metric_only = TRUE
)
, data = DTRAIN_RANDOM_REGRESSION
, nfold = nfolds
, nrounds = nrounds
, valids = list(
"valid1" = DVALID_RANDOM_REGRESSION
)
, eval = list(
.constant_metric
, .increasing_metric
)
)
# only the two functions provided to "eval" should have been evaluated
expect_named(bst$record_evals[["valid"]], c("constant_metric", "increasing_metric"))
# best_iter should be based on the first metric. Since constant_metric
# never changes, its first iteration was the best oone
expect_equal(bst$best_iter, 1L)
# best_score should be taken from the first metri
expect_equal(bst$best_score, 0.2)
# early stopping should have happened, since constant_metric was the first
# one passed to eval and it will not improve over consecutive iterations
#
# note that this test is identical to the previous one, but with the
# order of the eval metrics switched
expect_equal(
length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
expect_equal(
length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
, early_stopping_rounds + 1L
)
})
context("interaction constraints")
test_that("lgb.train() throws an informative error if interaction_constraints is not a list", { test_that("lgb.train() throws an informative error if interaction_constraints is not a list", {
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", interaction_constraints = "[1,2],[3]") params <- list(objective = "regression", interaction_constraints = "[1,2],[3]")
......
...@@ -115,3 +115,12 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided", ...@@ -115,3 +115,12 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided",
expect_named(params, "metric") expect_named(params, "metric")
expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss")) expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss"))
}) })
test_that("lgb.check.eval drops duplicate metrics and preserves order", {
params <- lgb.check.eval(
params = list(metric = "l1")
, eval = list("l2", "rmse", "l1", "rmse")
)
expect_named(params, "metric")
expect_identical(params[["metric"]], list("l1", "l2", "rmse"))
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment