% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lgb.cv.R, R/lgb.train.R, R/lightgbm.R
\name{lgb.cv}
\alias{lgb.cv}
\alias{lgb.train}
\alias{lightgbm}
\title{Main CV and training logic for LightGBM}
\usage{
lgb.cv(params = list(), data, nrounds = 10, nfold = 3, label = NULL,
  weight = NULL, obj = NULL, eval = NULL, verbose = 1, record = TRUE,
  eval_freq = 1L, showsd = TRUE, stratified = TRUE, folds = NULL,
  init_model = NULL, colnames = NULL, categorical_feature = NULL,
  early_stopping_rounds = NULL, callbacks = list(), ...)

lgb.train(params = list(), data, nrounds = 10, valids = list(), obj = NULL,
  eval = NULL, verbose = 1, record = TRUE, eval_freq = 1L,
  init_model = NULL, colnames = NULL, categorical_feature = NULL,
  early_stopping_rounds = NULL, callbacks = list(), ...)

lightgbm(data, label = NULL, weight = NULL, params = list(), nrounds = 10,
  verbose = 1, eval_freq = 1L, early_stopping_rounds = NULL,
  save_name = "lightgbm.model", init_model = NULL, callbacks = list(), ...)
}
\arguments{
\item{params}{list of parameters}

\item{data}{a \code{lgb.Dataset} object, used for training (for \code{lgb.cv}, the data that will be split into CV folds)}

\item{nrounds}{number of boosting rounds}

\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal-size subsamples}

\item{label}{vector of response values. Should be provided only when \code{data} is an R matrix.}

\item{weight}{vector of sample weights. If not NULL, it will be set on the dataset.}

\item{obj}{objective function, can be a character string or a custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber}, \code{binary}, \code{lambdarank} and \code{multiclass}.}

\item{eval}{evaluation function, can be a (list of) character string(s) or a custom evaluation function}

\item{verbose}{verbosity of output. If <= 0, printing of evaluation results during training is also disabled.}

\item{record}{boolean. If TRUE, iteration messages are recorded in \code{booster$record_evals}.}

\item{eval_freq}{evaluation output frequency; only has an effect when \code{verbose > 0}}

\item{showsd}{\code{boolean}, whether to show the standard deviation of the cross-validation results}

\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified by the values of the outcome labels}

\item{folds}{\code{list} of pre-defined CV folds (each element must be a vector of the test fold's indices).
When folds are supplied, the \code{nfold} and \code{stratified} parameters are ignored.}

\item{init_model}{path to a model file, or an \code{lgb.Booster} object; training will continue from this model}

\item{colnames}{feature names; if not NULL, they will be used to overwrite the names in the dataset}

\item{categorical_feature}{list of str or int. Type int represents indices, type str represents feature names.}

\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation dataset and one
metric. If there is more than one, all of them will be checked. Returns the model trained for
\code{best_iter + early_stopping_rounds} rounds. If early stopping occurs, the model will have a
\code{best_iter} field.}

\item{callbacks}{list of callback functions that are applied at each iteration}

\item{save_name}{file name to use when writing the trained model to disk; only used by \code{lightgbm}}

\item{...}{other parameters, see \code{parameters.md} for more information}

\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}

\item{boosting}{boosting type: \code{gbdt} or \code{dart}}

\item{num_leaves}{number of leaves in one tree, defaults to 127}

\item{max_depth}{limit on the maximum depth of the tree model. This is used to deal with overfitting when
the dataset is small; trees still grow leaf-wise.}
\item{num_threads}{number of threads for LightGBM. For the best speed, set this to the number of physical
CPU cores, not the number of logical threads (most CPUs use hyper-threading, exposing 2 threads per core).}
}
\value{
a trained model: an \code{lgb.CVBooster} for \code{lgb.cv}, or an \code{lgb.Booster} for
\code{lgb.train} and \code{lightgbm}.
}
\description{
Main CV logic for LightGBM (\code{lgb.cv}) and main training logic for LightGBM
(\code{lgb.train}). \code{lightgbm} is a simple interface for training a LightGBM model
and its documentation is combined with that of \code{lgb.train}; a sketch of its use
follows the \code{lgb.train} example below.
}
\examples{
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(objective = "regression", metric = "l2")
model <- lgb.cv(params,
                dtrain,
                10,
                nfold = 5,
                min_data = 1,
                learning_rate = 1,
                early_stopping_rounds = 10)
}
\dontrun{
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
params <- list(objective = "regression", metric = "l2")
valids <- list(test = dtest)
model <- lgb.train(params,
                   dtrain,
                   100,
                   valids,
                   min_data = 1,
                   learning_rate = 1,
                   early_stopping_rounds = 10)
}
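\dontrun{
# Simple interface: a minimal sketch using lightgbm() directly on an R
# matrix, assuming the same agaricus data as above with 0/1 labels and a
# binary objective. The metric name "binary_logloss" is an assumption
# (see parameters.md); argument names follow the usage section of this page.
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
model <- lightgbm(data = train$data,
                  label = train$label,
                  params = list(objective = "binary", metric = "binary_logloss"),
                  nrounds = 10,
                  verbose = 1)
# predict() on the returned lgb.Booster gives a numeric vector of scores
preds <- predict(model, train$data)
}
}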