Commit 5457ef6b authored by James Lamb, committed by Guolin Ke

[R-package] removed horizontal whitespace (fixes #1642) (#1651)

* [R-package] removed horizontal whitespace (fixes #1642)

* [R-package] fixed missing newline in test file
parent f44b60b6
@@ -16,74 +16,74 @@ CB_ENV <- R6::R6Class(
)
cb.reset.parameters <- function(new_params) {
# Check for parameter list
if (!is.list(new_params)) {
stop(sQuote("new_params"), " must be a list")
}
# Deparse parameter list
pnames <- gsub("\\.", "_", names(new_params))
nrounds <- NULL
# Run some checks in the beginning
init <- function(env) {
# Store boosting rounds
nrounds <<- env$end_iteration - env$begin_iteration + 1
# Check for model environment
if (is.null(env$model)) { stop("Env should have a ", sQuote("model")) }
# Some parameters are not allowed to be changed,
# since changing them would wreak havoc
not_allowed <- c("num_class", "metric", "boosting_type")
if (any(pnames %in% not_allowed)) {
stop("Parameters ", paste0(pnames[pnames %in% not_allowed], collapse = ", "), " cannot be changed during boosting")
}
# Check parameter names
for (n in pnames) {
# Set name
p <- new_params[[n]]
# Check if function for parameter
if (is.function(p)) {
# Check if requires at least two arguments
if (length(formals(p)) != 2) {
stop("Parameter ", sQuote(n), " is a function but not of two arguments")
}
# Check if numeric or character
} else if (is.numeric(p) || is.character(p)) {
# Check if length is matching
if (length(p) != nrounds) {
stop("Length of ", sQuote(n), " has to be equal to length of ", sQuote("nrounds"))
}
} else {
stop("Parameter ", sQuote(n), " is not a function or a vector")
}
}
}
callback <- function(env) {
# Check if rounds is null
if (is.null(nrounds)) {
init(env)
}
# Store iteration
i <- env$iteration - env$begin_iteration
# Apply list on parameters
pars <- lapply(new_params, function(p) {
if (is.function(p)) {
@@ -91,14 +91,14 @@ cb.reset.parameters <- function(new_params) {
}
p[i]
})
# TODO: check pars
if (!is.null(env$model)) {
env$model$reset_parameter(pars)
}
}
attr(callback, "call") <- match.call()
attr(callback, "is_pre_iteration") <- TRUE
attr(callback, "name") <- "cb.reset.parameters"
@@ -107,258 +107,258 @@ cb.reset.parameters <- function(new_params) {
# Format the evaluation metric string
format.eval.string <- function(eval_res, eval_err = NULL) {
# Check for empty evaluation string
if (is.null(eval_res) || length(eval_res) == 0) {
stop("no evaluation results")
}
# Check for empty evaluation error
if (!is.null(eval_err)) {
sprintf("%s\'s %s:%g+%g", eval_res$data_name, eval_res$name, eval_res$value, eval_err)
} else {
sprintf("%s\'s %s:%g", eval_res$data_name, eval_res$name, eval_res$value)
}
}
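# A small sketch (not part of this diff) of the message format produced by
# the internal helper above, using a hand-built eval_res list:
eval_res <- list(data_name = "test", name = "l2", value = 0.123)
lightgbm:::format.eval.string(eval_res)        # "test's l2:0.123"
lightgbm:::format.eval.string(eval_res, 0.01)  # "test's l2:0.123+0.01"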
merge.eval.string <- function(env) {
# Check length of evaluation list
if (length(env$eval_list) <= 0) {
return("")
}
# Get evaluation
msg <- list(sprintf("[%d]:", env$iteration))
# Set if evaluation error
is_eval_err <- length(env$eval_err_list) > 0
# Loop through evaluation list
for (j in seq_along(env$eval_list)) {
# Store evaluation error
eval_err <- NULL
if (is_eval_err) {
eval_err <- env$eval_err_list[[j]]
}
# Set error message
msg <- c(msg, format.eval.string(env$eval_list[[j]], eval_err))
}
# Return tab-separated message
paste0(msg, collapse = "\t")
}
cb.print.evaluation <- function(period = 1) {
# Create callback
callback <- function(env) {
# Check if period is at least 1
if (period > 0) {
# Store iteration
i <- env$iteration
# Check if iteration matches modulo
if ((i - 1) %% period == 0 || is.element(i, c(env$begin_iteration, env$end_iteration))) {
# Merge evaluation string
msg <- merge.eval.string(env)
# Check if the message is non-empty, and print it without recomputing
if (nchar(msg) > 0) {
cat(msg, "\n")
}
}
}
}
# Store attributes
attr(callback, "call") <- match.call()
attr(callback, "name") <- "cb.print.evaluation"
# Return callback
callback
}
cb.record.evaluation <- function() {
# Create callback
callback <- function(env) {
# Return empty if empty evaluation list
if (length(env$eval_list) <= 0) {
return()
}
# Set if evaluation error
is_eval_err <- length(env$eval_err_list) > 0
# Check length of recorded evaluation
if (length(env$model$record_evals) == 0) {
# Loop through each evaluation list element
for (j in seq_along(env$eval_list)) {
# Store names
data_name <- env$eval_list[[j]]$data_name
name <- env$eval_list[[j]]$name
env$model$record_evals$start_iter <- env$begin_iteration
# Check if evaluation record exists
if (is.null(env$model$record_evals[[data_name]])) {
env$model$record_evals[[data_name]] <- list()
}
# Create dummy lists
env$model$record_evals[[data_name]][[name]] <- list()
env$model$record_evals[[data_name]][[name]]$eval <- list()
env$model$record_evals[[data_name]][[name]]$eval_err <- list()
}
}
# Loop through each evaluation list element
for (j in seq_along(env$eval_list)) {
# Get evaluation data
eval_res <- env$eval_list[[j]]
eval_err <- NULL
if (is_eval_err) {
eval_err <- env$eval_err_list[[j]]
}
# Store names
data_name <- eval_res$data_name
name <- eval_res$name
# Store evaluation data
env$model$record_evals[[data_name]][[name]]$eval <- c(env$model$record_evals[[data_name]][[name]]$eval, eval_res$value)
env$model$record_evals[[data_name]][[name]]$eval_err <- c(env$model$record_evals[[data_name]][[name]]$eval_err, eval_err)
}
}
# Store attributes
attr(callback, "call") <- match.call()
attr(callback, "name") <- "cb.record.evaluation"
# Return callback
callback
}
cb.early.stop <- function(stopping_rounds, verbose = TRUE) {
# Initialize variables
factor_to_bigger_better <- NULL
best_iter <- NULL
best_score <- NULL
best_msg <- NULL
eval_len <- NULL
# Initialization function
init <- function(env) {
# Store evaluation length
eval_len <<- length(env$eval_list)
# Early stopping cannot work without metrics
if (eval_len == 0) {
stop("For early stopping, valids must have at least one element")
}
# Check if verbose or not
if (isTRUE(verbose)) {
cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "")
}
# Maximization or minimization task
factor_to_bigger_better <<- rep.int(1.0, eval_len)
best_iter <<- rep.int(-1, eval_len)
best_score <<- rep.int(-Inf, eval_len)
best_msg <<- list()
# Loop through evaluation elements
for (i in seq_len(eval_len)) {
# Append an empty placeholder message
best_msg <<- c(best_msg, "")
# Check if maximization or minimization
if (!env$eval_list[[i]]$higher_better) {
factor_to_bigger_better[i] <<- -1.0
}
}
}
# Create callback
callback <- function(env, finalize = FALSE) {
# Run init() on the first call
if (is.null(eval_len)) {
init(env)
}
# Store iteration
cur_iter <- env$iteration
# Loop through evaluation
for (i in seq_len(eval_len)) {
# Store score
score <- env$eval_list[[i]]$value * factor_to_bigger_better[i]
# Check if score is better
if (score > best_score[i]) {
# Store new scores
best_score[i] <<- score
best_iter[i] <<- cur_iter
# Prepare to print if verbose
if (verbose) {
best_msg[[i]] <<- as.character(merge.eval.string(env))
}
} else {
# Check if early stopping is required
if (cur_iter - best_iter[i] >= stopping_rounds) {
# Check if model is not null
if (!is.null(env$model)) {
env$model$best_score <- best_score[i]
env$model$best_iter <- best_iter[i]
}
# Print message if verbose
if (isTRUE(verbose)) {
cat("Early stopping, best iteration is:", "\n")
cat(best_msg[[i]], "\n")
}
# Store best iteration and stop
env$best_iter <- best_iter[i]
env$met_early_stop <- TRUE
}
}
if (!isTRUE(env$met_early_stop) && cur_iter == env$end_iteration) {
# Check if model is not null
@@ -366,58 +366,58 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) {
env$model$best_score <- best_score[i]
env$model$best_iter <- best_iter[i]
}
# Print message if verbose
if (isTRUE(verbose)) {
cat("Did not meet early stopping, best iteration is:", "\n")
cat(best_msg[[i]], "\n")
}
# Store best iteration and stop
env$best_iter <- best_iter[i]
env$met_early_stop <- TRUE
}
}
}
# Set attributes
attr(callback, "call") <- match.call()
attr(callback, "name") <- "cb.early.stop"
# Return callback
callback
}
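# A minimal sketch (not part of this diff): early stopping is normally enabled
# through the early_stopping_rounds argument of lgb.train()/lgb.cv(), which
# attaches cb.early.stop() internally; valids must contain at least one dataset.
library(lightgbm)
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset.create.valid(dtrain, agaricus.test$data, label = agaricus.test$label)
model <- lgb.train(params = list(objective = "binary", metric = "auc")
                   , data = dtrain
                   , nrounds = 100
                   , valids = list(test = dtest)
                   , early_stopping_rounds = 5)
model$best_iter                                          # chosen by cb.early.stop()
unlist(model$record_evals[["test"]][["auc"]][["eval"]])  # filled by cb.record.evaluation()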
# Extract callback names from the list of callbacks
callback.names <- function(cb_list) { unlist(lapply(cb_list, attr, "name")) }
add.cb <- function(cb_list, cb) {
# Append the new callback to the list
cb_list <- c(cb_list, cb)
# Set names of elements
names(cb_list) <- callback.names(cb_list)
# Check for existence
if ("cb.early.stop" %in% names(cb_list)) {
# Concatenate existing elements
cb_list <- c(cb_list, cb_list["cb.early.stop"])
# Remove only the first one
cb_list["cb.early.stop"] <- NULL
}
# Return element
cb_list
}
categorize.callbacks <- function(cb_list) {
# Check for pre-iteration or post-iteration
list(
pre_iter = Filter(function(x) {
@@ -429,5 +429,5 @@ categorize.callbacks <- function(cb_list) {
is.null(pre) || !pre
}, cb_list)
)
}
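# A small sketch (not part of this diff) of the two helpers above; both are
# internal, hence the ::: access. add.cb() keeps cb.early.stop last so it runs
# after the logging callbacks, and categorize.callbacks() splits the list on
# the is_pre_iteration attribute.
cbs <- lightgbm:::add.cb(list(), lightgbm:::cb.early.stop(stopping_rounds = 5))
cbs <- lightgbm:::add.cb(cbs, lightgbm:::cb.print.evaluation(period = 1))
names(cbs)  # "cb.print.evaluation" first, "cb.early.stop" moved to the end
lightgbm:::categorize.callbacks(cbs)  # $pre_iter is empty; both land in $post_iter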
@@ -393,11 +393,11 @@ Dataset <- R6::R6Class(
# Check for info name and handle
if (is.null(private$info[[name]])) {
if (lgb.is.null.handle(private$handle)) {
stop("Cannot perform getinfo before constructing Dataset.")
}
# Get field size of info
info_len <- 0L
info_len <- lgb.call("LGBM_DatasetGetFieldSize_R",
@@ -850,7 +850,7 @@ dimnames.lgb.Dataset <- function(x) {
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' original lgb.Dataset object
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
@@ -24,11 +24,11 @@ CVBooster <- R6::R6Class(
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param weight vector of observation weights. If not NULL, will be set on the dataset
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
@@ -45,15 +45,15 @@ CVBooster <- R6::R6Class(
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
#' overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' }
#'
#' @return a trained model \code{lgb.CVBooster}.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
@@ -70,7 +70,7 @@ CVBooster <- R6::R6Class(
#' @export
lgb.cv <- function(params = list(),
data,
nrounds = 10,
nfold = 3,
label = NULL,
weight = NULL,
@@ -88,7 +88,7 @@ lgb.cv <- function(params = list(),
early_stopping_rounds = NULL,
callbacks = list(),
...) {
# Setup temporary variables
additional_params <- list(...)
params <- append(params, additional_params)
@@ -101,31 +101,31 @@ lgb.cv <- function(params = list(),
if (nrounds <= 0) {
stop("nrounds should be greater than zero")
}
# Check for objective (function or not)
if (is.function(params$objective)) {
fobj <- params$objective
params$objective <- "NONE"
}
# Check for loss (function or not)
if (is.function(eval)) {
feval <- eval
}
# Check for parameters
lgb.check.params(params)
# Init predictor to empty
predictor <- NULL
# Check for boosting from a trained model
if (is.character(init_model)) {
predictor <- Predictor$new(init_model)
} else if (lgb.is.Booster(init_model)) {
predictor <- init_model$to_predictor()
}
# Set the iteration to start from / end to (and check for boosting from a trained model, again)
begin_iteration <- 1
if (!is.null(predictor)) {
@@ -138,7 +138,7 @@ lgb.cv <- function(params = list(),
} else {
end_iteration <- begin_iteration + nrounds - 1
}
# Check for training dataset type correctness
if (!lgb.is.Dataset(data)) {
if (is.null(label)) {
@@ -146,49 +146,49 @@ lgb.cv <- function(params = list(),
}
data <- lgb.Dataset(data, label = label)
}
# Check for weights
if (!is.null(weight)) {
data$setinfo("weight", weight)
}
# Update parameters with parsed parameters
data$update_params(params)
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor)
# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames)
}
# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
# Construct datasets, if needed
data$construct()
# Check for folds
if (!is.null(folds)) {
# Check for list of folds or for single value
if (!is.list(folds) || length(folds) < 2) {
stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold")
}
# Set number of folds
nfold <- length(folds)
} else {
# Check fold value
if (nfold <= 1) {
stop(sQuote("nfold"), " must be > 1")
}
# Create folds
folds <- generate.cv.folds(nfold,
nrow(data),
@@ -196,19 +196,19 @@ lgb.cv <- function(params = list(),
getinfo(data, "label"),
getinfo(data, "group"),
params)
}
# Add printing log callback
if (verbose > 0 && eval_freq > 0) {
callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq))
}
# Add evaluation log callback
if (record) {
callbacks <- add.cb(callbacks, cb.record.evaluation())
}
# Check for early stopping passed as parameter when adding early stopping callback
early_stop <- c("early_stopping_round", "early_stopping_rounds", "early_stopping")
if (any(names(params) %in% early_stop)) {
@@ -222,10 +222,10 @@ lgb.cv <- function(params = list(),
}
}
}
# Categorize callbacks
cb <- categorize.callbacks(callbacks)
# Construct booster using a list apply, check if requires group or not
if (!is.list(folds[[1]])) {
bst_folds <- lapply(seq_along(folds), function(k) {
@@ -254,107 +254,107 @@ lgb.cv <- function(params = list(),
list(booster = booster)
})
}
# Create new booster
cv_booster <- CVBooster$new(bst_folds)
# Callback env
env <- CB_ENV$new()
env$model <- cv_booster
env$begin_iteration <- begin_iteration
env$end_iteration <- end_iteration
# Start training model using number of iterations to start and end with
for (i in seq.int(from = begin_iteration, to = end_iteration)) {
# Overwrite iteration in environment
env$iteration <- i
env$eval_list <- list()
# Loop through "pre_iter" element
for (f in cb$pre_iter) {
f(env)
}
# Update one boosting iteration
msg <- lapply(cv_booster$boosters, function(fd) {
fd$booster$update(fobj = fobj)
fd$booster$eval_valid(feval = feval)
})
# Prepare collection of evaluation results
merged_msg <- lgb.merge.cv.result(msg)
# Write evaluation result in environment
env$eval_list <- merged_msg$eval_list
# Check for standard deviation requirement
if (showsd) {
env$eval_err_list <- merged_msg$eval_err_list
}
# Loop through env
for (f in cb$post_iter) {
f(env)
}
# Check for early stopping and break if needed
if (env$met_early_stop) break
}
# Return booster
return(cv_booster)
}
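# A minimal sketch (not part of this diff) of a cross-validation run with the
# bundled agaricus data; record_evals on the returned CVBooster holds the
# per-iteration fold mean (and, with showsd = TRUE, the fold standard deviation).
library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
cv_model <- lgb.cv(params = list(objective = "binary", metric = "l2")
                   , data = dtrain
                   , nrounds = 10
                   , nfold = 3)
str(cv_model$record_evals, max.level = 2)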
# Generates random (stratified if needed) CV folds
generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
# Check for group existence
if (is.null(group)) {
# Shuffle
rnd_idx <- sample.int(nrows)
# Request stratified folds
if (isTRUE(stratified) && params$objective %in% c("binary", "multiclass") && length(label) == length(rnd_idx)) {
y <- label[rnd_idx]
y <- factor(y)
folds <- lgb.stratified.folds(y, nfold)
} else {
# Make simple non-stratified folds
folds <- list()
# Loop through each fold
for (i in seq_len(nfold)) {
kstep <- length(rnd_idx) %/% (nfold - i + 1)
folds[[i]] <- rnd_idx[seq_len(kstep)]
rnd_idx <- rnd_idx[-seq_len(kstep)]
}
}
} else {
# When doing group, stratified is not possible (only random selection)
if (nfold > length(group)) {
stop("\n\tYou requested too many folds for the number of available groups.\n")
}
# Expand group sizes into one group id per row
ungrouped <- inverse.rle(list(lengths = group, values = seq_along(group)))
# Can't stratify, shuffle
rnd_idx <- sample.int(length(group))
# Make simple non-stratified folds
folds <- list()
# Loop through each fold
for (i in seq_len(nfold)) {
kstep <- length(rnd_idx) %/% (nfold - i + 1)
@@ -362,12 +362,12 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
group = rnd_idx[seq_len(kstep)])
rnd_idx <- rnd_idx[-seq_len(kstep)]
}
}
# Return folds
return(folds)
}
# Creates CV folds stratified by the values of y.
@@ -375,7 +375,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
# by always returning an unnamed list of fold indices.
#' @importFrom stats quantile
lgb.stratified.folds <- function(y, k = 10) {
## Group the numeric data based on their magnitudes
## and sample within those groups.
## When the number of samples is low, we may have
@@ -385,51 +385,51 @@ lgb.stratified.folds <- function(y, k = 10) {
## At most, we will use quantiles. If the sample
## is too small, we just do regular unstratified CV
if (is.numeric(y)) {
cuts <- length(y) %/% k
if (cuts < 2) { cuts <- 2 }
if (cuts > 5) { cuts <- 5 }
y <- cut(y,
unique(stats::quantile(y, probs = seq.int(0, 1, length.out = cuts))),
include.lowest = TRUE)
}
if (k < length(y)) {
## Reset levels so that the possible levels and
## the levels in the vector are the same
y <- factor(as.character(y))
numInClass <- table(y)
foldVector <- vector(mode = "integer", length(y))
## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
for (i in seq_along(numInClass)) {
## Create a vector of integers from 1:k as many times as possible without
## going over the number of samples in the class. Note that if the number
## of samples in a class is less than k, nothing is produced here.
seqVector <- rep(seq_len(k), numInClass[i] %/% k)
## Add enough random integers to get length(seqVector) == numInClass[i]
if (numInClass[i] %% k > 0) {
seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k))
}
## Shuffle the integers for fold assignment and assign to this class's data
foldVector[y == dimnames(numInClass)$y[i]] <- sample(seqVector)
}
} else {
foldVector <- seq_along(y)
}
# Return data
out <- split(seq_along(y), foldVector)
names(out) <- NULL
@@ -437,53 +437,53 @@ lgb.stratified.folds <- function(y, k = 10) {
}
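# A small sketch (not part of this diff) of the internal fold generator above:
# numeric targets are binned into at most 5 quantile groups before the
# per-class balanced assignment, and an unnamed list of index vectors is returned.
set.seed(708)
folds <- lightgbm:::lgb.stratified.folds(rnorm(100), k = 5)
lengths(folds)  # roughly 20 row indices per fold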
lgb.merge.cv.result <- function(msg, showsd = TRUE) {
# Get CV message length
if (length(msg) == 0) {
stop("lgb.cv: size of cv result error")
}
# Get evaluation message length
eval_len <- length(msg[[1]])
# Is evaluation message empty?
if (eval_len == 0) {
stop("lgb.cv: should provide at least one metric for CV")
}
# Get evaluation results using a list apply
eval_result <- lapply(seq_len(eval_len), function(j) {
as.numeric(lapply(seq_along(msg), function(i) {
msg[[i]][[j]]$value }))
})
# Get evaluation
ret_eval <- msg[[1]]
# Go through evaluation length items
for (j in seq_len(eval_len)) {
ret_eval[[j]]$value <- mean(eval_result[[j]])
}
# Preinit evaluation error
ret_eval_err <- NULL
# Check for standard deviation
if (showsd) {
# Parse standard deviation
for (j in seq_len(eval_len)) {
ret_eval_err <- c(ret_eval_err,
sqrt(mean(eval_result[[j]] ^ 2) - mean(eval_result[[j]]) ^ 2))
}
# Convert to list
ret_eval_err <- as.list(ret_eval_err)
}
# Return errors
list(eval_list = ret_eval,
eval_err_list = ret_eval_err)
}
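# A small check (not part of this diff) of the error term above: the value
# stored per metric is the population standard deviation across folds,
# sqrt(E[x^2] - E[x]^2).
x <- c(0.10, 0.12, 0.11)                   # e.g. one metric's value on 3 folds
sqrt(mean(x^2) - mean(x)^2)                # what lgb.merge.cv.result computes
sd(x) * sqrt((length(x) - 1) / length(x))  # the equivalent population sd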
#' Compute feature importance in a model
#'
#' Creates a \code{data.table} of feature importances in a model.
#'
#' @param model object of class \code{lgb.Booster}.
#' @param percentage whether to show importance in relative percentage.
#'
#' @return
#'
#' For a tree model, a \code{data.table} with the following columns:
#' \itemize{
#' \item \code{Feature} Feature names in the model.
@@ -14,7 +14,7 @@
#' \item \code{Cover} The number of observations related to this feature.
#' \item \code{Frequency} The number of times a feature is used to split in trees.
#' }
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
@@ -29,20 +29,20 @@
#'
#' tree_imp1 <- lgb.importance(model, percentage = TRUE)
#' tree_imp2 <- lgb.importance(model, percentage = FALSE)
#'
#' @importFrom magrittr %>% %T>% extract
#' @importFrom data.table :=
#' @export
lgb.importance <- function(model, percentage = TRUE) {
# Check if model is a lightgbm model
if (!inherits(model, "lgb.Booster")) {
stop("'model' has to be an object of class lgb.Booster")
}
# Setup importance
tree_dt <- lgb.model.dt.tree(model)
# Extract elements
tree_imp <- tree_dt %>%
magrittr::extract(.,
......@@ -51,15 +51,15 @@ lgb.importance <- function(model, percentage = TRUE) {
by = "split_feature") %T>%
data.table::setnames(., old = "split_feature", new = "Feature") %>%
magrittr::extract(., i = order(Gain, decreasing = TRUE))
# Check if relative values are requested
if (percentage) {
tree_imp[, ":="(Gain = Gain / sum(Gain),
Cover = Cover / sum(Cover),
Frequency = Frequency / sum(Frequency))]
}
# Return importance table
return(tree_imp)
}
#' Compute feature contribution of prediction
#'
#' Computes feature contribution components of raw score prediction.
#'
#' @param model object of class \code{lgb.Booster}.
#' @param data a matrix object or a dgCMatrix object.
#' @param idxset an integer vector of indices of rows needed.
#' @param num_iteration number of iterations to predict with, NULL or <= 0 means use best iteration.
#'
#' @return
#'
#' For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} with the following columns:
#' \itemize{
#' \item \code{Feature} Feature names in the model.
#' \item \code{Contribution} The total contribution of this feature's splits.
#' }
#' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class.
#'
#' @examples
#' Sigmoid <- function(x) 1 / (1 + exp(-x))
#' Logit <- function(x) log(x / (1 - x))
@@ -25,7 +25,7 @@
#' setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
#' data(agaricus.test, package = "lightgbm")
#' test <- agaricus.test
#'
#' params <- list(
#' objective = "binary"
#' , learning_rate = 0.01
@@ -35,9 +35,9 @@
#' , min_sum_hessian_in_leaf = 1
#' )
#' model <- lgb.train(params, dtrain, 20)
#'
#' tree_interpretation <- lgb.interprete(model, test$data, 1:5)
#'
#' @importFrom data.table as.data.table
#' @importFrom magrittr %>% %T>%
#' @export
@@ -45,16 +45,16 @@ lgb.interprete <- function(model,
data,
idxset,
num_iteration = NULL) {
# Get tree model
tree_dt <- lgb.model.dt.tree(model, num_iteration)
# Check number of classes
num_class <- model$.__enclos_env__$private$num_class
# Get vector list
tree_interpretation_dt_list <- vector(mode = "list", length = length(idxset))
# Get parsed predictions of data
leaf_index_mat_list <- model$predict(data[idxset, , drop = FALSE],
num_iteration = num_iteration,
@@ -62,63 +62,63 @@ lgb.interprete <- function(model,
t(.) %>%
data.table::as.data.table(.) %>%
lapply(., FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE))
# Get list of trees
tree_index_mat_list <- lapply(leaf_index_mat_list,
FUN = function(x) matrix(seq_len(length(x)) - 1, ncol = num_class, byrow = TRUE))
# Sequence over idxset
for (i in seq_along(idxset)) {
tree_interpretation_dt_list[[i]] <- single.row.interprete(tree_dt, num_class, tree_index_mat_list[[i]], leaf_index_mat_list[[i]])
}
# Return interpretation list
return(tree_interpretation_dt_list)
}
#' @importFrom data.table data.table
single.tree.interprete <- function(tree_dt,
tree_id,
leaf_id) {
# Match tree id
single_tree_dt <- tree_dt[tree_index == tree_id, ]
# Get leaves
leaf_dt <- single_tree_dt[leaf_index == leaf_id, .(leaf_index, leaf_parent, leaf_value)]
# Get nodes
node_dt <- single_tree_dt[!is.na(split_index), .(split_index, split_feature, node_parent, internal_value)]
# Prepare sequences
feature_seq <- character(0)
value_seq <- numeric(0)
# Get to root from leaf
leaf_to_root <- function(parent_id, current_value) {
# Store value
value_seq <<- c(current_value, value_seq)
# Check for null parent id
if (!is.na(parent_id)) {
# Not null means existing node
this_node <- node_dt[split_index == parent_id, ]
feature_seq <<- c(this_node[["split_feature"]], feature_seq)
leaf_to_root(this_node[["node_parent"]], this_node[["internal_value"]])
}
}
# Perform leaf to root conversion
leaf_to_root(leaf_dt[["leaf_parent"]], leaf_dt[["leaf_value"]])
# Return formatted data.table
data.table::data.table(Feature = feature_seq, Contribution = diff.default(value_seq))
}
#' @importFrom data.table rbindlist
@@ -126,7 +126,7 @@ single.tree.interprete <- function(tree_dt,
multiple.tree.interprete <- function(tree_dt,
tree_index,
leaf_index) {
# Apply over each tree
mapply(single.tree.interprete,
tree_id = tree_index, leaf_id = leaf_index,
@@ -135,52 +135,52 @@ multiple.tree.interprete <- function(tree_dt,
data.table::rbindlist(., use.names = TRUE) %>%
magrittr::extract(., j = .(Contribution = sum(Contribution)), by = "Feature") %>%
magrittr::extract(., i = order(abs(Contribution), decreasing = TRUE))
}
#' @importFrom data.table set setnames
single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index_mat) {
# Prepare vector list
tree_interpretation <- vector(mode = "list", length = num_class)
# Loop through each class
for (i in seq_len(num_class)) {
tree_interpretation[[i]] <- multiple.tree.interprete(tree_dt, tree_index_mat[, i], leaf_index_mat[, i]) %T>% {
# Number of classes larger than 1 requires adjustment
if (num_class > 1) {
data.table::setnames(., old = "Contribution", new = paste("Class", i - 1))
}
}
}
# Check for number of classes larger than 1
if (num_class == 1) {
# First interpretation element
tree_interpretation_dt <- tree_interpretation[[1]]
} else {
# Full interpretation elements
tree_interpretation_dt <- Reduce(f = function(x, y) merge(x, y, by = "Feature", all = TRUE),
x = tree_interpretation)
# Loop through each class column, replacing missing values with zero
for (j in 2:ncol(tree_interpretation_dt)) {
data.table::set(tree_interpretation_dt,
i = which(is.na(tree_interpretation_dt[[j]])),
j = j,
value = 0)
}
}
# Return interpretation tree
return(tree_interpretation_dt)
}
#' Data preparator for LightGBM datasets (numeric)
#'
#' Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric (not integer). Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
@@ -17,7 +17,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
@@ -25,7 +25,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
@@ -36,57 +36,57 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' @export
lgb.prepare <- function(data) {
# data.table not behaving like data.frame
if ("data.table" %in% class(data)) {
# Get data classes
list_classes <- sapply(data, class)
# Convert characters to factors only (we can change them to numeric after)
is_char <- which(list_classes == "character")
if (length(is_char) > 0) {
data[, (is_char) := lapply(.SD, function(x) {as.numeric(as.factor(x))}), .SDcols = is_char]
}
# Convert factors to numeric (integer is more efficient actually)
is_fact <- c(which(list_classes == "factor"), is_char)
if (length(is_fact) > 0) {
data[, (is_fact) := lapply(.SD, function(x) {as.numeric(x)}), .SDcols = is_fact]
}
} else {
# Default routine (data.frame)
if ("data.frame" %in% class(data)) {
# Get data classes
list_classes <- sapply(data, class)
# Convert characters to factors, then to numeric (integer would be more efficient)
is_char <- which(list_classes == "character")
if (length(is_char) > 0) {
data[is_char] <- lapply(data[is_char], function(x) {as.numeric(as.factor(x))})
}
# Convert factors to numeric (integer is more efficient actually)
is_fact <- which(list_classes == "factor")
if (length(is_fact) > 0) {
data[is_fact] <- lapply(data[is_fact], function(x) {as.numeric(x)})
}
} else {
# What do you think you are doing here? Throw error.
stop("lgb.prepare2: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
}
}
return(data)
}
#' Data preparator for LightGBM datasets (integer)
#'
#' Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for an integer dataset instead of a numeric dataset. Note that there are programs which do not support integer-only input. Consider this a half-memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare.
#'
#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
@@ -17,7 +17,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' # Convert all factors/chars to integer
#' str(lgb.prepare2(data = iris))
#' # 'data.frame': 150 obs. of 5 variables:
@@ -26,7 +26,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' # When lightgbm package is installed, and you do not want to load it
#' # You can still use the function!
#' lgb.unloader()
@@ -37,57 +37,57 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' @export
lgb.prepare2 <- function(data) {
# data.table not behaving like data.frame
if (inherits(data, "data.table")) {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Convert characters to factors only (we can change them to numeric after)
is_char <- which(list_classes == "character")
if (length(is_char) > 0) {
data[, (is_char) := lapply(.SD, function(x) {as.integer(as.factor(x))}), .SDcols = is_char]
}
# Convert factors to numeric (integer is more efficient actually)
is_fact <- c(which(list_classes == "factor"), is_char)
if (length(is_fact) > 0) {
data[, (is_fact) := lapply(.SD, function(x) {as.integer(x)}), .SDcols = is_fact]
}
} else {
# Default routine (data.frame)
if (inherits(data, "data.frame")) {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Convert characters to factors, then to integer
is_char <- which(list_classes == "character")
if (length(is_char) > 0) {
data[is_char] <- lapply(data[is_char], function(x) {as.integer(as.factor(x))})
}
# Convert factors to numeric (integer is more efficient actually)
is_fact <- which(list_classes == "factor")
if (length(is_fact) > 0) {
data[is_fact] <- lapply(data[is_fact], function(x) {as.integer(x)})
}
} else {
# What do you think you are doing here? Throw error.
stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
}
}
return(data)
}
#' Data preparator for LightGBM datasets with rules (numeric)
#'
#' Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric. In addition, the created rules are kept so you can convert other datasets using this converter.
#'
#' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
@@ -18,7 +18,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
#' str(new_iris$data)
#' # 'data.frame': 150 obs. of 5 variables:
@@ -27,31 +27,31 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
#'
#' data(iris) # Erase iris dataset
#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
#' # Warning message:
#' # In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
#' # invalid factor level, NA generated
#'
#' # Use conversion using known rules
#' # Unknown factors become 0, excellent for sparse datasets
#' newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
#'
#' # Unknown factor is now zero, perfect for sparse datasets
#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor
#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#' # 1 5.1 3.5 1.4 0.2 0
#'
#' newer_iris$data[1, 5] <- 1 # Put back real initial value
#'
#' # Is the newly created dataset equal? YES!
#' all.equal(new_iris$data, newer_iris$data)
#' # [1] TRUE
#'
#' # Can we test our own rules?
#' data(iris) # Erase iris dataset
#'
#' # We remapped values differently
#' personal_rules <- list(Species = c("setosa" = 3,
#' "versicolor" = 2,
@@ -64,43 +64,43 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : num 3 3 3 3 3 3 3 3 3 3 ...
#'
#' @importFrom data.table set
#' @export
lgb.prepare_rules <- function(data, rules = NULL) {
# data.table not behaving like data.frame
if (inherits(data, "data.table")) {
# Must use existing rules
if (!is.null(rules)) {
# Loop through rules
for (i in names(rules)) {
data.table::set(data, j = i, value = unname(rules[[i]][data[[i]]]))
data[[i]][is.na(data[[i]])] <- 0 # Overwrite NAs by 0s
}
} else {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Map characters/factors
is_fix <- which(list_classes %in% c("character", "factor"))
rules <- list()
# Need to create rules?
if (length(is_fix) > 0) {
# Go through all characters/factors
for (i in is_fix) {
# Store column elsewhere
mini_data <- data[[i]]
# Get unique values
if (is.factor(mini_data)) {
mini_unique <- levels(mini_data) # Factor
@@ -110,55 +110,55 @@ lgb.prepare_rules <- function(data, rules = NULL) {
mini_unique <- as.factor(unique(mini_data)) # Character
mini_numeric <- as.numeric(mini_unique) # No respect of ordinality
}
# Create rules
indexed <- colnames(data)[i] # Index value
rules[[indexed]] <- mini_numeric # Numeric content
names(rules[[indexed]]) <- mini_unique # Character equivalent
# Apply to real data column
data.table::set(data, j = i, value = unname(rules[[indexed]][mini_data]))
}
}
}
} else {
# Must use existing rules
if (!is.null(rules)) {
# Loop through rules
for (i in names(rules)) {
data[[i]] <- unname(rules[[i]][data[[i]]])
data[[i]][is.na(data[[i]])] <- 0 # Overwrite NAs by 0s
}
} else {
# Default routine (data.frame)
if (inherits(data, "data.frame")) {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Map characters/factors
is_fix <- which(list_classes %in% c("character", "factor"))
rules <- list()
# Need to create rules?
if (length(is_fix) > 0) {
# Go through all characters/factors
for (i in is_fix) {
# Store column elsewhere
mini_data <- data[[i]]
# Get unique values
if (is.factor(mini_data)) {
mini_unique <- levels(mini_data) # Factor
@@ -168,30 +168,30 @@ lgb.prepare_rules <- function(data, rules = NULL) {
mini_unique <- as.factor(unique(mini_data)) # Character
mini_numeric <- as.numeric(mini_unique) # No respect of ordinality
}
# Create rules
indexed <- colnames(data)[i] # Index value
rules[[indexed]] <- mini_numeric # Numeric content
names(rules[[indexed]]) <- mini_unique # Character equivalent
# Apply to real data column
data[[i]] <- unname(rules[[indexed]][mini_data])
}
}
} else {
# What do you think you are doing here? Throw error.
stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
}
}
}
return(list(data = data, rules = rules))
}
#' Data preparator for LightGBM datasets with rules (integer)
#'
#' Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, the created rules are kept so you can convert other datasets using this converter. This is useful if you have a specific need for an integer dataset instead of a numeric dataset. Note that there are programs which do not support integer-only input. Consider this a half-memory technique which is dangerous, especially for LightGBM.
#'
#' @param data A data.frame or data.table to prepare.
#' @param rules A set of rules from the data preparator, if already used.
#'
#' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
#'
#' @examples
#' library(lightgbm)
#' data(iris)
#'
#' str(iris)
#' # 'data.frame': 150 obs. of 5 variables:
#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
@@ -18,7 +18,7 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
#'
#' new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
#' str(new_iris$data)
#' # 'data.frame': 150 obs. of 5 variables:
@@ -27,31 +27,31 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
#'
#' data(iris) # Erase iris dataset
#' iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
#' # Warning message:
#' # In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
#' # invalid factor level, NA generated
#'
#' # Use conversion using known rules
#' # Unknown factors become 0, excellent for sparse datasets
#' newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
#'
#' # Unknown factor is now zero, perfect for sparse datasets
#' newer_iris$data[1, ] # Species became 0 as it is an unknown factor
#' # Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#' # 1 5.1 3.5 1.4 0.2 0
#'
#' newer_iris$data[1, 5] <- 1 # Put back real initial value
#'
#' # Is the newly created dataset equal? YES!
#' all.equal(new_iris$data, newer_iris$data)
#' # [1] TRUE
#'
#' # Can we test our own rules?
#' data(iris) # Erase iris dataset
#'
#' # We remapped values differently
#' personal_rules <- list(Species = c("setosa" = 3L,
#' "versicolor" = 2L,
@@ -64,43 +64,43 @@
#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
#' # $ Species : int 3 3 3 3 3 3 3 3 3 3 ...
#'
#' @importFrom data.table set
#' @export
lgb.prepare_rules2 <- function(data, rules = NULL) {
# data.table not behaving like data.frame
if (inherits(data, "data.table")) {
# Must use existing rules
if (!is.null(rules)) {
# Loop through rules
for (i in names(rules)) {
data.table::set(data, j = i, value = unname(rules[[i]][data[[i]]]))
data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s as integer
}
} else {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Map characters/factors
is_fix <- which(list_classes %in% c("character", "factor"))
rules <- list()
# Need to create rules?
if (length(is_fix) > 0) {
# Go through all characters/factors
for (i in is_fix) {
# Store column elsewhere
mini_data <- data[[i]]
# Get unique values
if (is.factor(mini_data)) {
mini_unique <- levels(mini_data) # Factor
@@ -109,55 +109,55 @@ lgb.prepare_rules2 <- function(data, rules = NULL) {
mini_unique <- as.factor(unique(mini_data)) # Character
mini_numeric <- as.integer(mini_unique) # No respect of ordinality
}
# Create rules
indexed <- colnames(data)[i] # Index value
rules[[indexed]] <- mini_numeric # Numeric content
names(rules[[indexed]]) <- mini_unique # Character equivalent
# Apply to real data column
data.table::set(data, j = i, value = unname(rules[[indexed]][mini_data]))
}
}
}
} else {
# Must use existing rules
if (!is.null(rules)) {
# Loop through rules
for (i in names(rules)) {
data[[i]] <- unname(rules[[i]][data[[i]]])
data[[i]][is.na(data[[i]])] <- 0L # Overwrite NAs by 0s as integer
}
} else {
# Default routine (data.frame)
if (inherits(data, "data.frame")) {
# Get data classes
list_classes <- vapply(data, class, character(1))
# Map characters/factors
is_fix <- which(list_classes %in% c("character", "factor"))
rules <- list()
# Need to create rules?
if (length(is_fix) > 0) {
# Go through all characters/factors
for (i in is_fix) {
# Store column elsewhere
mini_data <- data[[i]]
# Get unique values
if (is.factor(mini_data)) {
mini_unique <- levels(mini_data) # Factor
@@ -166,30 +166,30 @@ lgb.prepare_rules2 <- function(data, rules = NULL) {
mini_unique <- as.factor(unique(mini_data)) # Character
mini_numeric <- as.integer(mini_unique) # No respect of ordinality
}
# Create rules
indexed <- colnames(data)[i] # Index value
rules[[indexed]] <- mini_numeric # Numeric content
names(rules[[indexed]]) <- mini_unique # Character equivalent
# Apply to real data column
data[[i]] <- unname(rules[[indexed]][mini_data])
}
}
} else {
# What do you think you are doing here? Throw error.
stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame")
}
}
}
return(list(data = data, rules = rules))
}
@@ -3,11 +3,11 @@
#' @description Logic to train with LightGBM
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}
#' @param eval evaluation function, can be (a list of) character or custom eval function
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int
#' type int represents index,
@@ -17,14 +17,14 @@
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
#' overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' }
#' @return a trained booster model \code{lgb.Booster}.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
@@ -42,7 +42,7 @@
#' min_data = 1,
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#'
#' @export
lgb.train <- function(params = list(),
data,
@@ -60,7 +60,7 @@ lgb.train <- function(params = list(),
callbacks = list(),
reset_data = FALSE,
...) {
# Setup temporary variables
additional_params <- list(...)
params <- append(params, additional_params)
@@ -69,7 +69,7 @@ lgb.train <- function(params = list(),
params <- lgb.check.eval(params, eval)
fobj <- NULL
feval <- NULL
if (nrounds <= 0) {
stop("nrounds should be greater than zero")
}
@@ -79,25 +79,25 @@ lgb.train <- function(params = list(),
fobj <- params$objective
params$objective <- "NONE"
}
# Check for loss (function or not)
if (is.function(eval)) {
feval <- eval
}
# Check for parameters
lgb.check.params(params)
# Init predictor to empty
predictor <- NULL
# Check for boosting from a trained model
if (is.character(init_model)) {
predictor <- Predictor$new(init_model)
} else if (lgb.is.Booster(init_model)) {
predictor <- init_model$to_predictor()
}
# Set the iteration to start from / end to (and check for boosting from a trained model, again)
begin_iteration <- 1
if (!is.null(predictor)) {
@@ -110,89 +110,89 @@ lgb.train <- function(params = list(),
} else {
end_iteration <- begin_iteration + nrounds - 1
}
# Check for training dataset type correctness
if (!lgb.is.Dataset(data)) {
stop("lgb.train: data only accepts lgb.Dataset object")
}
# Check for validation dataset type correctness
if (length(valids) > 0) {
# One or more validation dataset
# Check for list as input and type correctness by object
if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1)))) {
stop("lgb.train: valids must be a list of lgb.Dataset elements")
}
# Attempt to get names
evnames <- names(valids)
# Check for names existence
if (is.null(evnames) || !all(nzchar(evnames))) {
stop("lgb.train: each element of the valids must have a name tag")
}
}
# Update parameters with parsed parameters
data$update_params(params)
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor)
# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames)
}
# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
# Construct datasets, if needed
data$construct()
valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
# Parse validation datasets
if (length(valids) > 0) {
# Loop through all validation datasets using name
for (key in names(valids)) {
# Use names to get validation datasets
valid_data <- valids[[key]]
# Check for duplicate train/validation dataset
if (identical(data, valid_data)) {
valid_contain_train <- TRUE
train_data_name <- key
next
}
# Update parameters, data
valid_data$update_params(params)
valid_data$set_reference(data)
reduced_valid_sets[[key]] <- valid_data
}
}
# Add printing log callback
if (verbose > 0 && eval_freq > 0) {
callbacks <- add.cb(callbacks, cb.print.evaluation(eval_freq))
}
# Add evaluation log callback
if (record && length(valids) > 0) {
callbacks <- add.cb(callbacks, cb.record.evaluation())
}
# Check for early stopping passed as parameter when adding early stopping callback
early_stop <- c("early_stopping_round", "early_stopping_rounds", "early_stopping")
if (any(names(params) %in% early_stop)) {
@@ -206,83 +206,83 @@ lgb.train <- function(params = list(),
}
}
}
# "Categorize" callbacks
cb <- categorize.callbacks(callbacks)
# Construct booster with datasets
booster <- Booster$new(params = params, train_set = data)
if (valid_contain_train) { booster$set_train_data_name(train_data_name) }
for (key in names(reduced_valid_sets)) {
booster$add_valid(reduced_valid_sets[[key]], key)
}
# Callback env
env <- CB_ENV$new()
env$model <- booster
env$begin_iteration <- begin_iteration
env$end_iteration <- end_iteration
# Start training model using number of iterations to start and end with
for (i in seq.int(from = begin_iteration, to = end_iteration)) {
# Overwrite iteration in environment
env$iteration <- i
env$eval_list <- list()
# Loop through "pre_iter" element
for (f in cb$pre_iter) {
f(env)
}
# Update one boosting iteration
booster$update(fobj = fobj)
# Prepare collection of evaluation results
eval_list <- list()
# Collection: Has validation dataset?
if (length(valids) > 0) {
# Validation has training dataset?
if (valid_contain_train) {
eval_list <- append(eval_list, booster$eval_train(feval = feval))
}
# Evaluate validation datasets
eval_list <- append(eval_list, booster$eval_valid(feval = feval))
}
# Write evaluation result in environment
env$eval_list <- eval_list
# Loop through env
for (f in cb$post_iter) {
f(env)
}
# Check for early stopping and break if needed
if (env$met_early_stop) break
}
# Check for booster model conversion to predictor model
if (reset_data) {
# Store temporarily model data elsewhere
booster_old <- list(best_iter = booster$best_iter,
best_score = booster$best_score,
record_evals = booster$record_evals)
# Reload model
booster <- lgb.load(model_str = booster$save_model_to_string())
booster$best_iter <- booster_old$best_iter
booster$best_score <- booster_old$best_score
booster$record_evals <- booster_old$record_evals
}
# Return booster
return(booster)
}
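# A minimal sketch (not part of this diff) of the custom-objective path above:
# when params$objective is a function, lgb.train() routes it to fobj and sets
# the internal objective to "NONE". The function must return the gradient and
# hessian of the loss with respect to the raw predictions.
library(lightgbm)
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # sigmoid of the raw score
  list(grad = preds - labels, hess = preds * (1 - preds))
}
model <- lgb.train(params = list(objective = logregobj)
                   , data = dtrain
                   , nrounds = 10)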
#' LightGBM unloading error fix
#'
#' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix it.
#'
#' @param restore Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed.
#' @param wipe Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global environment. Defaults to \code{FALSE} which means to not remove them.
#' @param envir The environment to perform wiping on if \code{wipe == TRUE}. Defaults to \code{.GlobalEnv} which is the global environment.
#'
#' @return NULL invisibly.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
@@ -28,16 +28,16 @@
#' lgb.unloader(restore = FALSE, wipe = FALSE, envir = .GlobalEnv)
#' rm(model, dtrain, dtest) # Not needed if wipe = TRUE
#' gc() # Not needed if wipe = TRUE
#'
#' library(lightgbm)
#' # Do whatever you want again with LightGBM without object clashing
#'
#' @export
lgb.unloader <- function(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) {
# Unload package
try(detach("package:lightgbm", unload = TRUE), silent = TRUE)
# Should we wipe variables? (lgb.Booster, lgb.Dataset)
if (wipe) {
boosters <- Filter(function(x) inherits(get(x, envir = envir), "lgb.Booster"), ls(envir = envir))
@@ -45,12 +45,12 @@ lgb.unloader <- function(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) {
rm(list = c(boosters, datasets), envir = envir)
gc(verbose = FALSE)
}
# Load package back?
if (restore) {
library(lightgbm)
}
invisible()
}
@@ -29,21 +29,21 @@ NULL
#' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
#' \itemize{
#' \item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
#' \item{obj}{objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}}
#' \item{eval}{evaluation function, can be (a list of) character or custom eval function}
#' \item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}
#' \item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
#' \item{categorical_feature}{list of str or int. type int represents index, type str represents feature names}
#' \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
#' overfitting when #data is small. Trees still grow leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPUs use hyper-threading to generate 2 threads per CPU core).}
#' }
#' @export
@@ -59,7 +59,7 @@ lightgbm <- function(data,
init_model = NULL,
callbacks = list(),
...) {
# Set data to a temporary variable
dtrain <- data
if (nrounds <= 0) {
@@ -75,15 +75,15 @@ lightgbm <- function(data,
if (verbose > 0) {
valids$train <- dtrain
}
# Train a model using the regular way
bst <- lgb.train(params, dtrain, nrounds, valids, verbose = verbose, eval_freq = eval_freq,
early_stopping_rounds = early_stopping_rounds,
init_model = init_model, callbacks = callbacks, ...)
# Store model under a specific name
bst$save_model(save_name)
# Return booster
return(bst)
}
@@ -152,7 +152,7 @@ NULL
#'
#' @references
#' http://archive.ics.uci.edu/ml/datasets/Bank+Marketing
#'
#' S. Moro, P. Cortez and P. Rita. (2014)
#' A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems
#'
#' readRDS for lgb.Booster models
#'
#' Attempts to load a model using RDS.
#'
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param refhook a hook function for handling reference objects.
#'
#' @return lgb.Booster.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
@@ -26,31 +26,31 @@
#' early_stopping_rounds = 10)
#' saveRDS.lgb.Booster(model, "model.rds")
#' new_model <- readRDS.lgb.Booster("model.rds")
#'
#' @export
readRDS.lgb.Booster <- function(file = "", refhook = NULL) {
# Read RDS file
object <- readRDS(file = file, refhook = refhook)
# Check if object has the model stored
if (!is.na(object$raw)) {
# Create temporary model for the model loading
object2 <- lgb.load(model_str = object$raw)
# Restore best iteration and recorded evaluations
object2$best_iter <- object$best_iter
object2$record_evals <- object$record_evals
# Return newly loaded object
return(object2)
} else {
# Return RDS loaded object
return(object)
}
}
#' saveRDS for lgb.Booster models
#'
#' Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
#'
#' @param object R object to serialize.
#' @param file a connection or the name of the file where the R object is saved to or read from.
#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), a binary one is used. See the comments in the help for save.
......@@ -9,9 +9,9 @@
#' @param compress a logical specifying whether saving to a named file is to use "gzip" compression, or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of compression to be used. Ignored if file is a connection.
#' @param refhook a hook function for handling reference objects.
#' @param raw whether to save the model in a raw variable or not; recommended to leave it as \code{TRUE}.
#'
#' @return NULL invisibly.
#'
#' @examples
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
......@@ -40,13 +40,13 @@ saveRDS.lgb.Booster <- function(object,
compress = TRUE,
refhook = NULL,
raw = TRUE) {
# Check if object has a raw value (and if the user wants to store the raw)
if (is.na(object$raw) && raw) {
# Save model
object$save()
# Save RDS
saveRDS(object,
file = file,
......@@ -54,12 +54,12 @@ saveRDS.lgb.Booster <- function(object,
version = version,
compress = compress,
refhook = refhook)
# Free model from memory
object$raw <- NA
} else {
# Save as usual
saveRDS(object,
file = file,
......@@ -67,7 +67,7 @@ saveRDS.lgb.Booster <- function(object,
version = version,
compress = compress,
refhook = refhook)
}
}
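Taken together, a save/load round trip is sketched below; `bst` stands for any trained lgb.Booster and "model.rds" is a throwaway path:

saveRDS.lgb.Booster(bst, "model.rds")     # with raw = TRUE the model string is stored inside the RDS
bst2 <- readRDS.lgb.Booster("model.rds")  # rebuilds a live booster from that string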
......@@ -11,18 +11,18 @@ lgb.is.null.handle <- function(x) {
}
lgb.encode.char <- function(arr, len) {
if (!is.raw(arr)) {
stop("lgb.encode.char: Can only encode from raw type") # Not an object of type raw
}
rawToChar(arr[seq_len(len)]) # Return the conversion of raw type to character type
}
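A quick sketch of the decoding behavior:

buf <- charToRaw("hello world")
lgb.encode.char(buf, 5L)  # "hello" - only the first len bytes are decoded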
lgb.call <- function(fun_name, ret, ...) {
# Set call state to a zero value
call_state <- 0L
# Check for a ret call
if (!is.null(ret)) {
call_state <- .Call(fun_name, ..., ret, call_state, PACKAGE = "lib_lightgbm") # Call with ret
......@@ -38,7 +38,7 @@ lgb.call <- function(fun_name, ret, ...) {
act_len <- 0L
err_msg <- raw(buf_len)
err_msg <- .Call("LGBM_GetLastError_R", buf_len, act_len, err_msg, PACKAGE = "lib_lightgbm")
# Check error buffer
if (act_len > buf_len) {
buf_len <- act_len
......@@ -49,169 +49,169 @@ lgb.call <- function(fun_name, ret, ...) {
err_msg,
PACKAGE = "lib_lightgbm")
}
# Return error
stop("api error: ", lgb.encode.char(err_msg, act_len))
}
return(ret)
}
lgb.call.return.str <- function(fun_name, ...) {
# Create buffer
buf_len <- as.integer(1024 * 1024)
act_len <- 0L
buf <- raw(buf_len)
# Call buffer
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
# Check for buffer content
if (act_len > buf_len) {
buf_len <- act_len
buf <- raw(buf_len)
buf <- lgb.call(fun_name, ret = buf, ..., buf_len, act_len)
}
# Return encoded character
return(lgb.encode.char(buf, act_len))
}
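Both helpers rely on the same grow-and-retry idiom: call the C API with a preallocated buffer and, if the reported actual length exceeds the buffer, enlarge it and call once more. A generic sketch of the idiom (names are illustrative, not package API):

with_grown_buffer <- function(fill, initial_len = 1024L) {
  # fill() is assumed to return list(buf = <filled raw vector>, act_len = <bytes needed>)
  buf_len <- initial_len
  res <- fill(raw(buf_len), buf_len)
  if (res$act_len > buf_len) {
    # Buffer was too small: retry once with the exact required size
    buf_len <- res$act_len
    res <- fill(raw(buf_len), buf_len)
  }
  res$buf
}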
lgb.params2str <- function(params, ...) {
# Check for a list as input
if (!is.list(params)) {
stop("params must be a list")
}
# Split parameter names
names(params) <- gsub("\\.", "_", names(params))
# Merge parameters from the params and the dots-expansion
dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params))
# Check for identical parameters
if (length(intersect(names(params), names(dot_params))) > 0) {
stop("Same parameters in ", sQuote("params"), " and in the call are not allowed. Please check your ", sQuote("params"), " list")
}
# Merge parameters
params <- c(params, dot_params)
# Setup temporary variable
ret <- list()
# Perform key value join
for (key in names(params)) {
# Join multi value first
val <- paste0(format(params[[key]], scientific = FALSE), collapse = ",")
if (nchar(val) <= 0) next # Skip join
# Join key value
pair <- paste0(c(key, val), collapse = "=")
ret <- c(ret, pair)
}
# Check ret length
if (length(ret) == 0) {
# Return empty string
lgb.c_str("")
} else {
# Return string separated by a space per element
lgb.c_str(paste0(ret, collapse = " "))
}
}
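A sketch of the expected serialization; strictly speaking the function returns the NUL-terminated raw encoding produced by lgb.c_str, shown here as the equivalent string:

lgb.params2str(list(learning.rate = 0.1, num.leaves = 31, bagging_fraction = 0.8))
# dots become underscores and multi-element values are comma-joined, giving
# "learning_rate=0.1 num_leaves=31 bagging_fraction=0.8"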
lgb.c_str <- function(x) {
# Perform character to raw conversion
ret <- charToRaw(as.character(x))
ret <- c(ret, as.raw(0))
ret
}
lgb.check.r6.class <- function(object, name) {
# Check for non-existence of R6 class or named class
all(c("R6", name) %in% class(object))
}
lgb.check.params <- function(params) {
# To-do
params # Currently return params because this is not finalized
}
lgb.check.obj <- function(params, obj) {
# List known objectives in a vector
OBJECTIVES <- c("regression", "regression_l1", "regression_l2", "mean_squared_error", "mse", "l2_root", "root_mean_squared_error", "rmse",
"mean_absolute_error", "mae", "quantile",
"huber", "fair", "poisson", "binary", "lambdarank",
"mean_absolute_error", "mae", "quantile",
"huber", "fair", "poisson", "binary", "lambdarank",
"multiclass", "softmax", "multiclassova", "multiclass_ova", "ova", "ovr",
"xentropy", "cross_entropy", "xentlambda", "cross_entropy_lambda", "mean_absolute_percentage_error", "mape",
"gamma", "tweedie")
# Check whether the objective is empty or not, and take it from params if needed
if (!is.null(obj)) { params$objective <- obj }
# Check whether the objective is a character
if (is.character(params$objective)) {
# If the objective is a character, check if it is a known objective
if (!(params$objective %in% OBJECTIVES)) {
# Interrupt on unknown objective name
stop("lgb.check.obj: objective name error should be one of (", paste0(OBJECTIVES, collapse = ", "), ")")
}
} else if (!is.function(params$objective)) {
# If objective is not a character nor a function, then stop
stop("lgb.check.obj: objective should be a character or a function")
}
# Return parameters
return(params)
}
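A sketch of the three validation paths:

p <- lgb.check.obj(list(), obj = "binary")                      # known name: stored in params
p <- lgb.check.obj(list(), obj = function(preds, dtrain) NULL)  # custom function: accepted
# lgb.check.obj(list(), obj = "no_such_objective")              # unknown name: stop()s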
lgb.check.eval <- function(params, eval) {
# Check if metric is null, if yes put a list instead
if (is.null(params$metric)) {
params$metric <- list()
}
# Check if evaluation metric is null, if not then append it
if (!is.null(eval)) {
# Append metric if character or list
if (is.character(eval) || is.list(eval)) {
# Append metrics
params$metric <- append(params$metric, eval)
}
}
# Return parameters
return(params)
}
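A sketch of the merge behavior:

p <- lgb.check.eval(list(), eval = c("auc", "binary_logloss"))
# p$metric is now list("auc", "binary_logloss"); eval metrics are appended
# to whatever metric entries the params list already carried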
......@@ -5,7 +5,7 @@ library(data.table)
library(lightgbm)
# Load data and look at the structure
#
# Classes 'data.table' and 'data.frame': 4521 obs. of 17 variables:
# $ age : int 30 33 35 30 59 35 36 39 41 43 ...
# $ job : chr "unemployed" "services" "management" "management" ...
......@@ -30,7 +30,7 @@ str(bank)
# We must now transform the data to fit in LightGBM
# For this task, we use lgb.prepare
# The function transforms the data into a fittable format
#
# Classes 'data.table' and 'data.frame': 4521 obs. of 17 variables:
# $ age : int 30 33 35 30 59 35 36 39 41 43 ...
# $ job : chr "unemployed" "services" "management" "management" ...
......
......@@ -5,7 +5,7 @@ library(data.table)
library(lightgbm)
# Load data and look at the structure
#
# Classes 'data.table' and 'data.frame': 4521 obs. of 17 variables:
# $ age : int 30 33 35 30 59 35 36 39 41 43 ...
# $ job : chr "unemployed" "services" "management" "management" ...
......@@ -34,7 +34,7 @@ bank_test <- bank[4001:4521, ]
# We must now transform the data to fit in LightGBM
# For this task, we use lgb.prepare
# The function transforms the data into a fittable format
#
# Classes 'data.table' and 'data.frame': 521 obs. of 17 variables:
# $ age : int 53 36 58 26 34 55 55 34 41 38 ...
# $ job : num 1 10 10 9 10 2 2 3 3 4 ...
......
......@@ -36,35 +36,35 @@ preds_builtin <- predict(model_builtin, test[, 1:4], rawscore = TRUE)
# User defined objective function, given prediction, return gradient and second order gradient
custom_multiclass_obj = function(preds, dtrain) {
labels = getinfo(dtrain, "label")
# preds is a matrix with rows corresponding to samples and columns corresponding to choices
preds = matrix(preds, nrow = length(labels))
# to prevent overflow, normalize preds by row
preds = preds - apply(preds, 1, max)
prob = exp(preds) / rowSums(exp(preds))
# compute gradient
grad = prob
grad[cbind(1:length(labels), labels + 1)] = grad[cbind(1:length(labels), labels + 1)] - 1
# compute hessian (approximation)
hess = 2 * prob * (1 - prob)
return(list(grad = grad, hess = hess))
}
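As a sanity check on the math: for softmax cross-entropy the gradient with respect to the raw score of class k is prob_k minus the one-hot label, so each row of grad sums to zero. A quick numeric check with illustrative values:

labels <- c(0L, 2L)
preds <- matrix(c(2, 1, 0.1,
                  0.5, 0.5, 2), nrow = 2, byrow = TRUE)
shifted <- preds - apply(preds, 1, max)  # same row-wise normalization as above
prob <- exp(shifted) / rowSums(exp(shifted))
grad <- prob
grad[cbind(seq_along(labels), labels + 1)] <- grad[cbind(seq_along(labels), labels + 1)] - 1
rowSums(grad)  # c(0, 0) up to floating point, since each row of prob sums to 1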
# define custom metric
custom_multiclass_metric = function(preds, dtrain) {
labels = getinfo(dtrain, "label")
preds = matrix(preds, nrow = length(labels))
preds = preds - apply(preds, 1, max)
prob = exp(preds) / rowSums(exp(preds))
return(list(name = "error",
value = -mean(log(prob[cbind(1:length(labels), labels + 1)])),
higher_better = FALSE))
}
model_custom <- lgb.train(list(),
......
......@@ -23,7 +23,7 @@ lgb.cv(params = list(), data, nrounds = 10, nfold = 3, label = NULL,
\item{weight}{vector of response values. If not NULL, will set to dataset}
\item{obj}{objective function, can be character or custom objective function. Examples include
\code{regression}, \code{regression_l1}, \code{huber},
\code{binary}, \code{lambdarank}, \code{multiclass}}
......@@ -66,10 +66,10 @@ List of callback functions that are applied at each iteration.}
\itemize{
\item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
\item{num_leaves}{number of leaves in one tree. defaults to 127}
\item{max_depth}{Limit the max depth of the tree model. This is used to deal with
overfitting when #data is small. Trees still grow leaf-wise.}
\item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
the number of real CPU cores, not the number of threads (most
CPUs use hyper-threading to generate 2 threads per CPU core).}
}}
}
......