Commit 21575cb2 authored by James Lamb's avatar James Lamb Committed by Laurae
Browse files

updated R dependencies (#1619)

parent b2240721
......@@ -23,23 +23,23 @@ URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
Ckmeans.1d.dp (>= 3.3.1),
DiagrammeR (>= 0.8.1),
ggplot2 (>= 1.0.1),
igraph (>= 1.0.1),
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
stringi (>= 0.5.2),
testthat,
igraph (>= 1.0.1),
stringi (>= 0.5.2)
vcd (>= 1.3)
Depends:
R (>= 3.0),
R6 (>= 2.0)
Imports:
graphics,
methods,
Matrix (>= 1.1-0),
data.table (>= 1.9.6),
graphics,
jsonlite (>= 1.0),
magrittr (>= 1.5),
jsonlite (>= 1.0)
Matrix (>= 1.1-0),
methods
RoxygenNote: 6.0.1
......@@ -38,9 +38,15 @@ export(slice)
import(methods)
importFrom(R6,R6Class)
importFrom(data.table,":=")
importFrom(data.table,data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(graphics,barplot)
importFrom(graphics,par)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is)
useDynLib(lib_lightgbm)
#' @importFrom methods is
Dataset <- R6Class(
classname = "lgb.Dataset",
cloneable = FALSE,
public = list(
# Finalize will free up the handles
finalize = function() {
# Check the need for freeing handle
if (!lgb.is.null.handle(private$handle)) {
# Freeing up handle
lgb.call("LGBM_DatasetFree_R", ret = NULL, private$handle)
private$handle <- NULL
}
},
# Initialize will create a starter dataset
initialize = function(data,
params = list(),
......@@ -28,45 +30,45 @@ Dataset <- R6Class(
used_indices = NULL,
info = list(),
...) {
# Check for additional parameters
additional_params <- list(...)
# Create known attributes list
INFO_KEYS <- c("label", "weight", "init_score", "group")
# Check if attribute key is in the known attribute list
for (key in names(additional_params)) {
# Key existing
if (key %in% INFO_KEYS) {
# Store as info
info[[key]] <- additional_params[[key]]
} else {
# Store as param
params[[key]] <- additional_params[[key]]
}
}
# Check for dataset reference
if (!is.null(reference)) {
if (!lgb.check.r6.class(reference, "lgb.Dataset")) {
stop("lgb.Dataset: Can only use ", sQuote("lgb.Dataset"), " as reference")
}
}
# Check for predictor reference
if (!is.null(predictor)) {
if (!lgb.check.r6.class(predictor, "lgb.Predictor")) {
stop("lgb.Dataset: Only can use ", sQuote("lgb.Predictor"), " as predictor")
}
}
# Check for matrix format
if (is.matrix(data)) {
# Check whether matrix is the correct type first ("double")
......@@ -74,7 +76,7 @@ Dataset <- R6Class(
storage.mode(data) <- "double"
}
}
# Setup private attributes
private$raw_data <- data
private$params <- params
......@@ -86,13 +88,13 @@ Dataset <- R6Class(
private$free_raw_data <- free_raw_data
private$used_indices <- used_indices
private$info <- info
},
create_valid = function(data,
info = list(),
...) {
# Create new dataset
ret <- Dataset$new(data,
private$params,
......@@ -104,61 +106,61 @@ Dataset <- R6Class(
NULL,
info,
...)
# Return ret
return(invisible(ret))
},
# Dataset constructor
construct = function() {
# Check for handle null
if (!lgb.is.null.handle(private$handle)) {
return(invisible(self))
}
# Get feature names
cnames <- NULL
if (is.matrix(private$raw_data) || is(private$raw_data, "dgCMatrix")) {
if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
cnames <- colnames(private$raw_data)
}
# set feature names if not exist
if (is.null(private$colnames) && !is.null(cnames)) {
private$colnames <- as.character(cnames)
}
# Get categorical feature index
if (!is.null(private$categorical_feature)) {
# Check for character name
if (is.character(private$categorical_feature)) {
cate_indices <- as.list(match(private$categorical_feature, private$colnames) - 1)
# Provided indices, but some indices are not existing?
if (sum(is.na(cate_indices)) > 0) {
stop("lgb.self.get.handle: supplied an unknown feature in categorical_feature: ", sQuote(private$categorical_feature[is.na(cate_indices)]))
}
} else {
# Check if more categorical features were output over the feature space
if (max(private$categorical_feature) > length(private$colnames)) {
stop("lgb.self.get.handle: supplied a too large value in categorical_feature: ", max(private$categorical_feature), " but only ", length(private$colnames), " features")
}
# Store indices as [0, n-1] indexed instead of [1, n] indexed
cate_indices <- as.list(private$categorical_feature - 1)
}
# Store indices for categorical features
private$params$categorical_feature <- cate_indices
}
# Check has header or not
has_header <- FALSE
if (!is.null(private$params$has_header) || !is.null(private$params$header)) {
......@@ -166,31 +168,31 @@ Dataset <- R6Class(
has_header <- TRUE
}
}
# Generate parameter str
params_str <- lgb.params2str(private$params)
# Get handle of reference dataset
ref_handle <- NULL
if (!is.null(private$reference)) {
ref_handle <- private$reference$.__enclos_env__$private$get_handle()
}
handle <- NA_real_
# Not subsetting
if (is.null(private$used_indices)) {
# Are we using a data file?
if (is.character(private$raw_data)) {
handle <- lgb.call("LGBM_DatasetCreateFromFile_R",
ret = handle,
lgb.c_str(private$raw_data),
params_str,
ref_handle)
} else if (is.matrix(private$raw_data)) {
# Are we using a matrix?
handle <- lgb.call("LGBM_DatasetCreateFromMat_R",
ret = handle,
......@@ -199,8 +201,8 @@ Dataset <- R6Class(
ncol(private$raw_data),
params_str,
ref_handle)
} else if (is(private$raw_data, "dgCMatrix")) {
} else if (methods::is(private$raw_data, "dgCMatrix")) {
if (length(private$raw_data@p) > 2147483647) {
stop("Cannot support large CSC matrix")
}
......@@ -215,21 +217,21 @@ Dataset <- R6Class(
nrow(private$raw_data),
params_str,
ref_handle)
} else {
# Unknown data type
stop("lgb.Dataset.construct: does not support constructing from ", sQuote(class(private$raw_data)))
}
} else {
# Reference is empty
if (is.null(private$reference)) {
stop("lgb.Dataset.construct: reference cannot be NULL for constructing data subset")
}
# Construct subset
handle <- lgb.call("LGBM_DatasetGetSubset_R",
ret = handle,
......@@ -237,7 +239,7 @@ Dataset <- R6Class(
c(private$used_indices), # Adding c() fixes issue in R v3.5
length(private$used_indices),
params_str)
}
if (lgb.is.null.handle(handle)) {
stop("lgb.Dataset.construct: cannot create Dataset handle")
......@@ -245,7 +247,7 @@ Dataset <- R6Class(
# Setup class and private type
class(handle) <- "lgb.Dataset.handle"
private$handle <- handle
# Set feature names
if (!is.null(private$colnames)) {
self$set_colnames(private$colnames)
......@@ -253,139 +255,139 @@ Dataset <- R6Class(
# Load init score if requested
if (!is.null(private$predictor) && is.null(private$used_indices)) {
# Setup initial scores
init_score <- private$predictor$predict(private$raw_data, rawscore = TRUE, reshape = TRUE)
# Not needed to transpose, for is col_marjor
init_score <- as.vector(init_score)
private$info$init_score <- init_score
}
# Should we free raw data?
if (isTRUE(private$free_raw_data)) {
private$raw_data <- NULL
}
# Get private information
if (length(private$info) > 0) {
# Set infos
for (i in seq_along(private$info)) {
p <- private$info[i]
self$setinfo(names(p), p[[1]])
}
}
# Get label information existence
if (is.null(self$getinfo("label"))) {
stop("lgb.Dataset.construct: label should be set")
}
# Return self
return(invisible(self))
},
# Dimension function
dim = function() {
# Check for handle
if (!lgb.is.null.handle(private$handle)) {
num_row <- 0L
num_col <- 0L
# Get numeric data and numeric features
c(lgb.call("LGBM_DatasetGetNumData_R", ret = num_row, private$handle),
lgb.call("LGBM_DatasetGetNumFeature_R", ret = num_col, private$handle))
} else if (is.matrix(private$raw_data) || is(private$raw_data, "dgCMatrix")) {
} else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
# Check if dgCMatrix (sparse matrix column compressed)
dim(private$raw_data)
} else {
# Trying to work with unknown dimensions is not possible
stop("dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly")
}
},
# Get column names
get_colnames = function() {
# Check for handle
if (!lgb.is.null.handle(private$handle)) {
# Get feature names and write them
cnames <- lgb.call.return.str("LGBM_DatasetGetFeatureNames_R", private$handle)
private$colnames <- as.character(base::strsplit(cnames, "\t")[[1]])
private$colnames
} else if (is.matrix(private$raw_data) || is(private$raw_data, "dgCMatrix")) {
} else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) {
# Check if dgCMatrix (sparse matrix column compressed)
colnames(private$raw_data)
} else {
# Trying to work with unknown dimensions is not possible
stop("dim: cannot get dimensions before dataset has been constructed, please call lgb.Dataset.construct explicitly")
}
},
# Set column names
set_colnames = function(colnames) {
# Check column names non-existence
if (is.null(colnames)) {
return(invisible(self))
}
# Check empty column names
colnames <- as.character(colnames)
if (length(colnames) == 0) {
return(invisible(self))
}
# Write column names
private$colnames <- colnames
if (!lgb.is.null.handle(private$handle)) {
# Merge names with tab separation
merged_name <- paste0(as.list(private$colnames), collapse = "\t")
lgb.call("LGBM_DatasetSetFeatureNames_R",
ret = NULL,
private$handle,
lgb.c_str(merged_name))
}
# Return self
return(invisible(self))
},
# Get information
getinfo = function(name) {
# Create known attributes list
INFONAMES <- c("label", "weight", "init_score", "group")
# Check if attribute key is in the known attribute list
if (!is.character(name) || length(name) != 1 || !name %in% INFONAMES) {
stop("getinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", "))
}
# Check for info name and handle
if (is.null(private$info[[name]])) {
if (lgb.is.null.handle(private$handle)){
......@@ -397,10 +399,10 @@ Dataset <- R6Class(
ret = info_len,
private$handle,
lgb.c_str(name))
# Check if info is not empty
if (info_len > 0) {
# Get back fields
ret <- NULL
ret <- if (name == "group") {
......@@ -408,65 +410,65 @@ Dataset <- R6Class(
} else {
numeric(info_len) # Numeric
}
ret <- lgb.call("LGBM_DatasetGetField_R",
ret = ret,
private$handle,
lgb.c_str(name))
private$info[[name]] <- ret
}
}
private$info[[name]]
},
# Set information
setinfo = function(name, info) {
# Create known attributes list
INFONAMES <- c("label", "weight", "init_score", "group")
# Check if attribute key is in the known attribute list
if (!is.character(name) || length(name) != 1 || !name %in% INFONAMES) {
stop("setinfo: name must one of the following: ", paste0(sQuote(INFONAMES), collapse = ", "))
}
# Check for type of information
info <- if (name == "group") {
as.integer(info) # Integer
} else {
as.numeric(info) # Numeric
}
# Store information privately
private$info[[name]] <- info
if (!lgb.is.null.handle(private$handle) && !is.null(info)) {
if (length(info) > 0) {
lgb.call("LGBM_DatasetSetField_R",
ret = NULL,
private$handle,
lgb.c_str(name),
info,
length(info))
}
}
# Return self
return(invisible(self))
},
# Slice dataset
slice = function(idxset, ...) {
# Perform slicing
Dataset$new(NULL,
private$params,
......@@ -478,84 +480,84 @@ Dataset <- R6Class(
idxset,
NULL,
...)
},
# Update parameters
update_params = function(params) {
# Parameter updating
private$params <- modifyList(private$params, params)
return(invisible(self))
},
# Set categorical feature parameter
set_categorical_feature = function(categorical_feature) {
# Check for identical input
if (identical(private$categorical_feature, categorical_feature)) {
return(invisible(self))
}
# Check for empty data
if (is.null(private$raw_data)) {
stop("set_categorical_feature: cannot set categorical feature after freeing raw data,
please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
}
# Overwrite categorical features
private$categorical_feature <- categorical_feature
# Finalize and return self
self$finalize()
return(invisible(self))
},
# Set reference
set_reference = function(reference) {
# Set known references
self$set_categorical_feature(reference$.__enclos_env__$private$categorical_feature)
self$set_colnames(reference$get_colnames())
private$set_predictor(reference$.__enclos_env__$private$predictor)
# Check for identical references
if (identical(private$reference, reference)) {
return(invisible(self))
}
# Check for empty data
if (is.null(private$raw_data)) {
stop("set_reference: cannot set reference after freeing raw data,
please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
}
# Check for non-existing reference
if (!is.null(reference)) {
# Reference is unknown
if (!lgb.check.r6.class(reference, "lgb.Dataset")) {
stop("set_reference: Can only use lgb.Dataset as a reference")
}
}
# Store reference
private$reference <- reference
# Finalize and return self
self$finalize()
return(invisible(self))
},
# Save binary model
save_binary = function(fname) {
# Store binary data
self$construct()
lgb.call("LGBM_DatasetSaveBinary_R",
......@@ -564,7 +566,7 @@ Dataset <- R6Class(
lgb.c_str(fname))
return(invisible(self))
}
),
private = list(
handle = NULL,
......@@ -577,51 +579,51 @@ Dataset <- R6Class(
free_raw_data = TRUE,
used_indices = NULL,
info = NULL,
# Get handle
get_handle = function() {
# Get handle and construct if needed
if (lgb.is.null.handle(private$handle)) {
self$construct()
}
private$handle
},
# Set predictor
set_predictor = function(predictor) {
# Return self is identical predictor
if (identical(private$predictor, predictor)) {
return(invisible(self))
}
# Check for empty data
if (is.null(private$raw_data)) {
stop("set_predictor: cannot set predictor after free raw data,
please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset")
}
# Check for empty predictor
if (!is.null(predictor)) {
# Predictor is unknown
if (!lgb.check.r6.class(predictor, "lgb.Predictor")) {
stop("set_predictor: Can only use lgb.Predictor as predictor")
}
}
# Store predictor
private$predictor <- predictor
# Finalize and return self
self$finalize()
return(invisible(self))
}
)
)
......@@ -638,9 +640,9 @@ Dataset <- R6Class(
#' @param free_raw_data TRUE if the raw data should be freed after constructing the Dataset
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info} or parameters pass to \code{params}
#'
#'
#' @return constructed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -651,7 +653,7 @@ Dataset <- R6Class(
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.construct(dtrain)
#' }
#'
#'
#' @export
lgb.Dataset <- function(data,
params = list(),
......@@ -661,7 +663,7 @@ lgb.Dataset <- function(data,
free_raw_data = TRUE,
info = list(),
...) {
# Create new dataset
invisible(Dataset$new(data,
params,
......@@ -673,20 +675,20 @@ lgb.Dataset <- function(data,
NULL,
info,
...))
}
#' Construct validation data
#'
#'
#' Construct validation data according to training data
#'
#'
#' @param dataset \code{lgb.Dataset} object, training data
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the lgb.Dataset object
#' @param ... other information to pass to \code{info}.
#'
#'
#' @return constructed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -697,24 +699,24 @@ lgb.Dataset <- function(data,
#' test <- agaricus.test
#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
#' }
#'
#'
#' @export
lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
  # Validation data can only be derived from an existing lgb.Dataset
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.create.valid: input data should be an lgb.Dataset object")
  }
  # Delegate construction to the R6 method and return the new dataset invisibly
  valid_set <- dataset$create_valid(data, info, ...)
  invisible(valid_set)
}
#' Construct Dataset explicitly
#'
#'
#' @param dataset Object of class \code{lgb.Dataset}
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -723,56 +725,56 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#' }
#'
#'
#' @export
lgb.Dataset.construct <- function(dataset) {
  # Explicit construction is only defined for lgb.Dataset objects
  if (lgb.is.Dataset(dataset)) {
    # Trigger native construction via the R6 method; return invisibly
    return(invisible(dataset$construct()))
  }
  stop("lgb.Dataset.construct: input data should be an lgb.Dataset object")
}
#' Dimensions of an lgb.Dataset
#'
#'
#' Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
#' @param x Object of class \code{lgb.Dataset}
#' @param ... other parameters
#'
#'
#' @return a vector of numbers of rows and of columns
#'
#'
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{lgb.Dataset} object.
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#'
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#' }
#'
#'
#' @rdname dim
#' @export
dim.lgb.Dataset <- function(x, ...) {
  # dim() is only meaningful for lgb.Dataset objects
  if (lgb.is.Dataset(x)) {
    # The R6 object reports c(num_rows, num_cols)
    return(x$dim())
  }
  stop("dim.lgb.Dataset: input data should be an lgb.Dataset object")
}
#' Handling of column names of \code{lgb.Dataset}
......@@ -800,76 +802,76 @@ dim.lgb.Dataset <- function(x, ...) {
#' colnames(dtrain) <- make.names(1:ncol(train$data))
#' print(dtrain, verbose = TRUE)
#' }
#'
#'
#' @rdname dimnames.lgb.Dataset
#' @export
dimnames.lgb.Dataset <- function(x) {
  # dimnames is only defined for lgb.Dataset objects
  if (!lgb.is.Dataset(x)) {
    stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object")
  }
  # lgb.Dataset carries no row names; only column names are reported
  col_names <- x$get_colnames()
  list(NULL, col_names)
}
#' @rdname dimnames.lgb.Dataset
#' @export
`dimnames<-.lgb.Dataset` <- function(x, value) {
  # The replacement value must be a two-element list: list(rownames, colnames)
  if (!is.list(value) || length(value) != 2L) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }
  # Row names are not supported on lgb.Dataset
  if (!is.null(value[[1L]])) {
    stop("lgb.Dataset does not have rownames")
  }
  new_colnames <- value[[2L]]
  # A NULL second element clears any existing column names
  if (is.null(new_colnames)) {
    x$set_colnames(NULL)
    return(x)
  }
  # The number of supplied names must match the number of columns
  if (ncol(x) != length(new_colnames)) {
    stop("can't assign ", sQuote(length(new_colnames)), " colnames to an lgb.Dataset with ", sQuote(ncol(x)), " columns")
  }
  # Store the new column names and return the (modified) dataset
  x$set_colnames(new_colnames)
  x
}
#' Slice a dataset
#'
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' the original \code{lgb.Dataset} object
#'
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset an integer vector of indices of the rows needed
#' @param ... other parameters (currently not used)
#' @return constructed sub dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#'
#'
#' dsub <- lightgbm::slice(dtrain, 1:42)
#' labels <- lightgbm::getinfo(dsub, "label")
#' }
#'
#'
#' @export
slice <- function(dataset, ...) {
UseMethod("slice")
......@@ -878,34 +880,34 @@ slice <- function(dataset, ...) {
#' @rdname slice
#' @export
slice.lgb.Dataset <- function(dataset, idxset, ...) {
  # Slicing is only defined for lgb.Dataset objects
  if (!lgb.is.Dataset(dataset)) {
    stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }
  # Build the row subset via the R6 method and return it invisibly
  subset_dataset <- dataset$slice(idxset, ...)
  invisible(subset_dataset)
}
#' Get information of an lgb.Dataset object
#'
#'
#' @param dataset Object of class \code{lgb.Dataset}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#' @return info data
#'
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#'
#' \itemize{
#' \item \code{label}: the label that LightGBM learns from;
#' \item \code{weight}: weights used to rescale each observation;
#' \item \code{group}: the group sizes (for ranking tasks);
#' \item \code{init_score}: the initial score, i.e. the base prediction LightGBM will boost from.
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -913,14 +915,14 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all(labels2 == 1 - labels))
#' }
#'
#'
#' @export
getinfo <- function(dataset, ...) {
UseMethod("getinfo")
......@@ -929,35 +931,35 @@ getinfo <- function(dataset, ...) {
#' @rdname getinfo
#' @export
getinfo.lgb.Dataset <- function(dataset, name, ...) {
  # Information fields are only stored on lgb.Dataset objects
  if (lgb.is.Dataset(dataset)) {
    # Field-name validation and retrieval happen in the R6 method
    return(dataset$getinfo(name))
  }
  stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
}
#' Set information of an lgb.Dataset object
#'
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param name the name of the field to get
#' @param info the specific field of information to set
#' @param ... other parameters
#' @return passed object
#'
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#'
#' \itemize{
#' \item \code{label}: the label that LightGBM learns from;
#' \item \code{weight}: weights used to rescale each observation;
#' \item \code{init_score}: the initial score, i.e. the base prediction LightGBM will boost from;
#' \item \code{group}: the group sizes (for ranking tasks).
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -965,14 +967,14 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.construct(dtrain)
#'
#'
#' labels <- lightgbm::getinfo(dtrain, "label")
#' lightgbm::setinfo(dtrain, "label", 1 - labels)
#'
#'
#' labels2 <- lightgbm::getinfo(dtrain, "label")
#' stopifnot(all.equal(labels2, 1 - labels))
#' }
#'
#'
#' @export
setinfo <- function(dataset, ...) {
UseMethod("setinfo")
......@@ -981,23 +983,23 @@ setinfo <- function(dataset, ...) {
#' @rdname setinfo
#' @export
setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
  # Information fields can only be attached to lgb.Dataset objects
  if (!lgb.is.Dataset(dataset)) {
    stop("setinfo.lgb.Dataset: input dataset should be an lgb.Dataset object")
  }
  # Field-name validation and storage happen in the R6 method
  updated <- dataset$setinfo(name, info)
  invisible(updated)
}
#' Set categorical feature of \code{lgb.Dataset}
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param categorical_feature categorical features
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -1008,30 +1010,30 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
#' dtrain <- lgb.Dataset("lgb.Dataset.data")
#' lgb.Dataset.set.categorical(dtrain, 1:2)
#' }
#'
#'
#' @rdname lgb.Dataset.set.categorical
#' @export
lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
  # Categorical features can only be declared on an lgb.Dataset object
  if (lgb.is.Dataset(dataset)) {
    # The R6 method stores the specification and finalizes any existing handle
    return(invisible(dataset$set_categorical_feature(categorical_feature)))
  }
  stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object")
}
#' Set reference of \code{lgb.Dataset}
#'
#'
#' If you want to use validation data, you should set reference to training data
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param reference object of class \code{lgb.Dataset}
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
......@@ -1043,29 +1045,29 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#' dtest <- lgb.Dataset(test$data, label = test$label)
#' lgb.Dataset.set.reference(dtest, dtrain)
#' }
#'
#'
#' @rdname lgb.Dataset.set.reference
#' @export
lgb.Dataset.set.reference <- function(dataset, reference) {
  # A reference can only be attached to an lgb.Dataset object
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object")
  }
  # The R6 method copies names/categoricals from the reference and stores it
  updated <- dataset$set_reference(reference)
  invisible(updated)
}
#' Save \code{lgb.Dataset} to a binary file
#'
#'
#' @param dataset object of class \code{lgb.Dataset}
#' @param fname object filename of output file
#'
#'
#' @return passed dataset
#'
#'
#' @examples
#'
#'
#' \dontrun{
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
......@@ -1073,21 +1075,21 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#' dtrain <- lgb.Dataset(train$data, label = train$label)
#' lgb.Dataset.save(dtrain, "data.bin")
#' }
#'
#'
#' @rdname lgb.Dataset.save
#' @export
lgb.Dataset.save <- function(dataset, fname) {

  # Saving is only defined for lgb.Dataset objects.
  # (Fixed: error prefix previously said "lgb.Dataset.set:", a copy-paste
  # from another function, which misled users about where the failure came from.)
  if (!lgb.is.Dataset(dataset)) {
    stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object")
  }

  # Only a character path is accepted: the is.character() check below rejects
  # file connections, so the message must not advertise them.
  if (!is.character(fname)) {
    stop("lgb.Dataset.save: fname should be a character string")
  }

  # Construct the dataset if needed and write the binary file (see save_binary),
  # returning the dataset invisibly so the call pipes/chains cleanly
  invisible(dataset$save_binary(fname))
}
#' @importFrom methods is
Predictor <- R6Class(
classname = "lgb.Predictor",
cloneable = FALSE,
public = list(
# Finalize will free up the handles
finalize = function() {
# Check the need for freeing handle
if (private$need_free_handle && !lgb.is.null.handle(private$handle)) {
# Freeing up handle
lgb.call("LGBM_BoosterFree_R", ret = NULL, private$handle)
private$handle <- NULL
}
},
# Initialize will create a starter model
initialize = function(modelfile, ...) {
params <- list(...)
private$params <- lgb.params2str(params)
# Create new lgb handle
handle <- 0.0
# Check if handle is a character
if (is.character(modelfile)) {
# Create handle on it
handle <- lgb.call("LGBM_BoosterCreateFromModelfile_R", ret = handle, lgb.c_str(modelfile))
private$need_free_handle <- TRUE
} else if (is(modelfile, "lgb.Booster.handle")) {
} else if (methods::is(modelfile, "lgb.Booster.handle")) {
# Check if model file is a booster handle already
handle <- modelfile
private$need_free_handle <- FALSE
} else {
# Model file is unknown
stop("lgb.Predictor: modelfile must be either a character filename or an lgb.Booster.handle")
}
# Override class and store it
class(handle) <- "lgb.Booster.handle"
private$handle <- handle
},
# Get current iteration
current_iter = function() {
cur_iter <- 0L
lgb.call("LGBM_BoosterGetCurrentIteration_R", ret = cur_iter, private$handle)
},
# Predict from data
predict = function(data,
num_iteration = NULL,
......@@ -66,22 +68,22 @@ Predictor <- R6Class(
predcontrib = FALSE,
header = FALSE,
reshape = FALSE) {
# Check if number of iterations is existing - if not, then set it to -1 (use all)
if (is.null(num_iteration)) {
num_iteration <- -1
}
# Set temporary variable
num_row <- 0L
# Check if data is a file name
if (is.character(data)) {
# Data is a filename, create a temporary file with a "lightgbm_" pattern in it
tmp_filename <- tempfile(pattern = "lightgbm_")
on.exit(unlink(tmp_filename), add = TRUE)
# Predict from temporary file
lgb.call("LGBM_BoosterPredictForFile_R", ret = NULL, private$handle, data,
as.integer(header),
......@@ -91,19 +93,19 @@ Predictor <- R6Class(
as.integer(num_iteration),
private$params,
lgb.c_str(tmp_filename))
# Get predictions from file
preds <- read.delim(tmp_filename, header = FALSE, seq = "\t")
num_row <- nrow(preds)
preds <- as.vector(t(preds))
} else {
# Not a file, we need to predict from R object
num_row <- nrow(data)
npred <- 0L
# Check number of predictions to do
npred <- lgb.call("LGBM_BoosterCalcNumPredict_R",
ret = npred,
......@@ -113,10 +115,10 @@ Predictor <- R6Class(
as.integer(predleaf),
as.integer(predcontrib),
as.integer(num_iteration))
# Pre-allocate empty vector
preds <- numeric(npred)
# Check if data is a matrix
if (is.matrix(data)) {
preds <- lgb.call("LGBM_BoosterPredictForMat_R",
......@@ -130,8 +132,8 @@ Predictor <- R6Class(
as.integer(predcontrib),
as.integer(num_iteration),
private$params)
} else if (is(data, "dgCMatrix")) {
} else if (methods::is(data, "dgCMatrix")) {
if (length(data@p) > 2147483647) {
stop("Cannot support large CSC matrix")
}
......@@ -150,44 +152,44 @@ Predictor <- R6Class(
as.integer(predcontrib),
as.integer(num_iteration),
private$params)
} else {
# Cannot predict on unknown class
# to-do: predict from lgb.Dataset
stop("predict: cannot predict on data of class ", sQuote(class(data)))
}
}
# Check if number of rows is strange (not a multiple of the dataset rows)
if (length(preds) %% num_row != 0) {
stop("predict: prediction length ", sQuote(length(preds))," is not a multiple of nrows(data): ", sQuote(num_row))
}
# Get number of cases per row
npred_per_case <- length(preds) / num_row
# Data reshaping
if (predleaf | predcontrib) {
# Predict leaves only, reshaping is mandatory
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
} else if (reshape && npred_per_case > 1) {
# Predict with data reshaping
preds <- matrix(preds, ncol = npred_per_case, byrow = TRUE)
}
# Return predictions
return(preds)
}
),
private = list(handle = NULL,
need_free_handle = FALSE,
......
#' Parse a LightGBM model json dump
#'
#'
#' Parse a LightGBM model json dump into a \code{data.table} structure.
#'
#'
#' @param model object of class \code{lgb.Booster}
#' @param num_iteration number of iterations you want to predict with. NULL or
#' @param num_iteration number of iterations you want to predict with. NULL or
#' <= 0 means use best iteration
#'
#'
#' @return
#' A \code{data.table} with detailed information about model trees' nodes and leafs.
#'
#'
#' The columns of the \code{data.table} are:
#'
#'
#' \itemize{
#' \item \code{tree_index}: ID of a tree in a model (integer)
#' \item \code{split_index}: ID of a node in a tree (integer)
......@@ -28,11 +28,11 @@
#' \item \code{leaf_value}: Leaf value
#' \item \code{leaf_count}: The number of observation collected by a leaf
#' }
#'
#'
#' @examples
#' \dontrun{
#' library(lightgbm)
#'
#'
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
......@@ -45,43 +45,45 @@
#'
#' tree_dt <- lgb.model.dt.tree(model)
#' }
#'
#'
#' @importFrom magrittr %>%
#' @importFrom data.table :=
#' @importFrom data.table := data.table
#' @importFrom jsonlite fromJSON
#' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) {

  # Serialize the booster into its JSON representation
  model_json <- lgb.dump(model, num_iteration = num_iteration)

  # Deserialize; simplification flags are chosen so each tree stays a nested list
  model_parsed <- jsonlite::fromJSON(model_json,
                                     simplifyVector = TRUE,
                                     simplifyDataFrame = FALSE,
                                     simplifyMatrix = FALSE,
                                     flatten = FALSE)

  # Flatten every tree into a data.table, then stack them into one table
  per_tree <- lapply(model_parsed$tree_info, single.tree.parse)
  result <- data.table::rbindlist(per_tree, use.names = TRUE)

  # Translate 0-based feature indices in split_feature back to feature names
  result[, split_feature := Lookup(split_feature,
                                   seq.int(from = 0, to = model_parsed$max_feature_idx),
                                   model_parsed$feature_names)]

  # Return the combined tree table
  return(result)
}
#' @importFrom data.table data.table rbindlist
single.tree.parse <- function(lgb_tree) {
# Traverse tree function
pre_order_traversal <- function(env = NULL, tree_node_leaf, current_depth = 0L, parent_index = NA_integer_) {
if (is.null(env)) {
# Setup initial default data.table with default types
env <- new.env(parent = emptyenv())
......@@ -103,10 +105,10 @@ single.tree.parse <- function(lgb_tree) {
# start tree traversal
pre_order_traversal(env, tree_node_leaf, current_depth, parent_index)
} else {
# Check if split index is not null in leaf
if (!is.null(tree_node_leaf$split_index)) {
# update data.table
env$single_tree_dt <- data.table::rbindlist(l = list(env$single_tree_dt,
c(tree_node_leaf[c("split_index",
......@@ -121,7 +123,7 @@ single.tree.parse <- function(lgb_tree) {
"node_parent" = parent_index)),
use.names = TRUE,
fill = TRUE)
# Traverse tree again both left and right
pre_order_traversal(env,
tree_node_leaf$left_child,
......@@ -131,9 +133,9 @@ single.tree.parse <- function(lgb_tree) {
tree_node_leaf$right_child,
current_depth = current_depth + 1L,
parent_index = tree_node_leaf$split_index)
} else if (!is.null(tree_node_leaf$leaf_index)) {
# update data.table
env$single_tree_dt <- data.table::rbindlist(l = list(env$single_tree_dt,
c(tree_node_leaf[c("leaf_index",
......@@ -143,29 +145,30 @@ single.tree.parse <- function(lgb_tree) {
"leaf_parent" = parent_index)),
use.names = TRUE,
fill = TRUE)
}
}
return(env$single_tree_dt)
}
# Traverse structure
single_tree_dt <- pre_order_traversal(tree_node_leaf = lgb_tree$tree_structure)
# Store index
single_tree_dt[, tree_index := lgb_tree$tree_index]
# Return tree
return(single_tree_dt)
}
#' @importFrom magrittr %>% extract inset
Lookup <- function(key, key_lookup, value_lookup, missing = NA) {
  # Position of each key inside key_lookup (NA where the key is absent)
  positions <- match(key, key_lookup)
  # Pull the corresponding values; unmatched keys become NA here
  values <- value_lookup[positions]
  # Replace every NA result with the 'missing' placeholder
  values[is.na(values)] <- missing
  values
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment