[R-package] [ci] added more linters (#2803)

* [R-package] [ci] added more linters * Update R-package/tests/testthat/test_parameters.R Co-Authored-By: Nikita Titov <nekit94-08@mail.ru> * added cbind() to undesirable_function_linter * fixed linting * Update .ci/lint_r_code.R * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

[R-package] [ci] added more linters (#2803)
* [R-package] [ci] added more linters * Update R-package/tests/testthat/test_parameters.R Co-Authored-By: Nikita Titov <nekit94-08@mail.ru> * added cbind() to undesirable_function_linter * fixed linting * Update .ci/lint_r_code.R * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
d42f4313 · James Lamb · GitHub · 6b68967d · d42f4313 · d42f4313
Unverified Commit d42f4313 authored Mar 17, 2020 by James Lamb Committed by GitHub Mar 17, 2020
16 changed files
--- a/.ci/lint_r_code.R
+++ b/.ci/lint_r_code.R
@@ -16,17 +16,30 @@ FILES_TO_LINT <- list.files(
    , include.dirs = FALSE
 )
+# text to use for pipe operators from packages like 'magrittr'
+pipe_text <- paste0(
+    "For consistency and the sake of being explicit, this project's code "
+    , "does not use the pipe operator."
+)
+# text to use for functions that should only be called interactively
+interactive_text <- paste0(
+    "Functions like '?', 'help', and 'install.packages()' should only be used "
+    , "interactively, not in package code."
+)
 LINTERS_TO_USE <- list(
-    "assignment" = lintr::assignment_linter
+    "absolute_path"          = lintr::absolute_path_linter
+    , "assignment"           = lintr::assignment_linter
    , "closed_curly"         = lintr::closed_curly_linter
+    , "commas"               = lintr::commas_linter
    , "equals_na"            = lintr::equals_na_linter
    , "function_left"        = lintr::function_left_parentheses_linter
-    , "commas" = lintr::commas_linter
-    , "concatenation" = lintr::unneeded_concatenation_linter
    , "implicit_integers"    = lintr::implicit_integer_linter
    , "infix_spaces"         = lintr::infix_spaces_linter
    , "long_lines"           = lintr::line_length_linter(length = 120L)
-    , "tabs" = lintr::no_tab_linter
+    , "no_tabs"              = lintr::no_tab_linter
+    , "non_portable_path"    = lintr::nonportable_path_linter
    , "open_curly"           = lintr::open_curly_linter
    , "paren_brace_linter"   = lintr::paren_brace_linter
    , "semicolon"            = lintr::semicolon_terminator_linter
@@ -38,6 +51,38 @@ LINTERS_TO_USE <- list(
    , "trailing_blank"       = lintr::trailing_blank_lines_linter
    , "trailing_white"       = lintr::trailing_whitespace_linter
    , "true_false"           = lintr::T_and_F_symbol_linter
+    , "undesirable_function" = lintr::undesirable_function_linter(
+        fun = c(
+            "cbind" = paste0(
+                "cbind is an unsafe way to build up a data frame. merge() or direct "
+                , "column assignment is preferred."
+            )
+            , "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
+            , "dyn.unload" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
+            , "help" = interactive_text
+            , "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types."
+            , "install.packages" = interactive_text
+            , "is.list" = paste0(
+                "This project uses data.table, and is.list(x) is TRUE for a data.table. "
+                , "identical(class(x), 'list') is a safer way to check that something is an R list object."
+            )
+            , "rbind" = "data.table::rbindlist() is faster and safer than rbind(), and is preferred in this project."
+            , "require" = paste0(
+                "library() is preferred to require() because it will raise an error immediately "
+                , "if a package is missing."
+            )
+        )
+    )
+    , "undesirable_operator" = lintr::undesirable_operator_linter(
+        op = c(
+            "%>%" = pipe_text
+            , "%.%" = pipe_text
+            , "%..%" = pipe_text
+            , "?" = interactive_text
+            , "??" = interactive_text
+        )
+    )
+    , "unneeded_concatenation" = lintr::unneeded_concatenation_linter
 )
 cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
@@ -52,11 +97,14 @@ for (r_file in FILES_TO_LINT) {
        , cache = FALSE
    )
-    cat(sprintf(
+    print(
-        "Found %i linting errors in %s\n"
+        sprintf(
+            "Found %i linting errors in %s"
            , length(this_result)
            , r_file
-    ))
+        )
+        , quote = FALSE
+    )
    results <- c(results, this_result)
@@ -65,7 +113,6 @@ for (r_file in FILES_TO_LINT) {
 issues_found <- length(results)
 if (issues_found > 0L) {
-    cat("\n")
    print(results)
 }

--- a/R-package/R/callback.R
+++ b/R-package/R/callback.R
@@ -18,7 +18,7 @@ CB_ENV <- R6::R6Class(
 cb.reset.parameters <- function(new_params) {
  # Check for parameter list
-  if (!is.list(new_params)) {
+  if (!identical(class(new_params), "list")) {
    stop(sQuote("new_params"), " must be a list")
  }

--- a/R-package/R/lgb.Dataset.R
+++ b/R-package/R/lgb.Dataset.R
@@ -892,7 +892,7 @@ dimnames.lgb.Dataset <- function(x) {
 `dimnames<-.lgb.Dataset` <- function(x, value) {
  # Check if invalid element list
-  if (!is.list(value) || length(value) != 2L) {
+  if (!identical(class(value), "list") || length(value) != 2L) {
    stop("invalid ", sQuote("value"), " given: must be a list of two elements")
  }

--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -178,7 +178,7 @@ lgb.cv <- function(params = list()
  if (!is.null(folds)) {
    # Check for list of folds or for single value
-    if (!is.list(folds) || length(folds) < 2L) {
+    if (!identical(class(folds), "list") || length(folds) < 2L) {
      stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold")
    }

--- a/R-package/R/lgb.plot.interpretation.R
+++ b/R-package/R/lgb.plot.interpretation.R
@@ -125,13 +125,15 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
    cex <- 2.5 / log2(1.0 + top_n)
  }
-  # Do plot
+  # create plot
+  tree_interpretation[Contribution > 0.0, bar_color := "firebrick"]
+  tree_interpretation[Contribution == 0.0, bar_color := "steelblue"]
  tree_interpretation[.N:1L,
                      graphics::barplot(
                          height = Contribution
                          , names.arg = Feature
                          , horiz = TRUE
-                          , col = ifelse(Contribution > 0L, "firebrick", "steelblue")
+                          , col = bar_color
                          , border = NA
                          , main = title
                          , cex.names = cex

--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -73,7 +73,7 @@ lgb.train <- function(params = list(),
    stop("lgb.train: data must be an lgb.Dataset instance")
  }
  if (length(valids) > 0L) {
-    if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) {
+    if (!identical(class(valids), "list") || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) {
      stop("lgb.train: valids must be a list of lgb.Dataset elements")
    }
    evnames <- names(valids)

--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -105,7 +105,7 @@ lgb.call.return.str <- function(fun_name, ...) {
 lgb.params2str <- function(params, ...) {
  # Check for a list as input
-  if (!is.list(params)) {
+  if (!identical(class(params), "list")) {
    stop("params must be a list")
  }
@@ -254,7 +254,7 @@ lgb.check.eval <- function(params, eval) {
  }
  # If 'eval' is a list or character vector, store it in 'metric'
-  if (is.character(eval) || is.list(eval)) {
+  if (is.character(eval) || identical(class(eval), "list")) {
    params$metric <- append(params$metric, eval)
  }

--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
-require(lightgbm)
+library(lightgbm)
-require(methods)
+library(methods)
 # We load in the agaricus dataset
 # In this example, we are aiming to predict whether a mushroom is edible

--- a/R-package/demo/boost_from_prediction.R
+++ b/R-package/demo/boost_from_prediction.R
-require(lightgbm)
+library(lightgbm)
-require(methods)
+library(methods)
 # Load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")

--- a/R-package/demo/cross_validation.R
+++ b/R-package/demo/cross_validation.R
-require(lightgbm)
+library(lightgbm)
 # load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")
 data(agaricus.test, package = "lightgbm")

--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
-require(lightgbm)
+library(lightgbm)
-require(methods)
+library(methods)
 # Load in the agaricus dataset
 data(agaricus.train, package = "lightgbm")

--- a/R-package/demo/multiclass.R
+++ b/R-package/demo/multiclass.R
-require(lightgbm)
+library(lightgbm)
 # We load the default iris dataset shipped with R
 data(iris)

--- a/R-package/demo/multiclass_custom_objective.R
+++ b/R-package/demo/multiclass_custom_objective.R
-require(lightgbm)
+library(lightgbm)
 # We load the default iris dataset shipped with R
 data(iris)
@@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
 custom_multiclass_obj <- function(preds, dtrain) {
    labels <- getinfo(dtrain, "label")
-    # preds is a matrix with rows corresponding to samples and colums corresponding to choices
+    # preds is a matrix with rows corresponding to samples and columns corresponding to choices
    preds <- matrix(preds, nrow = length(labels))
    # to prevent overflow, normalize preds by row
-    preds <- preds - apply(preds, 1L, max)
+    preds <- preds - apply(preds, MARGIN = 1L, max)
    prob <- exp(preds) / rowSums(exp(preds))
    # compute gradient
    grad <- prob
-    grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L
+    subset_index <- as.matrix(
+        data.frame(
+            seq_len(length(labels))
+            , labels + 1L
+            , fix.empty.names = FALSE
+        )
+        , nrow = length(labels)
+        , dimnames = NULL
+    )
+    grad[subset_index] <- grad[subset_index] - 1L
    # compute hessian (approximation)
    hess <- 2.0 * prob * (1.0 - prob)
@@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) {
    preds <- preds - apply(preds, 1L, max)
    prob <- exp(preds) / rowSums(exp(preds))
+    subset_index <- as.matrix(
+        data.frame(
+            seq_len(length(labels))
+            , labels + 1L
+            , fix.empty.names = FALSE
+        )
+        , nrow = length(labels)
+        , dimnames = NULL
+    )
    return(list(
        name = "error"
-        , value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)]))
+        , value = -mean(log(prob[subset_index]))
        , higher_better = FALSE
    ))
 }

--- a/R-package/tests/testthat/test_dataset.R
+++ b/R-package/tests/testthat/test_dataset.R
-require(lightgbm)
+library(lightgbm)
-require(Matrix)
+library(Matrix)
 context("testing lgb.Dataset functionality")
@@ -140,7 +140,7 @@ test_that("Dataset$get_params() successfully returns parameters if you passed th
    , params = params
  )
  returned_params <- ds$get_params()
-  expect_true(methods::is(returned_params, "list"))
+  expect_identical(class(returned_params), "list")
  expect_identical(length(params), length(returned_params))
  expect_identical(sort(names(params)), sort(names(returned_params)))
  for (param_name in names(params)) {

--- a/R-package/tests/testthat/test_lgb.interprete.R
+++ b/R-package/tests/testthat/test_lgb.interprete.R
@@ -40,7 +40,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
        , data = test$data
        , idxset = seq_len(num_trees)
    )
-    expect_true(methods::is(tree_interpretation, "list"))
+    expect_identical(class(tree_interpretation), "list")
    expect_true(length(tree_interpretation) == num_trees)
    expect_null(names(tree_interpretation))
    expect_true(all(
@@ -91,7 +91,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
        , data = test[, 1L:4L]
        , idxset = seq_len(num_trees)
    )
-    expect_true(methods::is(tree_interpretation, "list"))
+    expect_identical(class(tree_interpretation), "list")
    expect_true(length(tree_interpretation) == num_trees)
    expect_null(names(tree_interpretation))
    expect_true(all(

--- a/R-package/tests/testthat/test_parameters.R
+++ b/R-package/tests/testthat/test_parameters.R
@@ -48,7 +48,7 @@ context("parameter aliases")
 test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
  param_aliases <- .PARAMETER_ALIASES()
-  expect_true(is.list(param_aliases))
+  expect_identical(class(param_aliases), "list")
  expect_true(is.character(names(param_aliases)))
  expect_true(is.character(param_aliases[["boosting"]]))
  expect_true(is.character(param_aliases[["early_stopping_round"]]))