[R-package][ci] Added more R linters (fixes #2477) (#2533)

* Added more linters on R code * started working on implicit integers * finished style changes to handle implicit integers * regenned documentation and added concatenation linter * changed channel for r-lintr * try building stringi before lintr * trying to get libicui18n * trying another thing * trying conda-forge again * added re-install of stringi * uncommented other stages * Update .ci/test.sh Co-Authored-By: Nikita Titov <nekit94-08@mail.ru> * removed apt update and changed lintr version floor * get lintr from CRAN * R needs to come before C++ linting * testing lintr install from CRAN * trying one more thing * more verbose * order might matter * removed commented code * cleaned up linting block in test.sh * grouped conda install calls and fixed a few integer array things

[R-package][ci] Added more R linters (fixes #2477) (#2533)
* Added more linters on R code * started working on implicit integers * finished style changes to handle implicit integers * regenned documentation and added concatenation linter * changed channel for r-lintr * try building stringi before lintr * trying to get libicui18n * trying another thing * trying conda-forge again * added re-install of stringi * uncommented other stages * Update .ci/test.sh Co-Authored-By: Nikita Titov <nekit94-08@mail.ru> * removed apt update and changed lintr version floor * get lintr from CRAN * R needs to come before C++ linting * testing lintr install from CRAN * trying one more thing * more verbose * order might matter * removed commented code * cleaned up linting block in test.sh * grouped conda install calls and fixed a few integer array things
1f7b06b9 · James Lamb · Nikita Titov · 11f9682b · 1f7b06b9 · 1f7b06b9
Commit 1f7b06b9 authored Nov 16, 2019 by James Lamb Committed by Nikita Titov Nov 17, 2019
6 changed files
--- a/R-package/tests/testthat/test_lgb.importance.R
+++ b/R-package/tests/testthat/test_lgb.importance.R
@@ -10,28 +10,28 @@ test_that("lgb.importance() should reject bad inputs", {
        , -10L:10L
        , list(c("a", "b", "c"))
        , data.frame(
-            x = rnorm(20)
+            x = rnorm(20L)
            , y = sample(
-                x = c(1, 2)
-                , size = 20
+                x = c(1L, 2L)
+                , size = 20L
                , replace = TRUE
            )
        )
        , data.table::data.table(
-            x = rnorm(20)
+            x = rnorm(20L)
            , y = sample(
-                x = c(1, 2)
-                , size = 20
+                x = c(1L, 2L)
+                , size = 20L
                , replace = TRUE
            )
        )
        , lgb.Dataset(
-            data = matrix(rnorm(100), ncol = 2)
-            , label = matrix(sample(c(0, 1), 50, replace = TRUE))
+            data = matrix(rnorm(100L), ncol = 2L)
+            , label = matrix(sample(c(0L, 1L), 50L, replace = TRUE))
        )
        , "lightgbm.model"
    )
-    for (input in bad_inputs){
+    for (input in bad_inputs) {
        expect_error({
            lgb.importance(input)
        }, regexp = "'model' has to be an object of class lgb\\.Booster")

--- a/R-package/tests/testthat/test_lgb.interprete.R
+++ b/R-package/tests/testthat/test_lgb.interprete.R
 context("lgb.interpete")

-.sigmoid <- function(x){
-    1 / (1 + exp(-x))
+.sigmoid <- function(x) {
+    1.0 / (1.0 + exp(-x))
 }
-.logit <- function(x){
-    log(x / (1 - x))
+.logit <- function(x) {
+    log(x / (1.0 - x))
 }

 test_that("lgb.intereprete works as expected for binary classification", {
@@ -24,21 +24,21 @@ test_that("lgb.intereprete works as expected for binary classification", {
    params <- list(
        objective = "binary"
        , learning_rate = 0.01
-        , num_leaves = 63
-        , max_depth = -1
-        , min_data_in_leaf = 1
-        , min_sum_hessian_in_leaf = 1
+        , num_leaves = 63L
+        , max_depth = -1L
+        , min_data_in_leaf = 1L
+        , min_sum_hessian_in_leaf = 1.0
    )
    model <- lgb.train(
        params = params
        , data = dtrain
-        , nrounds = 10
+        , nrounds = 10L
    )
-    num_trees <- 5
+    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
        , data = test$data
-        , idxset = 1:num_trees
+        , idxset = seq_len(num_trees)
    )
    expect_true(methods::is(tree_interpretation, "list"))
    expect_true(length(tree_interpretation) == num_trees)
@@ -46,7 +46,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
    expect_true(all(
        sapply(
            X = tree_interpretation
-            , FUN = function(treeDT){
+            , FUN = function(treeDT) {
                checks <- c(
                    data.table::is.data.table(treeDT)
                    , identical(names(treeDT), c("Feature", "Contribution"))
@@ -65,31 +65,31 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
    # We must convert factors to numeric
    # They must be starting from number 0 to use multiclass
    # For instance: 0, 1, 2, 3, 4, 5...
-    iris$Species <- as.numeric(as.factor(iris$Species)) - 1
+    iris$Species <- as.numeric(as.factor(iris$Species)) - 1L

    # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
-    train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
+    train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
    # The 10 last samples of each class are for validation
-    test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
-    dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
-    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
+    test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
+    dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
+    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
    params <- list(
        objective = "multiclass"
        , metric = "multi_logloss"
-        , num_class = 3
+        , num_class = 3L
        , learning_rate = 0.00001
    )
    model <- lgb.train(
        params = params
        , data = dtrain
-        , nrounds = 10
-        , min_data = 1
+        , nrounds = 10L
+        , min_data = 1L
    )
-    num_trees <- 5
+    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
-        , data = test[, 1:4]
-        , idxset = 1:num_trees
+        , data = test[, 1L:4L]
+        , idxset = seq_len(num_trees)
    )
    expect_true(methods::is(tree_interpretation, "list"))
    expect_true(length(tree_interpretation) == num_trees)
@@ -97,7 +97,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
    expect_true(all(
        sapply(
            X = tree_interpretation
-            , FUN = function(treeDT){
+            , FUN = function(treeDT) {
                checks <- c(
                    data.table::is.data.table(treeDT)
                    , identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2"))

--- a/R-package/tests/testthat/test_lgb.plot.interpretation.R
+++ b/R-package/tests/testthat/test_lgb.plot.interpretation.R
 context("lgb.plot.interpretation")

-.sigmoid <- function(x){
-    1 / (1 + exp(-x))
+.sigmoid <- function(x) {
+    1.0 / (1.0 + exp(-x))
 }
-.logit <- function(x){
-    log(x / (1 - x))
+.logit <- function(x) {
+    log(x / (1.0 - x))
 }

 test_that("lgb.plot.interepretation works as expected for binary classification", {
@@ -24,34 +24,34 @@ test_that("lgb.plot.interepretation works as expected for binary classification"
    params <- list(
        objective = "binary"
        , learning_rate = 0.01
-        , num_leaves = 63
-        , max_depth = -1
-        , min_data_in_leaf = 1
-        , min_sum_hessian_in_leaf = 1
+        , num_leaves = 63L
+        , max_depth = -1L
+        , min_data_in_leaf = 1L
+        , min_sum_hessian_in_leaf = 1.0
    )
    model <- lgb.train(
        params = params
        , data = dtrain
-        , nrounds = 10
+        , nrounds = 10L
    )
-    num_trees <- 5
+    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
        , data = test$data
-        , idxset = 1:num_trees
+        , idxset = seq_len(num_trees)
    )
    expect_true({
        lgb.plot.interpretation(
-            tree_interpretation_dt = tree_interpretation[[1]]
-            , top_n = 5
+            tree_interpretation_dt = tree_interpretation[[1L]]
+            , top_n = 5L
        )
        TRUE
    })

    # should also work when you explicitly pass cex
    plot_res <- lgb.plot.interpretation(
-        tree_interpretation_dt = tree_interpretation[[1]]
-        , top_n = 5
+        tree_interpretation_dt = tree_interpretation[[1L]]
+        , top_n = 5L
        , cex = 0.95
    )
    expect_null(plot_res)
@@ -63,35 +63,35 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat
    # We must convert factors to numeric
    # They must be starting from number 0 to use multiclass
    # For instance: 0, 1, 2, 3, 4, 5...
-    iris$Species <- as.numeric(as.factor(iris$Species)) - 1
+    iris$Species <- as.numeric(as.factor(iris$Species)) - 1L

    # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
-    train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
+    train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
    # The 10 last samples of each class are for validation
-    test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
-    dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
-    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
+    test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
+    dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
+    dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
    params <- list(
        objective = "multiclass"
        , metric = "multi_logloss"
-        , num_class = 3
+        , num_class = 3L
        , learning_rate = 0.00001
    )
    model <- lgb.train(
        params = params
        , data = dtrain
-        , nrounds = 10
-        , min_data = 1
+        , nrounds = 10L
+        , min_data = 1L
    )
-    num_trees <- 5
+    num_trees <- 5L
    tree_interpretation <- lgb.interprete(
        model = model
-        , data = test[, 1:4]
-        , idxset = 1:num_trees
+        , data = test[, 1L:4L]
+        , idxset = seq_len(num_trees)
    )
    plot_res <- lgb.plot.interpretation(
-        tree_interpretation_dt = tree_interpretation[[1]]
-        , top_n = 5
+        tree_interpretation_dt = tree_interpretation[[1L]]
+        , top_n = 5L
    )
    expect_null(plot_res)
 })
--- a/R-package/tests/testthat/test_parameters.R
+++ b/R-package/tests/testthat/test_parameters.R

 context("feature penalties")

-data(agaricus.train, package = 'lightgbm')
-data(agaricus.test, package = 'lightgbm')
+data(agaricus.train, package = "lightgbm")
+data(agaricus.test, package = "lightgbm")
 train <- agaricus.train
 test <- agaricus.test

 test_that("Feature penalties work properly", {
  # Fit a series of models with varying penalty on most important variable
  var_name <- "odor=none"
-  var_index <- which(train$data@Dimnames[[2]] == var_name)
+  var_index <- which(train$data@Dimnames[[2L]] == var_name)

-  bst <- lapply(seq(1, 0, by = -0.1), function(x) {
-    feature_penalties <- rep(1, ncol(train$data))
+  bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
+    feature_penalties <- rep(1.0, ncol(train$data))
    feature_penalties[var_index] <- x
    lightgbm(
      data = train$data
      , label = train$label
-      , num_leaves = 5
+      , num_leaves = 5L
      , learning_rate = 0.05
-      , nrounds = 20
+      , nrounds = 20L
      , objective = "binary"
      , feature_penalty = paste0(feature_penalties, collapse = ",")
      , metric = "binary_error"
-      , verbose = -1
+      , verbose = -1L
    )
  })

@@ -32,16 +32,16 @@ test_that("Feature penalties work properly", {
  var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency])

  # Ensure that feature gain, cover, and frequency decreases with stronger penalties
-  expect_true(all(diff(unlist(var_gain)) <= 0))
-  expect_true(all(diff(unlist(var_cover)) <= 0))
-  expect_true(all(diff(unlist(var_freq)) <= 0))
+  expect_true(all(diff(unlist(var_gain)) <= 0.0))
+  expect_true(all(diff(unlist(var_cover)) <= 0.0))
+  expect_true(all(diff(unlist(var_freq)) <= 0.0))

-  expect_lt(min(diff(unlist(var_gain))), 0)
-  expect_lt(min(diff(unlist(var_cover))), 0)
-  expect_lt(min(diff(unlist(var_freq))), 0)
+  expect_lt(min(diff(unlist(var_gain))), 0.0)
+  expect_lt(min(diff(unlist(var_cover))), 0.0)
+  expect_lt(min(diff(unlist(var_freq))), 0.0)

  # Ensure that feature is not used when feature_penalty = 0
-  expect_length(var_gain[[length(var_gain)]], 0)
+  expect_length(var_gain[[length(var_gain)]], 0L)
 })

 expect_true(".PARAMETER_ALIASES() returns a named list", {
@@ -56,17 +56,17 @@ expect_true(".PARAMETER_ALIASES() returns a named list", {
 })

 expect_true("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
-  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]){
+  for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
    expect_warning({
      result <- lightgbm(
        data = train$data
        , label = train$label
-        , num_leaves = 5
+        , num_leaves = 5L
        , learning_rate = 0.05
-        , nrounds = 5
+        , nrounds = 5L
        , objective = "binary"
        , metric = "binary_error"
-        , verbose = -1
+        , verbose = -1L
        , params = stats::setNames(
          object = "dart"
          , nm = boosting_param

--- a/build_r.R
+++ b/build_r.R
@@ -15,9 +15,9 @@

 # system() will not raise an R exception if the process called
 # fails. Wrapping it here to get that behavior
-.run_shell_command <- function(cmd, ...){
+.run_shell_command <- function(cmd, ...) {
    exit_code <- system(cmd, ...)
-    if (exit_code != 0){
+    if (exit_code != 0L) {
        stop(paste0("Command failed with exit code: ", exit_code))
    }
 }
@@ -27,33 +27,43 @@ unlink(x = "lightgbm_r", recursive = TRUE)
 dir.create("lightgbm_r")

 # copy in the relevant files
-result <- file.copy(from = "R-package/./",
-                    to = "lightgbm_r/",
-                    recursive = TRUE,
-                    overwrite = TRUE)
+result <- file.copy(
+  from = "R-package/./"
+  , to = "lightgbm_r/"
+  , recursive = TRUE
+  , overwrite = TRUE
+)
 .handle_result(result)

-result <- file.copy(from = "include/",
-                    to = file.path("lightgbm_r", "src/"),
-                    recursive = TRUE,
-                    overwrite = TRUE)
+result <- file.copy(
+  from = "include/"
+  , to = file.path("lightgbm_r", "src/")
+  , recursive = TRUE
+  , overwrite = TRUE
+)
 .handle_result(result)

-result <- file.copy(from = "src/",
-                    to = file.path("lightgbm_r", "src/"),
-                    recursive = TRUE,
-                    overwrite = TRUE)
+result <- file.copy(
+  from = "src/"
+  , to = file.path("lightgbm_r", "src/")
+  , recursive = TRUE
+  , overwrite = TRUE
+)
 .handle_result(result)

-result <- file.copy(from = "compute/",
-                    to = file.path("lightgbm_r", "src/"),
-                    recursive = TRUE,
-                    overwrite = TRUE)
+result <- file.copy(
+  from = "compute/"
+  , to = file.path("lightgbm_r", "src/")
+  , recursive = TRUE
+  , overwrite = TRUE
+)
 .handle_result(result)

-result <- file.copy(from = "CMakeLists.txt",
-                    to = file.path("lightgbm_r", "inst", "bin/"),
-                    overwrite = TRUE)
+result <- file.copy(
+  from = "CMakeLists.txt"
+  , to = file.path("lightgbm_r", "inst", "bin/")
+  , overwrite = TRUE
+)
 .handle_result(result)

 # Build the package (do not touch this line!)
@@ -68,15 +78,12 @@ version <- gsub(
  "Version: ",
  "",
  grep(
-    "Version: ",
-    readLines(con = file.path("lightgbm_r", "DESCRIPTION")),
-    value = TRUE
+    "Version: "
+    , readLines(con = file.path("lightgbm_r", "DESCRIPTION"))
+    , value = TRUE
  )
 )
 tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))

 cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball)
 .run_shell_command(cmd)
-
-# Run R CMD CHECK
-# R CMD CHECK lightgbm_2.1.2.tar.gz --as-cran | tee check.log | cat
--- a/build_r_site.R
+++ b/build_r_site.R
@@ -9,9 +9,11 @@ devtools::document()
 clean_site()
 init_site()
 build_home(preview = FALSE, quiet = FALSE)
-build_reference(lazy = FALSE,
-                document = FALSE,
-                examples = TRUE,
-                run_dont_run = FALSE,
-                seed = 42,
-                preview = FALSE)
+build_reference(
+    lazy = FALSE
+    , document = FALSE
+    , examples = TRUE
+    , run_dont_run = FALSE
+    , seed = 42L
+    , preview = FALSE
+)