Commit 1f7b06b9 authored by James Lamb's avatar James Lamb Committed by Nikita Titov
Browse files

[R-package][ci] Added more R linters (fixes #2477) (#2533)



* Added more linters on R code

* started working on implicit integers

* finished style changes to handle implicit integers

* regenned documentation and added concatenation linter

* changed channel for r-lintr

* try building stringi before lintr

* trying to get libicui18n

* trying another thing

* trying conda-forge again

* added re-install of stringi

* uncommented other stages

* Update .ci/test.sh
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* removed apt update and changed lintr version floor

* get lintr from CRAN

* R needs to come before C++ linting

* testing lintr install from CRAN

* trying one more thing

* more verbose

* order might matter

* removed commented code

* cleaned up linting block in test.sh

* grouped conda install calls and fixed a few integer array things
parent 11f9682b
...@@ -10,28 +10,28 @@ test_that("lgb.importance() should reject bad inputs", { ...@@ -10,28 +10,28 @@ test_that("lgb.importance() should reject bad inputs", {
, -10L:10L , -10L:10L
, list(c("a", "b", "c")) , list(c("a", "b", "c"))
, data.frame( , data.frame(
x = rnorm(20) x = rnorm(20L)
, y = sample( , y = sample(
x = c(1, 2) x = c(1L, 2L)
, size = 20 , size = 20L
, replace = TRUE , replace = TRUE
) )
) )
, data.table::data.table( , data.table::data.table(
x = rnorm(20) x = rnorm(20L)
, y = sample( , y = sample(
x = c(1, 2) x = c(1L, 2L)
, size = 20 , size = 20L
, replace = TRUE , replace = TRUE
) )
) )
, lgb.Dataset( , lgb.Dataset(
data = matrix(rnorm(100), ncol = 2) data = matrix(rnorm(100L), ncol = 2L)
, label = matrix(sample(c(0, 1), 50, replace = TRUE)) , label = matrix(sample(c(0L, 1L), 50L, replace = TRUE))
) )
, "lightgbm.model" , "lightgbm.model"
) )
for (input in bad_inputs){ for (input in bad_inputs) {
expect_error({ expect_error({
lgb.importance(input) lgb.importance(input)
}, regexp = "'model' has to be an object of class lgb\\.Booster") }, regexp = "'model' has to be an object of class lgb\\.Booster")
......
context("lgb.interpete") context("lgb.interpete")
.sigmoid <- function(x){ .sigmoid <- function(x) {
1 / (1 + exp(-x)) 1.0 / (1.0 + exp(-x))
} }
.logit <- function(x){ .logit <- function(x) {
log(x / (1 - x)) log(x / (1.0 - x))
} }
test_that("lgb.intereprete works as expected for binary classification", { test_that("lgb.intereprete works as expected for binary classification", {
...@@ -24,21 +24,21 @@ test_that("lgb.intereprete works as expected for binary classification", { ...@@ -24,21 +24,21 @@ test_that("lgb.intereprete works as expected for binary classification", {
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
) )
num_trees <- 5 num_trees <- 5L
tree_interpretation <- lgb.interprete( tree_interpretation <- lgb.interprete(
model = model model = model
, data = test$data , data = test$data
, idxset = 1:num_trees , idxset = seq_len(num_trees)
) )
expect_true(methods::is(tree_interpretation, "list")) expect_true(methods::is(tree_interpretation, "list"))
expect_true(length(tree_interpretation) == num_trees) expect_true(length(tree_interpretation) == num_trees)
...@@ -46,7 +46,7 @@ test_that("lgb.intereprete works as expected for binary classification", { ...@@ -46,7 +46,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
expect_true(all( expect_true(all(
sapply( sapply(
X = tree_interpretation X = tree_interpretation
, FUN = function(treeDT){ , FUN = function(treeDT) {
checks <- c( checks <- c(
data.table::is.data.table(treeDT) data.table::is.data.table(treeDT)
, identical(names(treeDT), c("Feature", "Contribution")) , identical(names(treeDT), c("Feature", "Contribution"))
...@@ -65,31 +65,31 @@ test_that("lgb.intereprete works as expected for multiclass classification", { ...@@ -65,31 +65,31 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
# We must convert factors to numeric # We must convert factors to numeric
# They must be starting from number 0 to use multiclass # They must be starting from number 0 to use multiclass
# For instance: 0, 1, 2, 3, 4, 5... # For instance: 0, 1, 2, 3, 4, 5...
iris$Species <- as.numeric(as.factor(iris$Species)) - 1 iris$Species <- as.numeric(as.factor(iris$Species)) - 1L
# Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2) # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
train <- as.matrix(iris[c(1:20, 51:80, 101:140), ]) train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
# The 10 last samples of each class are for validation # The 10 last samples of each class are for validation
test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
params <- list( params <- list(
objective = "multiclass" objective = "multiclass"
, metric = "multi_logloss" , metric = "multi_logloss"
, num_class = 3 , num_class = 3L
, learning_rate = 0.00001 , learning_rate = 0.00001
) )
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, min_data = 1 , min_data = 1L
) )
num_trees <- 5 num_trees <- 5L
tree_interpretation <- lgb.interprete( tree_interpretation <- lgb.interprete(
model = model model = model
, data = test[, 1:4] , data = test[, 1L:4L]
, idxset = 1:num_trees , idxset = seq_len(num_trees)
) )
expect_true(methods::is(tree_interpretation, "list")) expect_true(methods::is(tree_interpretation, "list"))
expect_true(length(tree_interpretation) == num_trees) expect_true(length(tree_interpretation) == num_trees)
...@@ -97,7 +97,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", { ...@@ -97,7 +97,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
expect_true(all( expect_true(all(
sapply( sapply(
X = tree_interpretation X = tree_interpretation
, FUN = function(treeDT){ , FUN = function(treeDT) {
checks <- c( checks <- c(
data.table::is.data.table(treeDT) data.table::is.data.table(treeDT)
, identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2")) , identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2"))
......
context("lgb.plot.interpretation") context("lgb.plot.interpretation")
.sigmoid <- function(x){ .sigmoid <- function(x) {
1 / (1 + exp(-x)) 1.0 / (1.0 + exp(-x))
} }
.logit <- function(x){ .logit <- function(x) {
log(x / (1 - x)) log(x / (1.0 - x))
} }
test_that("lgb.plot.interepretation works as expected for binary classification", { test_that("lgb.plot.interepretation works as expected for binary classification", {
...@@ -24,34 +24,34 @@ test_that("lgb.plot.interepretation works as expected for binary classification" ...@@ -24,34 +24,34 @@ test_that("lgb.plot.interepretation works as expected for binary classification"
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
) )
num_trees <- 5 num_trees <- 5L
tree_interpretation <- lgb.interprete( tree_interpretation <- lgb.interprete(
model = model model = model
, data = test$data , data = test$data
, idxset = 1:num_trees , idxset = seq_len(num_trees)
) )
expect_true({ expect_true({
lgb.plot.interpretation( lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]] tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5 , top_n = 5L
) )
TRUE TRUE
}) })
# should also work when you explicitly pass cex # should also work when you explicitly pass cex
plot_res <- lgb.plot.interpretation( plot_res <- lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]] tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5 , top_n = 5L
, cex = 0.95 , cex = 0.95
) )
expect_null(plot_res) expect_null(plot_res)
...@@ -63,35 +63,35 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat ...@@ -63,35 +63,35 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat
# We must convert factors to numeric # We must convert factors to numeric
# They must be starting from number 0 to use multiclass # They must be starting from number 0 to use multiclass
# For instance: 0, 1, 2, 3, 4, 5... # For instance: 0, 1, 2, 3, 4, 5...
iris$Species <- as.numeric(as.factor(iris$Species)) - 1 iris$Species <- as.numeric(as.factor(iris$Species)) - 1L
# Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2) # Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
train <- as.matrix(iris[c(1:20, 51:80, 101:140), ]) train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
# The 10 last samples of each class are for validation # The 10 last samples of each class are for validation
test <- as.matrix(iris[c(41:50, 91:100, 141:150), ]) test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5]) dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5]) dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
params <- list( params <- list(
objective = "multiclass" objective = "multiclass"
, metric = "multi_logloss" , metric = "multi_logloss"
, num_class = 3 , num_class = 3L
, learning_rate = 0.00001 , learning_rate = 0.00001
) )
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, min_data = 1 , min_data = 1L
) )
num_trees <- 5 num_trees <- 5L
tree_interpretation <- lgb.interprete( tree_interpretation <- lgb.interprete(
model = model model = model
, data = test[, 1:4] , data = test[, 1L:4L]
, idxset = 1:num_trees , idxset = seq_len(num_trees)
) )
plot_res <- lgb.plot.interpretation( plot_res <- lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]] tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5 , top_n = 5L
) )
expect_null(plot_res) expect_null(plot_res)
}) })
context("feature penalties") context("feature penalties")
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
test <- agaricus.test test <- agaricus.test
test_that("Feature penalties work properly", { test_that("Feature penalties work properly", {
# Fit a series of models with varying penalty on most important variable # Fit a series of models with varying penalty on most important variable
var_name <- "odor=none" var_name <- "odor=none"
var_index <- which(train$data@Dimnames[[2]] == var_name) var_index <- which(train$data@Dimnames[[2L]] == var_name)
bst <- lapply(seq(1, 0, by = -0.1), function(x) { bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
feature_penalties <- rep(1, ncol(train$data)) feature_penalties <- rep(1.0, ncol(train$data))
feature_penalties[var_index] <- x feature_penalties[var_index] <- x
lightgbm( lightgbm(
data = train$data data = train$data
, label = train$label , label = train$label
, num_leaves = 5 , num_leaves = 5L
, learning_rate = 0.05 , learning_rate = 0.05
, nrounds = 20 , nrounds = 20L
, objective = "binary" , objective = "binary"
, feature_penalty = paste0(feature_penalties, collapse = ",") , feature_penalty = paste0(feature_penalties, collapse = ",")
, metric = "binary_error" , metric = "binary_error"
, verbose = -1 , verbose = -1L
) )
}) })
...@@ -32,16 +32,16 @@ test_that("Feature penalties work properly", { ...@@ -32,16 +32,16 @@ test_that("Feature penalties work properly", {
var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency]) var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency])
# Ensure that feature gain, cover, and frequency decreases with stronger penalties # Ensure that feature gain, cover, and frequency decreases with stronger penalties
expect_true(all(diff(unlist(var_gain)) <= 0)) expect_true(all(diff(unlist(var_gain)) <= 0.0))
expect_true(all(diff(unlist(var_cover)) <= 0)) expect_true(all(diff(unlist(var_cover)) <= 0.0))
expect_true(all(diff(unlist(var_freq)) <= 0)) expect_true(all(diff(unlist(var_freq)) <= 0.0))
expect_lt(min(diff(unlist(var_gain))), 0) expect_lt(min(diff(unlist(var_gain))), 0.0)
expect_lt(min(diff(unlist(var_cover))), 0) expect_lt(min(diff(unlist(var_cover))), 0.0)
expect_lt(min(diff(unlist(var_freq))), 0) expect_lt(min(diff(unlist(var_freq))), 0.0)
# Ensure that feature is not used when feature_penalty = 0 # Ensure that feature is not used when feature_penalty = 0
expect_length(var_gain[[length(var_gain)]], 0) expect_length(var_gain[[length(var_gain)]], 0L)
}) })
expect_true(".PARAMETER_ALIASES() returns a named list", { expect_true(".PARAMETER_ALIASES() returns a named list", {
...@@ -56,17 +56,17 @@ expect_true(".PARAMETER_ALIASES() returns a named list", { ...@@ -56,17 +56,17 @@ expect_true(".PARAMETER_ALIASES() returns a named list", {
}) })
expect_true("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", { expect_true("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]){ for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
expect_warning({ expect_warning({
result <- lightgbm( result <- lightgbm(
data = train$data data = train$data
, label = train$label , label = train$label
, num_leaves = 5 , num_leaves = 5L
, learning_rate = 0.05 , learning_rate = 0.05
, nrounds = 5 , nrounds = 5L
, objective = "binary" , objective = "binary"
, metric = "binary_error" , metric = "binary_error"
, verbose = -1 , verbose = -1L
, params = stats::setNames( , params = stats::setNames(
object = "dart" object = "dart"
, nm = boosting_param , nm = boosting_param
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
# system() will not raise an R exception if the process called # system() will not raise an R exception if the process called
# fails. Wrapping it here to get that behavior # fails. Wrapping it here to get that behavior
.run_shell_command <- function(cmd, ...){ .run_shell_command <- function(cmd, ...) {
exit_code <- system(cmd, ...) exit_code <- system(cmd, ...)
if (exit_code != 0){ if (exit_code != 0L) {
stop(paste0("Command failed with exit code: ", exit_code)) stop(paste0("Command failed with exit code: ", exit_code))
} }
} }
...@@ -27,33 +27,43 @@ unlink(x = "lightgbm_r", recursive = TRUE) ...@@ -27,33 +27,43 @@ unlink(x = "lightgbm_r", recursive = TRUE)
dir.create("lightgbm_r") dir.create("lightgbm_r")
# copy in the relevant files # copy in the relevant files
result <- file.copy(from = "R-package/./", result <- file.copy(
to = "lightgbm_r/", from = "R-package/./"
recursive = TRUE, , to = "lightgbm_r/"
overwrite = TRUE) , recursive = TRUE
, overwrite = TRUE
)
.handle_result(result) .handle_result(result)
result <- file.copy(from = "include/", result <- file.copy(
to = file.path("lightgbm_r", "src/"), from = "include/"
recursive = TRUE, , to = file.path("lightgbm_r", "src/")
overwrite = TRUE) , recursive = TRUE
, overwrite = TRUE
)
.handle_result(result) .handle_result(result)
result <- file.copy(from = "src/", result <- file.copy(
to = file.path("lightgbm_r", "src/"), from = "src/"
recursive = TRUE, , to = file.path("lightgbm_r", "src/")
overwrite = TRUE) , recursive = TRUE
, overwrite = TRUE
)
.handle_result(result) .handle_result(result)
result <- file.copy(from = "compute/", result <- file.copy(
to = file.path("lightgbm_r", "src/"), from = "compute/"
recursive = TRUE, , to = file.path("lightgbm_r", "src/")
overwrite = TRUE) , recursive = TRUE
, overwrite = TRUE
)
.handle_result(result) .handle_result(result)
result <- file.copy(from = "CMakeLists.txt", result <- file.copy(
to = file.path("lightgbm_r", "inst", "bin/"), from = "CMakeLists.txt"
overwrite = TRUE) , to = file.path("lightgbm_r", "inst", "bin/")
, overwrite = TRUE
)
.handle_result(result) .handle_result(result)
# Build the package (do not touch this line!) # Build the package (do not touch this line!)
...@@ -68,15 +78,12 @@ version <- gsub( ...@@ -68,15 +78,12 @@ version <- gsub(
"Version: ", "Version: ",
"", "",
grep( grep(
"Version: ", "Version: "
readLines(con = file.path("lightgbm_r", "DESCRIPTION")), , readLines(con = file.path("lightgbm_r", "DESCRIPTION"))
value = TRUE , value = TRUE
) )
) )
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version)) tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball) cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball)
.run_shell_command(cmd) .run_shell_command(cmd)
# Run R CMD CHECK
# R CMD CHECK lightgbm_2.1.2.tar.gz --as-cran | tee check.log | cat
...@@ -9,9 +9,11 @@ devtools::document() ...@@ -9,9 +9,11 @@ devtools::document()
clean_site() clean_site()
init_site() init_site()
build_home(preview = FALSE, quiet = FALSE) build_home(preview = FALSE, quiet = FALSE)
build_reference(lazy = FALSE, build_reference(
document = FALSE, lazy = FALSE
examples = TRUE, , document = FALSE
run_dont_run = FALSE, , examples = TRUE
seed = 42, , run_dont_run = FALSE
preview = FALSE) , seed = 42L
, preview = FALSE
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment