Commit 1f7b06b9 authored by James Lamb's avatar James Lamb Committed by Nikita Titov
Browse files

[R-package][ci] Added more R linters (fixes #2477) (#2533)



* Added more linters on R code

* started working on implicit integers

* finished style changes to handle implicit integers

* regenned documentation and added concatenation linter

* changed channel for r-lintr

* try building stringi before lintr

* trying to get libicui18n

* trying another thing

* trying conda-forge again

* added re-install of stringi

* uncommented other stages

* Update .ci/test.sh
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* removed apt update and changed lintr version floor

* get lintr from CRAN

* R needs to come before C++ linting

* testing lintr install from CRAN

* trying one more thing

* more verbose

* order might matter

* removed commented code

* cleaned up linting block in test.sh

* grouped conda install calls and fixed a few integer array things
parent 11f9682b
......@@ -10,28 +10,28 @@ test_that("lgb.importance() should reject bad inputs", {
, -10L:10L
, list(c("a", "b", "c"))
, data.frame(
x = rnorm(20)
x = rnorm(20L)
, y = sample(
x = c(1, 2)
, size = 20
x = c(1L, 2L)
, size = 20L
, replace = TRUE
)
)
, data.table::data.table(
x = rnorm(20)
x = rnorm(20L)
, y = sample(
x = c(1, 2)
, size = 20
x = c(1L, 2L)
, size = 20L
, replace = TRUE
)
)
, lgb.Dataset(
data = matrix(rnorm(100), ncol = 2)
, label = matrix(sample(c(0, 1), 50, replace = TRUE))
data = matrix(rnorm(100L), ncol = 2L)
, label = matrix(sample(c(0L, 1L), 50L, replace = TRUE))
)
, "lightgbm.model"
)
for (input in bad_inputs){
for (input in bad_inputs) {
expect_error({
lgb.importance(input)
}, regexp = "'model' has to be an object of class lgb\\.Booster")
......
context("lgb.interpete")
.sigmoid <- function(x){
1 / (1 + exp(-x))
.sigmoid <- function(x) {
1.0 / (1.0 + exp(-x))
}
.logit <- function(x){
log(x / (1 - x))
.logit <- function(x) {
log(x / (1.0 - x))
}
test_that("lgb.intereprete works as expected for binary classification", {
......@@ -24,21 +24,21 @@ test_that("lgb.intereprete works as expected for binary classification", {
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
)
num_trees <- 5
num_trees <- 5L
tree_interpretation <- lgb.interprete(
model = model
, data = test$data
, idxset = 1:num_trees
, idxset = seq_len(num_trees)
)
expect_true(methods::is(tree_interpretation, "list"))
expect_true(length(tree_interpretation) == num_trees)
......@@ -46,7 +46,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
expect_true(all(
sapply(
X = tree_interpretation
, FUN = function(treeDT){
, FUN = function(treeDT) {
checks <- c(
data.table::is.data.table(treeDT)
, identical(names(treeDT), c("Feature", "Contribution"))
......@@ -65,31 +65,31 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
# We must convert factors to numeric
# They must be starting from number 0 to use multiclass
# For instance: 0, 1, 2, 3, 4, 5...
iris$Species <- as.numeric(as.factor(iris$Species)) - 1
iris$Species <- as.numeric(as.factor(iris$Species)) - 1L
# Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
# The 10 last samples of each class are for validation
test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
params <- list(
objective = "multiclass"
, metric = "multi_logloss"
, num_class = 3
, num_class = 3L
, learning_rate = 0.00001
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, min_data = 1
, nrounds = 10L
, min_data = 1L
)
num_trees <- 5
num_trees <- 5L
tree_interpretation <- lgb.interprete(
model = model
, data = test[, 1:4]
, idxset = 1:num_trees
, data = test[, 1L:4L]
, idxset = seq_len(num_trees)
)
expect_true(methods::is(tree_interpretation, "list"))
expect_true(length(tree_interpretation) == num_trees)
......@@ -97,7 +97,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
expect_true(all(
sapply(
X = tree_interpretation
, FUN = function(treeDT){
, FUN = function(treeDT) {
checks <- c(
data.table::is.data.table(treeDT)
, identical(names(treeDT), c("Feature", "Class 0", "Class 1", "Class 2"))
......
context("lgb.plot.interpretation")
.sigmoid <- function(x){
1 / (1 + exp(-x))
.sigmoid <- function(x) {
1.0 / (1.0 + exp(-x))
}
.logit <- function(x){
log(x / (1 - x))
.logit <- function(x) {
log(x / (1.0 - x))
}
test_that("lgb.plot.interepretation works as expected for binary classification", {
......@@ -24,34 +24,34 @@ test_that("lgb.plot.interepretation works as expected for binary classification"
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
)
num_trees <- 5
num_trees <- 5L
tree_interpretation <- lgb.interprete(
model = model
, data = test$data
, idxset = 1:num_trees
, idxset = seq_len(num_trees)
)
expect_true({
lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]]
, top_n = 5
tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5L
)
TRUE
})
# should also work when you explicitly pass cex
plot_res <- lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]]
, top_n = 5
tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5L
, cex = 0.95
)
expect_null(plot_res)
......@@ -63,35 +63,35 @@ test_that("lgb.plot.interepretation works as expected for multiclass classificat
# We must convert factors to numeric
# They must be starting from number 0 to use multiclass
# For instance: 0, 1, 2, 3, 4, 5...
iris$Species <- as.numeric(as.factor(iris$Species)) - 1
iris$Species <- as.numeric(as.factor(iris$Species)) - 1L
# Create imbalanced training data (20, 30, 40 examples for classes 0, 1, 2)
train <- as.matrix(iris[c(1:20, 51:80, 101:140), ])
train <- as.matrix(iris[c(1L:20L, 51L:80L, 101L:140L), ])
# The 10 last samples of each class are for validation
test <- as.matrix(iris[c(41:50, 91:100, 141:150), ])
dtrain <- lgb.Dataset(data = train[, 1:4], label = train[, 5])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1:4], label = test[, 5])
test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ])
dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L])
dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L])
params <- list(
objective = "multiclass"
, metric = "multi_logloss"
, num_class = 3
, num_class = 3L
, learning_rate = 0.00001
)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, min_data = 1
, nrounds = 10L
, min_data = 1L
)
num_trees <- 5
num_trees <- 5L
tree_interpretation <- lgb.interprete(
model = model
, data = test[, 1:4]
, idxset = 1:num_trees
, data = test[, 1L:4L]
, idxset = seq_len(num_trees)
)
plot_res <- lgb.plot.interpretation(
tree_interpretation_dt = tree_interpretation[[1]]
, top_n = 5
tree_interpretation_dt = tree_interpretation[[1L]]
, top_n = 5L
)
expect_null(plot_res)
})
context("feature penalties")
data(agaricus.train, package = 'lightgbm')
data(agaricus.test, package = 'lightgbm')
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test
test_that("Feature penalties work properly", {
# Fit a series of models with varying penalty on most important variable
var_name <- "odor=none"
var_index <- which(train$data@Dimnames[[2]] == var_name)
var_index <- which(train$data@Dimnames[[2L]] == var_name)
bst <- lapply(seq(1, 0, by = -0.1), function(x) {
feature_penalties <- rep(1, ncol(train$data))
bst <- lapply(seq(1.0, 0.0, by = -0.1), function(x) {
feature_penalties <- rep(1.0, ncol(train$data))
feature_penalties[var_index] <- x
lightgbm(
data = train$data
, label = train$label
, num_leaves = 5
, num_leaves = 5L
, learning_rate = 0.05
, nrounds = 20
, nrounds = 20L
, objective = "binary"
, feature_penalty = paste0(feature_penalties, collapse = ",")
, metric = "binary_error"
, verbose = -1
, verbose = -1L
)
})
......@@ -32,16 +32,16 @@ test_that("Feature penalties work properly", {
var_freq <- lapply(bst, function(x) lgb.importance(x)[Feature == var_name, Frequency])
# Ensure that feature gain, cover, and frequency decreases with stronger penalties
expect_true(all(diff(unlist(var_gain)) <= 0))
expect_true(all(diff(unlist(var_cover)) <= 0))
expect_true(all(diff(unlist(var_freq)) <= 0))
expect_true(all(diff(unlist(var_gain)) <= 0.0))
expect_true(all(diff(unlist(var_cover)) <= 0.0))
expect_true(all(diff(unlist(var_freq)) <= 0.0))
expect_lt(min(diff(unlist(var_gain))), 0)
expect_lt(min(diff(unlist(var_cover))), 0)
expect_lt(min(diff(unlist(var_freq))), 0)
expect_lt(min(diff(unlist(var_gain))), 0.0)
expect_lt(min(diff(unlist(var_cover))), 0.0)
expect_lt(min(diff(unlist(var_freq))), 0.0)
# Ensure that feature is not used when feature_penalty = 0
expect_length(var_gain[[length(var_gain)]], 0)
expect_length(var_gain[[length(var_gain)]], 0L)
})
expect_true(".PARAMETER_ALIASES() returns a named list", {
......@@ -56,17 +56,17 @@ expect_true(".PARAMETER_ALIASES() returns a named list", {
})
expect_true("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]){
for (boosting_param in .PARAMETER_ALIASES()[["boosting"]]) {
expect_warning({
result <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5
, num_leaves = 5L
, learning_rate = 0.05
, nrounds = 5
, nrounds = 5L
, objective = "binary"
, metric = "binary_error"
, verbose = -1
, verbose = -1L
, params = stats::setNames(
object = "dart"
, nm = boosting_param
......
......@@ -15,9 +15,9 @@
# system() will not raise an R exception if the process called
# fails. Wrapping it here to get that behavior
.run_shell_command <- function(cmd, ...){
.run_shell_command <- function(cmd, ...) {
exit_code <- system(cmd, ...)
if (exit_code != 0){
if (exit_code != 0L) {
stop(paste0("Command failed with exit code: ", exit_code))
}
}
......@@ -27,33 +27,43 @@ unlink(x = "lightgbm_r", recursive = TRUE)
dir.create("lightgbm_r")
# copy in the relevant files
result <- file.copy(from = "R-package/./",
to = "lightgbm_r/",
recursive = TRUE,
overwrite = TRUE)
result <- file.copy(
from = "R-package/./"
, to = "lightgbm_r/"
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(from = "include/",
to = file.path("lightgbm_r", "src/"),
recursive = TRUE,
overwrite = TRUE)
result <- file.copy(
from = "include/"
, to = file.path("lightgbm_r", "src/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(from = "src/",
to = file.path("lightgbm_r", "src/"),
recursive = TRUE,
overwrite = TRUE)
result <- file.copy(
from = "src/"
, to = file.path("lightgbm_r", "src/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(from = "compute/",
to = file.path("lightgbm_r", "src/"),
recursive = TRUE,
overwrite = TRUE)
result <- file.copy(
from = "compute/"
, to = file.path("lightgbm_r", "src/")
, recursive = TRUE
, overwrite = TRUE
)
.handle_result(result)
result <- file.copy(from = "CMakeLists.txt",
to = file.path("lightgbm_r", "inst", "bin/"),
overwrite = TRUE)
result <- file.copy(
from = "CMakeLists.txt"
, to = file.path("lightgbm_r", "inst", "bin/")
, overwrite = TRUE
)
.handle_result(result)
# Build the package (do not touch this line!)
......@@ -68,15 +78,12 @@ version <- gsub(
"Version: ",
"",
grep(
"Version: ",
readLines(con = file.path("lightgbm_r", "DESCRIPTION")),
value = TRUE
"Version: "
, readLines(con = file.path("lightgbm_r", "DESCRIPTION"))
, value = TRUE
)
)
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))
cmd <- sprintf("R CMD INSTALL %s --no-multiarch", tarball)
.run_shell_command(cmd)
# Run R CMD CHECK
# R CMD CHECK lightgbm_2.1.2.tar.gz --as-cran | tee check.log | cat
......@@ -9,9 +9,11 @@ devtools::document()
clean_site()
init_site()
build_home(preview = FALSE, quiet = FALSE)
build_reference(lazy = FALSE,
document = FALSE,
examples = TRUE,
run_dont_run = FALSE,
seed = 42,
preview = FALSE)
build_reference(
lazy = FALSE
, document = FALSE
, examples = TRUE
, run_dont_run = FALSE
, seed = 42L
, preview = FALSE
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment