Unverified Commit d42f4313 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[R-package] [ci] added more linters (#2803)



* [R-package] [ci] added more linters

* Update R-package/tests/testthat/test_parameters.R
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* added cbind() to undesirable_function_linter

* fixed linting

* Update .ci/lint_r_code.R

* Apply suggestions from code review
Co-authored-by: default avatarNikita Titov <nekit94-08@mail.ru>
parent 6b68967d
...@@ -16,17 +16,30 @@ FILES_TO_LINT <- list.files( ...@@ -16,17 +16,30 @@ FILES_TO_LINT <- list.files(
, include.dirs = FALSE , include.dirs = FALSE
) )
# text to use for pipe operators from packages like 'magrittr'
pipe_text <- paste0(
"For consistency and the sake of being explicit, this project's code "
, "does not use the pipe operator."
)
# text to use for functions that should only be called interactively
interactive_text <- paste0(
"Functions like '?', 'help', and 'install.packages()' should only be used "
, "interactively, not in package code."
)
LINTERS_TO_USE <- list( LINTERS_TO_USE <- list(
"assignment" = lintr::assignment_linter "absolute_path" = lintr::absolute_path_linter
, "assignment" = lintr::assignment_linter
, "closed_curly" = lintr::closed_curly_linter , "closed_curly" = lintr::closed_curly_linter
, "commas" = lintr::commas_linter
, "equals_na" = lintr::equals_na_linter , "equals_na" = lintr::equals_na_linter
, "function_left" = lintr::function_left_parentheses_linter , "function_left" = lintr::function_left_parentheses_linter
, "commas" = lintr::commas_linter
, "concatenation" = lintr::unneeded_concatenation_linter
, "implicit_integers" = lintr::implicit_integer_linter , "implicit_integers" = lintr::implicit_integer_linter
, "infix_spaces" = lintr::infix_spaces_linter , "infix_spaces" = lintr::infix_spaces_linter
, "long_lines" = lintr::line_length_linter(length = 120L) , "long_lines" = lintr::line_length_linter(length = 120L)
, "tabs" = lintr::no_tab_linter , "no_tabs" = lintr::no_tab_linter
, "non_portable_path" = lintr::nonportable_path_linter
, "open_curly" = lintr::open_curly_linter , "open_curly" = lintr::open_curly_linter
, "paren_brace_linter" = lintr::paren_brace_linter , "paren_brace_linter" = lintr::paren_brace_linter
, "semicolon" = lintr::semicolon_terminator_linter , "semicolon" = lintr::semicolon_terminator_linter
...@@ -38,6 +51,38 @@ LINTERS_TO_USE <- list( ...@@ -38,6 +51,38 @@ LINTERS_TO_USE <- list(
, "trailing_blank" = lintr::trailing_blank_lines_linter , "trailing_blank" = lintr::trailing_blank_lines_linter
, "trailing_white" = lintr::trailing_whitespace_linter , "trailing_white" = lintr::trailing_whitespace_linter
, "true_false" = lintr::T_and_F_symbol_linter , "true_false" = lintr::T_and_F_symbol_linter
, "undesirable_function" = lintr::undesirable_function_linter(
fun = c(
"cbind" = paste0(
"cbind is an unsafe way to build up a data frame. merge() or direct "
, "column assignment is preferred."
)
, "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
, "dyn.unload" = "Directly loading/unloading .dll/.so files in package code should not be necessary."
, "help" = interactive_text
, "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types."
, "install.packages" = interactive_text
, "is.list" = paste0(
"This project uses data.table, and is.list(x) is TRUE for a data.table. "
, "identical(class(x), 'list') is a safer way to check that something is an R list object."
)
, "rbind" = "data.table::rbindlist() is faster and safer than rbind(), and is preferred in this project."
, "require" = paste0(
"library() is preferred to require() because it will raise an error immediately "
, "if a package is missing."
)
)
)
, "undesirable_operator" = lintr::undesirable_operator_linter(
op = c(
"%>%" = pipe_text
, "%.%" = pipe_text
, "%..%" = pipe_text
, "?" = interactive_text
, "??" = interactive_text
)
)
, "unneeded_concatenation" = lintr::unneeded_concatenation_linter
) )
cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT))) cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
...@@ -52,11 +97,14 @@ for (r_file in FILES_TO_LINT) { ...@@ -52,11 +97,14 @@ for (r_file in FILES_TO_LINT) {
, cache = FALSE , cache = FALSE
) )
cat(sprintf( print(
"Found %i linting errors in %s\n" sprintf(
"Found %i linting errors in %s"
, length(this_result) , length(this_result)
, r_file , r_file
)) )
, quote = FALSE
)
results <- c(results, this_result) results <- c(results, this_result)
...@@ -65,7 +113,6 @@ for (r_file in FILES_TO_LINT) { ...@@ -65,7 +113,6 @@ for (r_file in FILES_TO_LINT) {
issues_found <- length(results) issues_found <- length(results)
if (issues_found > 0L) { if (issues_found > 0L) {
cat("\n")
print(results) print(results)
} }
......
...@@ -18,7 +18,7 @@ CB_ENV <- R6::R6Class( ...@@ -18,7 +18,7 @@ CB_ENV <- R6::R6Class(
cb.reset.parameters <- function(new_params) { cb.reset.parameters <- function(new_params) {
# Check for parameter list # Check for parameter list
if (!is.list(new_params)) { if (!identical(class(new_params), "list")) {
stop(sQuote("new_params"), " must be a list") stop(sQuote("new_params"), " must be a list")
} }
......
...@@ -892,7 +892,7 @@ dimnames.lgb.Dataset <- function(x) { ...@@ -892,7 +892,7 @@ dimnames.lgb.Dataset <- function(x) {
`dimnames<-.lgb.Dataset` <- function(x, value) { `dimnames<-.lgb.Dataset` <- function(x, value) {
# Check if invalid element list # Check if invalid element list
if (!is.list(value) || length(value) != 2L) { if (!identical(class(value), "list") || length(value) != 2L) {
stop("invalid ", sQuote("value"), " given: must be a list of two elements") stop("invalid ", sQuote("value"), " given: must be a list of two elements")
} }
......
...@@ -178,7 +178,7 @@ lgb.cv <- function(params = list() ...@@ -178,7 +178,7 @@ lgb.cv <- function(params = list()
if (!is.null(folds)) { if (!is.null(folds)) {
# Check for list of folds or for single value # Check for list of folds or for single value
if (!is.list(folds) || length(folds) < 2L) { if (!identical(class(folds), "list") || length(folds) < 2L) {
stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold") stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold")
} }
......
...@@ -125,13 +125,15 @@ multiple.tree.plot.interpretation <- function(tree_interpretation, ...@@ -125,13 +125,15 @@ multiple.tree.plot.interpretation <- function(tree_interpretation,
cex <- 2.5 / log2(1.0 + top_n) cex <- 2.5 / log2(1.0 + top_n)
} }
# Do plot # create plot
tree_interpretation[Contribution > 0.0, bar_color := "firebrick"]
tree_interpretation[Contribution == 0.0, bar_color := "steelblue"]
tree_interpretation[.N:1L, tree_interpretation[.N:1L,
graphics::barplot( graphics::barplot(
height = Contribution height = Contribution
, names.arg = Feature , names.arg = Feature
, horiz = TRUE , horiz = TRUE
, col = ifelse(Contribution > 0L, "firebrick", "steelblue") , col = bar_color
, border = NA , border = NA
, main = title , main = title
, cex.names = cex , cex.names = cex
......
...@@ -73,7 +73,7 @@ lgb.train <- function(params = list(), ...@@ -73,7 +73,7 @@ lgb.train <- function(params = list(),
stop("lgb.train: data must be an lgb.Dataset instance") stop("lgb.train: data must be an lgb.Dataset instance")
} }
if (length(valids) > 0L) { if (length(valids) > 0L) {
if (!is.list(valids) || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) { if (!identical(class(valids), "list") || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) {
stop("lgb.train: valids must be a list of lgb.Dataset elements") stop("lgb.train: valids must be a list of lgb.Dataset elements")
} }
evnames <- names(valids) evnames <- names(valids)
......
...@@ -105,7 +105,7 @@ lgb.call.return.str <- function(fun_name, ...) { ...@@ -105,7 +105,7 @@ lgb.call.return.str <- function(fun_name, ...) {
lgb.params2str <- function(params, ...) { lgb.params2str <- function(params, ...) {
# Check for a list as input # Check for a list as input
if (!is.list(params)) { if (!identical(class(params), "list")) {
stop("params must be a list") stop("params must be a list")
} }
...@@ -254,7 +254,7 @@ lgb.check.eval <- function(params, eval) { ...@@ -254,7 +254,7 @@ lgb.check.eval <- function(params, eval) {
} }
# If 'eval' is a list or character vector, store it in 'metric' # If 'eval' is a list or character vector, store it in 'metric'
if (is.character(eval) || is.list(eval)) { if (is.character(eval) || identical(class(eval), "list")) {
params$metric <- append(params$metric, eval) params$metric <- append(params$metric, eval)
} }
......
require(lightgbm) library(lightgbm)
require(methods) library(methods)
# We load in the agaricus dataset # We load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom is edible # In this example, we are aiming to predict whether a mushroom is edible
......
require(lightgbm) library(lightgbm)
require(methods) library(methods)
# Load in the agaricus dataset # Load in the agaricus dataset
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
......
require(lightgbm) library(lightgbm)
# load in the agaricus dataset # load in the agaricus dataset
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm") data(agaricus.test, package = "lightgbm")
......
require(lightgbm) library(lightgbm)
require(methods) library(methods)
# Load in the agaricus dataset # Load in the agaricus dataset
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
......
require(lightgbm) library(lightgbm)
# We load the default iris dataset shipped with R # We load the default iris dataset shipped with R
data(iris) data(iris)
......
require(lightgbm) library(lightgbm)
# We load the default iris dataset shipped with R # We load the default iris dataset shipped with R
data(iris) data(iris)
...@@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) ...@@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
custom_multiclass_obj <- function(preds, dtrain) { custom_multiclass_obj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
# preds is a matrix with rows corresponding to samples and colums corresponding to choices # preds is a matrix with rows corresponding to samples and columns corresponding to choices
preds <- matrix(preds, nrow = length(labels)) preds <- matrix(preds, nrow = length(labels))
# to prevent overflow, normalize preds by row # to prevent overflow, normalize preds by row
preds <- preds - apply(preds, 1L, max) preds <- preds - apply(preds, MARGIN = 1L, max)
prob <- exp(preds) / rowSums(exp(preds)) prob <- exp(preds) / rowSums(exp(preds))
# compute gradient # compute gradient
grad <- prob grad <- prob
grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L subset_index <- as.matrix(
data.frame(
seq_len(length(labels))
, labels + 1L
, fix.empty.names = FALSE
)
, nrow = length(labels)
, dimnames = NULL
)
grad[subset_index] <- grad[subset_index] - 1L
# compute hessian (approximation) # compute hessian (approximation)
hess <- 2.0 * prob * (1.0 - prob) hess <- 2.0 * prob * (1.0 - prob)
...@@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) { ...@@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) {
preds <- preds - apply(preds, 1L, max) preds <- preds - apply(preds, 1L, max)
prob <- exp(preds) / rowSums(exp(preds)) prob <- exp(preds) / rowSums(exp(preds))
subset_index <- as.matrix(
data.frame(
seq_len(length(labels))
, labels + 1L
, fix.empty.names = FALSE
)
, nrow = length(labels)
, dimnames = NULL
)
return(list( return(list(
name = "error" name = "error"
, value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)])) , value = -mean(log(prob[subset_index]))
, higher_better = FALSE , higher_better = FALSE
)) ))
} }
......
require(lightgbm) library(lightgbm)
require(Matrix) library(Matrix)
context("testing lgb.Dataset functionality") context("testing lgb.Dataset functionality")
...@@ -140,7 +140,7 @@ test_that("Dataset$get_params() successfully returns parameters if you passed th ...@@ -140,7 +140,7 @@ test_that("Dataset$get_params() successfully returns parameters if you passed th
, params = params , params = params
) )
returned_params <- ds$get_params() returned_params <- ds$get_params()
expect_true(methods::is(returned_params, "list")) expect_identical(class(returned_params), "list")
expect_identical(length(params), length(returned_params)) expect_identical(length(params), length(returned_params))
expect_identical(sort(names(params)), sort(names(returned_params))) expect_identical(sort(names(params)), sort(names(returned_params)))
for (param_name in names(params)) { for (param_name in names(params)) {
......
...@@ -40,7 +40,7 @@ test_that("lgb.intereprete works as expected for binary classification", { ...@@ -40,7 +40,7 @@ test_that("lgb.intereprete works as expected for binary classification", {
, data = test$data , data = test$data
, idxset = seq_len(num_trees) , idxset = seq_len(num_trees)
) )
expect_true(methods::is(tree_interpretation, "list")) expect_identical(class(tree_interpretation), "list")
expect_true(length(tree_interpretation) == num_trees) expect_true(length(tree_interpretation) == num_trees)
expect_null(names(tree_interpretation)) expect_null(names(tree_interpretation))
expect_true(all( expect_true(all(
...@@ -91,7 +91,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", { ...@@ -91,7 +91,7 @@ test_that("lgb.intereprete works as expected for multiclass classification", {
, data = test[, 1L:4L] , data = test[, 1L:4L]
, idxset = seq_len(num_trees) , idxset = seq_len(num_trees)
) )
expect_true(methods::is(tree_interpretation, "list")) expect_identical(class(tree_interpretation), "list")
expect_true(length(tree_interpretation) == num_trees) expect_true(length(tree_interpretation) == num_trees)
expect_null(names(tree_interpretation)) expect_null(names(tree_interpretation))
expect_true(all( expect_true(all(
......
...@@ -48,7 +48,7 @@ context("parameter aliases") ...@@ -48,7 +48,7 @@ context("parameter aliases")
test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", { test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
param_aliases <- .PARAMETER_ALIASES() param_aliases <- .PARAMETER_ALIASES()
expect_true(is.list(param_aliases)) expect_identical(class(param_aliases), "list")
expect_true(is.character(names(param_aliases))) expect_true(is.character(names(param_aliases)))
expect_true(is.character(param_aliases[["boosting"]])) expect_true(is.character(param_aliases[["boosting"]]))
expect_true(is.character(param_aliases[["early_stopping_round"]])) expect_true(is.character(param_aliases[["early_stopping_round"]]))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment