Commit 1f7b06b9 authored by James Lamb's avatar James Lamb Committed by Nikita Titov
Browse files

[R-package][ci] Added more R linters (fixes #2477) (#2533)



* Added more linters on R code

* started working on implicit integers

* finished style changes to handle implicit integers

* regenned documentation and added concatenation linter

* changed channel for r-lintr

* try building stringi before lintr

* trying to get libicui18n

* trying another thing

* trying conda-forge again

* added re-install of stringi

* uncommented other stages

* Update .ci/test.sh
Co-Authored-By: default avatarNikita Titov <nekit94-08@mail.ru>

* removed apt update and changed lintr version floor

* get lintr from CRAN

* R needs to come before C++ linting

* testing lintr install from CRAN

* trying one more thing

* more verbose

* order might matter

* removed commented code

* cleaned up linting block in test.sh

* grouped conda install calls and fixed a few integer array things
parent 11f9682b
...@@ -32,12 +32,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label) ...@@ -32,12 +32,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train(params, dtrain, 10) model <- lgb.train(params, dtrain, 10L)
tree_imp1 <- lgb.importance(model, percentage = TRUE) tree_imp1 <- lgb.importance(model, percentage = TRUE)
tree_imp2 <- lgb.importance(model, percentage = FALSE) tree_imp2 <- lgb.importance(model, percentage = FALSE)
......
...@@ -16,19 +16,21 @@ lgb.interprete(model, data, idxset, num_iteration = NULL) ...@@ -16,19 +16,21 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
\item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration.} \item{num_iteration}{number of iteration want to predict with, NULL or <= 0 means use best iteration.}
} }
\value{ \value{
For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} with the following columns: For regression, binary classification and lambdarank model, a \code{list} of \code{data.table}
\itemize{ with the following columns:
\item \code{Feature} Feature names in the model. \itemize{
\item \code{Contribution} The total contribution of this feature's splits. \item \code{Feature} Feature names in the model.
} \item \code{Contribution} The total contribution of this feature's splits.
For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class. }
For multiclass classification, a \code{list} of \code{data.table} with the Feature column and
Contribution columns to each class.
} }
\description{ \description{
Computes feature contribution components of rawscore prediction. Computes feature contribution components of rawscore prediction.
} }
\examples{ \examples{
Sigmoid <- function(x) 1 / (1 + exp(-x)) Sigmoid <- function(x) 1.0 / (1.0 + exp(-x))
Logit <- function(x) log(x / (1 - x)) Logit <- function(x) log(x / (1.0 - x))
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
...@@ -39,13 +41,13 @@ test <- agaricus.test ...@@ -39,13 +41,13 @@ test <- agaricus.test
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train(params, dtrain, 10) model <- lgb.train(params, dtrain, 10L)
tree_interpretation <- lgb.interprete(model, test$data, 1:5) tree_interpretation <- lgb.interprete(model, test$data, 1L:5L)
} }
...@@ -32,11 +32,11 @@ valids <- list(test = dtest) ...@@ -32,11 +32,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
lgb.save(model, "model.txt") lgb.save(model, "model.txt")
load_booster <- lgb.load(filename = "model.txt") load_booster <- lgb.load(filename = "model.txt")
......
...@@ -47,12 +47,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label) ...@@ -47,12 +47,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train(params, dtrain, 10) model <- lgb.train(params, dtrain, 10L)
tree_dt <- lgb.model.dt.tree(model) tree_dt <- lgb.model.dt.tree(model)
......
...@@ -6,9 +6,9 @@ ...@@ -6,9 +6,9 @@
\usage{ \usage{
lgb.plot.importance( lgb.plot.importance(
tree_imp, tree_imp,
top_n = 10, top_n = 10L,
measure = "Gain", measure = "Gain",
left_margin = 10, left_margin = 10L,
cex = NULL cex = NULL
) )
} }
...@@ -42,14 +42,14 @@ dtrain <- lgb.Dataset(train$data, label = train$label) ...@@ -42,14 +42,14 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train(params, dtrain, 10) model <- lgb.train(params, dtrain, 10)
tree_imp <- lgb.importance(model, percentage = TRUE) tree_imp <- lgb.importance(model, percentage = TRUE)
lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain")
} }
...@@ -6,9 +6,9 @@ ...@@ -6,9 +6,9 @@
\usage{ \usage{
lgb.plot.interpretation( lgb.plot.interpretation(
tree_interpretation_dt, tree_interpretation_dt,
top_n = 10, top_n = 10L,
cols = 1, cols = 1L,
left_margin = 10, left_margin = 10L,
cex = NULL cex = NULL
) )
} }
...@@ -35,8 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio ...@@ -35,8 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio
} }
\examples{ \examples{
library(lightgbm) library(lightgbm)
Sigmoid <- function(x) {1 / (1 + exp(-x))} Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))}
Logit <- function(x) {log(x / (1 - x))} Logit <- function(x) {log(x / (1.0 - x))}
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
...@@ -47,13 +47,13 @@ test <- agaricus.test ...@@ -47,13 +47,13 @@ test <- agaricus.test
params <- list( params <- list(
objective = "binary" objective = "binary"
, learning_rate = 0.01 , learning_rate = 0.01
, num_leaves = 63 , num_leaves = 63L
, max_depth = -1 , max_depth = -1L
, min_data_in_leaf = 1 , min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1 , min_sum_hessian_in_leaf = 1.0
) )
model <- lgb.train(params, dtrain, 10) model <- lgb.train(params, dtrain, 10L)
tree_interpretation <- lgb.interprete(model, test$data, 1:5) tree_interpretation <- lgb.interprete(model, test$data, 1L:5L)
lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L)
} }
...@@ -31,16 +31,16 @@ new_iris <- lgb.prepare_rules(data = iris) # Autoconverter ...@@ -31,16 +31,16 @@ new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
str(new_iris$data) str(new_iris$data)
data(iris) # Erase iris dataset data(iris) # Erase iris dataset
iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
# Use conversion using known rules # Use conversion using known rules
# Unknown factors become 0, excellent for sparse datasets # Unknown factors become 0, excellent for sparse datasets
newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
# Unknown factor is now zero, perfect for sparse datasets # Unknown factor is now zero, perfect for sparse datasets
newer_iris$data[1, ] # Species became 0 as it is an unknown factor newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
newer_iris$data[1, 5] <- 1 # Put back real initial value newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
# Is the newly created dataset equal? YES! # Is the newly created dataset equal? YES!
all.equal(new_iris$data, newer_iris$data) all.equal(new_iris$data, newer_iris$data)
...@@ -49,9 +49,9 @@ all.equal(new_iris$data, newer_iris$data) ...@@ -49,9 +49,9 @@ all.equal(new_iris$data, newer_iris$data)
data(iris) # Erase iris dataset data(iris) # Erase iris dataset
# We remapped values differently # We remapped values differently
personal_rules <- list(Species = c("setosa" = 3, personal_rules <- list(Species = c("setosa" = 3L,
"versicolor" = 2, "versicolor" = 2L,
"virginica" = 1)) "virginica" = 1L))
newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
str(newest_iris$data) # SUCCESS! str(newest_iris$data) # SUCCESS!
......
...@@ -32,11 +32,11 @@ valids <- list(test = dtest) ...@@ -32,11 +32,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
lgb.save(model, "model.txt") lgb.save(model, "model.txt")
......
...@@ -7,11 +7,11 @@ ...@@ -7,11 +7,11 @@
lgb.train( lgb.train(
params = list(), params = list(),
data, data,
nrounds = 10, nrounds = 10L,
valids = list(), valids = list(),
obj = NULL, obj = NULL,
eval = NULL, eval = NULL,
verbose = 1, verbose = 1L,
record = TRUE, record = TRUE,
eval_freq = 1L, eval_freq = 1L,
init_model = NULL, init_model = NULL,
...@@ -93,10 +93,10 @@ valids <- list(test = dtest) ...@@ -93,10 +93,10 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
} }
...@@ -21,7 +21,9 @@ environment. Defaults to \code{FALSE} which means to not remove them.} ...@@ -21,7 +21,9 @@ environment. Defaults to \code{FALSE} which means to not remove them.}
NULL invisibly. NULL invisibly.
} }
\description{ \description{
Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object. Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R.
This is useful for instance if an object becomes stuck for no apparent reason and you do not want
to restart R to fix the lost object.
} }
\examples{ \examples{
library(lightgbm) library(lightgbm)
...@@ -36,11 +38,11 @@ valids <- list(test = dtest) ...@@ -36,11 +38,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
\dontrun{ \dontrun{
......
...@@ -9,8 +9,8 @@ lightgbm( ...@@ -9,8 +9,8 @@ lightgbm(
label = NULL, label = NULL,
weight = NULL, weight = NULL,
params = list(), params = list(),
nrounds = 10, nrounds = 10L,
verbose = 1, verbose = 1L,
eval_freq = 1L, eval_freq = 1L,
early_stopping_rounds = NULL, early_stopping_rounds = NULL,
save_name = "lightgbm.model", save_name = "lightgbm.model",
......
...@@ -41,12 +41,12 @@ the \code{lgb.Booster} object passed to \code{object}.} ...@@ -41,12 +41,12 @@ the \code{lgb.Booster} object passed to \code{object}.}
} }
\value{ \value{
For regression or binary classification, it returns a vector of length \code{nrows(data)}. For regression or binary classification, it returns a vector of length \code{nrows(data)}.
For multiclass classification, either a \code{num_class * nrows(data)} vector or For multiclass classification, either a \code{num_class * nrows(data)} vector or
a \code{(nrows(data), num_class)} dimension matrix is returned, depending on a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value. the \code{reshape} value.
When \code{predleaf = TRUE}, the output is a matrix object with the When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees. number of columns corresponding to the number of trees.
} }
\description{ \description{
Predicted values based on class \code{lgb.Booster} Predicted values based on class \code{lgb.Booster}
...@@ -64,11 +64,11 @@ valids <- list(test = dtest) ...@@ -64,11 +64,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
preds <- predict(model, test$data) preds <- predict(model, test$data)
......
...@@ -30,11 +30,11 @@ valids <- list(test = dtest) ...@@ -30,11 +30,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
saveRDS.lgb.Booster(model, "model.rds") saveRDS.lgb.Booster(model, "model.rds")
new_model <- readRDS.lgb.Booster("model.rds") new_model <- readRDS.lgb.Booster("model.rds")
......
...@@ -54,11 +54,11 @@ valids <- list(test = dtest) ...@@ -54,11 +54,11 @@ valids <- list(test = dtest)
model <- lgb.train( model <- lgb.train(
params = params params = params
, data = dtrain , data = dtrain
, nrounds = 10 , nrounds = 10L
, valids = valids , valids = valids
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 5 , early_stopping_rounds = 5L
) )
saveRDS.lgb.Booster(model, "model.rds") saveRDS.lgb.Booster(model, "model.rds")
} }
...@@ -12,11 +12,11 @@ setinfo(dataset, ...) ...@@ -12,11 +12,11 @@ setinfo(dataset, ...)
\arguments{ \arguments{
\item{dataset}{Object of class \code{lgb.Dataset}} \item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
\item{name}{the name of the field to get} \item{name}{the name of the field to get}
\item{info}{the specific field of information to set} \item{info}{the specific field of information to set}
\item{...}{other parameters}
} }
\value{ \value{
passed object passed object
......
...@@ -12,9 +12,9 @@ slice(dataset, ...) ...@@ -12,9 +12,9 @@ slice(dataset, ...)
\arguments{ \arguments{
\item{dataset}{Object of class \code{lgb.Dataset}} \item{dataset}{Object of class \code{lgb.Dataset}}
\item{idxset}{an integer vector of indices of rows needed}
\item{...}{other parameters (currently not used)} \item{...}{other parameters (currently not used)}
\item{idxset}{an integer vector of indices of rows needed}
} }
\value{ \value{
constructed sub dataset constructed sub dataset
...@@ -29,7 +29,7 @@ data(agaricus.train, package = "lightgbm") ...@@ -29,7 +29,7 @@ data(agaricus.train, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label) dtrain <- lgb.Dataset(train$data, label = train$label)
dsub <- lightgbm::slice(dtrain, 1:42) dsub <- lightgbm::slice(dtrain, seq_len(42L))
lgb.Dataset.construct(dsub) lgb.Dataset.construct(dsub)
labels <- lightgbm::getinfo(dsub, "label") labels <- lightgbm::getinfo(dsub, "label")
......
...@@ -3,20 +3,20 @@ use_precompile <- FALSE ...@@ -3,20 +3,20 @@ use_precompile <- FALSE
use_gpu <- FALSE use_gpu <- FALSE
use_mingw <- FALSE use_mingw <- FALSE
if (.Machine$sizeof.pointer != 8){ if (.Machine$sizeof.pointer != 8L) {
stop("Only support 64-bit R, please check your the version of your R and Rtools.") stop("Only support 64-bit R, please check your the version of your R and Rtools.")
} }
R_int_UUID <- .Internal(internalsID()) R_int_UUID <- .Internal(internalsID())
R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10 R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10.0
if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262" if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262"
|| R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")){ || R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")) {
print("Warning: unmatched R_INTERNALS_UUID, may cannot run normally.") print("Warning: unmatched R_INTERNALS_UUID, may cannot run normally.")
} }
# Move in CMakeLists.txt # Move in CMakeLists.txt
if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)){ if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)) {
stop("Copying CMakeLists failed") stop("Copying CMakeLists failed")
} }
...@@ -48,14 +48,14 @@ if (!use_precompile) { ...@@ -48,14 +48,14 @@ if (!use_precompile) {
# Using this kind-of complicated pattern to avoid matching to # Using this kind-of complicated pattern to avoid matching to
# things like "pgcc" # things like "pgcc"
using_gcc <- grepl( using_gcc <- grepl(
pattern = '^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$' pattern = "^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$"
, x = Sys.getenv('CC', '') , x = Sys.getenv("CC", "")
) )
using_gpp <- grepl( using_gpp <- grepl(
pattern = '^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$' pattern = "^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$"
, x = Sys.getenv('CXX', '') , x = Sys.getenv("CXX", "")
) )
on_mac <- Sys.info()['sysname'] == 'Darwin' on_mac <- Sys.info()["sysname"] == "Darwin"
if (on_mac && !(using_gcc & using_gpp)) { if (on_mac && !(using_gcc & using_gpp)) {
cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" ') cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" ')
cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_LIB_NAMES="omp" ') cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_LIB_NAMES="omp" ')
...@@ -71,21 +71,21 @@ if (!use_precompile) { ...@@ -71,21 +71,21 @@ if (!use_precompile) {
build_cmd <- "mingw32-make.exe _lightgbm" build_cmd <- "mingw32-make.exe _lightgbm"
system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools
} else { } else {
try_vs <- 0 try_vs <- 0L
local_vs_def <- "" local_vs_def <- ""
vs_versions <- c("Visual Studio 16 2019", "Visual Studio 15 2017", "Visual Studio 14 2015") vs_versions <- c("Visual Studio 16 2019", "Visual Studio 15 2017", "Visual Studio 14 2015")
for (vs in vs_versions){ for (vs in vs_versions) {
vs_def <- paste0(" -G \"", vs, "\" -A x64") vs_def <- paste0(" -G \"", vs, "\" -A x64")
tmp_cmake_cmd <- paste0(cmake_cmd, vs_def) tmp_cmake_cmd <- paste0(cmake_cmd, vs_def)
try_vs <- system(paste0(tmp_cmake_cmd, " ..")) try_vs <- system(paste0(tmp_cmake_cmd, " .."))
if (try_vs == 0) { if (try_vs == 0L) {
local_vs_def = vs_def local_vs_def <- vs_def
break break
} else { } else {
unlink("./*", recursive = TRUE) # Clean up build directory unlink("./*", recursive = TRUE) # Clean up build directory
} }
} }
if (try_vs == 1) { if (try_vs == 1L) {
cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") # Switch to MinGW on failure, try build once cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") # Switch to MinGW on failure, try build once
system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due sh.exe in Rtools
build_cmd <- "mingw32-make.exe _lightgbm" build_cmd <- "mingw32-make.exe _lightgbm"
......
context("basic functions") context("basic functions")
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
test <- agaricus.test test <- agaricus.test
windows_flag = grepl('Windows', Sys.info()[['sysname']]) windows_flag <- grepl("Windows", Sys.info()[["sysname"]])
test_that("train and predict binary classification", { test_that("train and predict binary classification", {
nrounds = 10 nrounds <- 10L
bst <- lightgbm( bst <- lightgbm(
data = train$data data = train$data
, label = train$label , label = train$label
, num_leaves = 5 , num_leaves = 5L
, nrounds = nrounds , nrounds = nrounds
, objective = "binary" , objective = "binary"
, metric = "binary_error" , metric = "binary_error"
...@@ -22,38 +22,38 @@ test_that("train and predict binary classification", { ...@@ -22,38 +22,38 @@ test_that("train and predict binary classification", {
expect_lt(min(record_results), 0.02) expect_lt(min(record_results), 0.02)
pred <- predict(bst, test$data) pred <- predict(bst, test$data)
expect_equal(length(pred), 1611) expect_equal(length(pred), 1611L)
pred1 <- predict(bst, train$data, num_iteration = 1) pred1 <- predict(bst, train$data, num_iteration = 1L)
expect_equal(length(pred1), 6513) expect_equal(length(pred1), 6513L)
err_pred1 <- sum( (pred1 > 0.5) != train$label) / length(train$label) err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
err_log <- record_results[1] err_log <- record_results[1L]
expect_lt(abs(err_pred1 - err_log), 10e-6) expect_lt(abs(err_pred1 - err_log), 10e-6)
}) })
test_that("train and predict softmax", { test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1 lb <- as.numeric(iris$Species) - 1L
bst <- lightgbm( bst <- lightgbm(
data = as.matrix(iris[, -5]) data = as.matrix(iris[, -5L])
, label = lb , label = lb
, num_leaves = 4 , num_leaves = 4L
, learning_rate = 0.1 , learning_rate = 0.1
, nrounds = 20 , nrounds = 20L
, min_data = 20 , min_data = 20L
, min_hess = 20 , min_hess = 20.0
, objective = "multiclass" , objective = "multiclass"
, metric = "multi_error" , metric = "multi_error"
, num_class = 3 , num_class = 3L
) )
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "multi_error") record_results <- lgb.get.eval.result(bst, "train", "multi_error")
expect_lt(min(record_results), 0.03) expect_lt(min(record_results), 0.03)
pred <- predict(bst, as.matrix(iris[, -5])) pred <- predict(bst, as.matrix(iris[, -5L]))
expect_equal(length(pred), nrow(iris) * 3) expect_equal(length(pred), nrow(iris) * 3L)
}) })
...@@ -61,11 +61,11 @@ test_that("use of multiple eval metrics works", { ...@@ -61,11 +61,11 @@ test_that("use of multiple eval metrics works", {
bst <- lightgbm( bst <- lightgbm(
data = train$data data = train$data
, label = train$label , label = train$label
, num_leaves = 4 , num_leaves = 4L
, learning_rate = 1 , learning_rate = 1.0
, nrounds = 10 , nrounds = 10L
, objective = "binary" , objective = "binary"
, metric = list("binary_error","auc","binary_logloss") , metric = list("binary_error", "auc", "binary_logloss")
) )
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
}) })
...@@ -78,28 +78,28 @@ test_that("training continuation works", { ...@@ -78,28 +78,28 @@ test_that("training continuation works", {
, label = train$label , label = train$label
, free_raw_data = FALSE , free_raw_data = FALSE
) )
watchlist = list(train = dtrain) watchlist <- list(train = dtrain)
param <- list( param <- list(
objective = "binary" objective = "binary"
, metric = "binary_logloss" , metric = "binary_logloss"
, num_leaves = 5 , num_leaves = 5L
, learning_rate = 1 , learning_rate = 1.0
) )
# for the reference, use 10 iterations at once: # for the reference, use 10 iterations at once:
bst <- lgb.train(param, dtrain, nrounds = 10, watchlist) bst <- lgb.train(param, dtrain, nrounds = 10L, watchlist)
err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10) err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10L)
# first 5 iterations: # first 5 iterations:
bst1 <- lgb.train(param, dtrain, nrounds = 5, watchlist) bst1 <- lgb.train(param, dtrain, nrounds = 5L, watchlist)
# test continuing from a model in file # test continuing from a model in file
lgb.save(bst1, "lightgbm.model") lgb.save(bst1, "lightgbm.model")
# continue for 5 more: # continue for 5 more:
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = bst1) bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = bst1)
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10) err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L)
expect_lt(abs(err_bst - err_bst2), 0.01) expect_lt(abs(err_bst - err_bst2), 0.01)
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = "lightgbm.model") bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = "lightgbm.model")
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10) err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L)
expect_lt(abs(err_bst - err_bst2), 0.01) expect_lt(abs(err_bst - err_bst2), 0.01)
}) })
...@@ -110,11 +110,11 @@ test_that("cv works", { ...@@ -110,11 +110,11 @@ test_that("cv works", {
bst <- lgb.cv( bst <- lgb.cv(
params params
, dtrain , dtrain
, 10 , 10L
, nfold = 5 , nfold = 5L
, min_data = 1 , min_data = 1L
, learning_rate = 1 , learning_rate = 1.0
, early_stopping_rounds = 10 , early_stopping_rounds = 10L
) )
expect_false(is.null(bst$record_evals)) expect_false(is.null(bst$record_evals))
}) })
context('Test models with custom objective') context("Test models with custom objective")
data(agaricus.train, package = 'lightgbm') data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label) dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain) watchlist <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) { logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
preds <- 1 / (1 + exp(-preds)) preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels grad <- preds - labels
hess <- preds * (1 - preds) hess <- preds * (1.0 - preds)
return(list(grad = grad, hess = hess)) return(list(grad = grad, hess = hess))
} }
evalerror <- function(preds, dtrain) { evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0))) / length(labels) err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list( return(list(
name = "error" name = "error"
, value = err , value = err
...@@ -25,12 +25,12 @@ evalerror <- function(preds, dtrain) { ...@@ -25,12 +25,12 @@ evalerror <- function(preds, dtrain) {
} }
param <- list( param <- list(
num_leaves = 8 num_leaves = 8L
, learning_rate = 1 , learning_rate = 1.0
, objective = logregobj , objective = logregobj
, metric = "auc" , metric = "auc"
) )
num_round <- 10 num_round <- 10L
test_that("custom objective works", { test_that("custom objective works", {
bst <- lgb.train(param, dtrain, num_round, watchlist, eval = evalerror) bst <- lgb.train(param, dtrain, num_round, watchlist, eval = evalerror)
......
...@@ -3,49 +3,49 @@ require(Matrix) ...@@ -3,49 +3,49 @@ require(Matrix)
context("testing lgb.Dataset functionality") context("testing lgb.Dataset functionality")
data(agaricus.test, package = 'lightgbm') data(agaricus.test, package = "lightgbm")
test_data <- agaricus.test$data[1:100,] test_data <- agaricus.test$data[1L:100L, ]
test_label <- agaricus.test$label[1:100] test_label <- agaricus.test$label[1L:100L]
test_that("lgb.Dataset: basic construction, saving, loading", { test_that("lgb.Dataset: basic construction, saving, loading", {
# from sparse matrix # from sparse matrix
dtest1 <- lgb.Dataset(test_data, label = test_label) dtest1 <- lgb.Dataset(test_data, label = test_label)
# from dense matrix # from dense matrix
dtest2 <- lgb.Dataset(as.matrix(test_data), label = test_label) dtest2 <- lgb.Dataset(as.matrix(test_data), label = test_label)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label')) expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
# save to a local file # save to a local file
tmp_file <- tempfile('lgb.Dataset_') tmp_file <- tempfile("lgb.Dataset_")
lgb.Dataset.save(dtest1, tmp_file) lgb.Dataset.save(dtest1, tmp_file)
# read from a local file # read from a local file
dtest3 <- lgb.Dataset(tmp_file) dtest3 <- lgb.Dataset(tmp_file)
lgb.Dataset.construct(dtest3) lgb.Dataset.construct(dtest3)
unlink(tmp_file) unlink(tmp_file)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label')) expect_equal(getinfo(dtest1, "label"), getinfo(dtest3, "label"))
}) })
test_that("lgb.Dataset: getinfo & setinfo", { test_that("lgb.Dataset: getinfo & setinfo", {
dtest <- lgb.Dataset(test_data) dtest <- lgb.Dataset(test_data)
dtest$construct() dtest$construct()
setinfo(dtest, 'label', test_label) setinfo(dtest, "label", test_label)
labels <- getinfo(dtest, 'label') labels <- getinfo(dtest, "label")
expect_equal(test_label, getinfo(dtest, 'label')) expect_equal(test_label, getinfo(dtest, "label"))
expect_true(length(getinfo(dtest, 'weight')) == 0) expect_true(length(getinfo(dtest, "weight")) == 0L)
expect_true(length(getinfo(dtest, 'init_score')) == 0) expect_true(length(getinfo(dtest, "init_score")) == 0L)
# any other label should error # any other label should error
expect_error(setinfo(dtest, 'asdf', test_label)) expect_error(setinfo(dtest, "asdf", test_label))
}) })
test_that("lgb.Dataset: slice, dim", { test_that("lgb.Dataset: slice, dim", {
dtest <- lgb.Dataset(test_data, label = test_label) dtest <- lgb.Dataset(test_data, label = test_label)
lgb.Dataset.construct(dtest) lgb.Dataset.construct(dtest)
expect_equal(dim(dtest), dim(test_data)) expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42) dsub1 <- slice(dtest, seq_len(42L))
lgb.Dataset.construct(dsub1) lgb.Dataset.construct(dsub1)
expect_equal(nrow(dsub1), 42) expect_equal(nrow(dsub1), 42L)
expect_equal(ncol(dsub1), ncol(test_data)) expect_equal(ncol(dsub1), ncol(test_data))
}) })
...@@ -54,15 +54,17 @@ test_that("lgb.Dataset: colnames", { ...@@ -54,15 +54,17 @@ test_that("lgb.Dataset: colnames", {
expect_equal(colnames(dtest), colnames(test_data)) expect_equal(colnames(dtest), colnames(test_data))
lgb.Dataset.construct(dtest) lgb.Dataset.construct(dtest)
expect_equal(colnames(dtest), colnames(test_data)) expect_equal(colnames(dtest), colnames(test_data))
expect_error( colnames(dtest) <- 'asdf') expect_error({
new_names <- make.names(1:ncol(test_data)) colnames(dtest) <- "asdf"
})
new_names <- make.names(seq_len(ncol(test_data)))
expect_silent(colnames(dtest) <- new_names) expect_silent(colnames(dtest) <- new_names)
expect_equal(colnames(dtest), new_names) expect_equal(colnames(dtest), new_names)
}) })
test_that("lgb.Dataset: nrow is correct for a very sparse matrix", { test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
nr <- 1000 nr <- 1000L
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005) x <- Matrix::rsparsematrix(nr, 100L, density = 0.0005)
# we want it very sparse, so that last rows are empty # we want it very sparse, so that last rows are empty
expect_lt(max(x@i), nr) expect_lt(max(x@i), nr)
dtest <- lgb.Dataset(x) dtest <- lgb.Dataset(x)
...@@ -70,7 +72,7 @@ test_that("lgb.Dataset: nrow is correct for a very sparse matrix", { ...@@ -70,7 +72,7 @@ test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
}) })
test_that("lgb.Dataset: Dataset should be able to construct from matrix and return non-null handle", { test_that("lgb.Dataset: Dataset should be able to construct from matrix and return non-null handle", {
rawData <- matrix(runif(1000), ncol = 10) rawData <- matrix(runif(1000L), ncol = 10L)
handle <- NA_real_ handle <- NA_real_
ref_handle <- NULL ref_handle <- NULL
handle <- lightgbm:::lgb.call( handle <- lightgbm:::lgb.call(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment