Commit 1f7b06b9 authored by James Lamb, committed by Nikita Titov

[R-package][ci] Added more R linters (fixes #2477) (#2533)



* Added more linters on R code

* started working on implicit integers

* finished style changes to handle implicit integers

* regenned documentation and added concatenation linter

* changed channel for r-lintr

* try building stringi before lintr

* trying to get libicui18n

* trying another thing

* trying conda-forge again

* added re-install of stringi

* uncommented other stages

* Update .ci/test.sh
Co-Authored-By: Nikita Titov <nekit94-08@mail.ru>

* removed apt update and changed lintr version floor

* get lintr from CRAN

* R needs to come before C++ linting

* testing lintr install from CRAN

* trying one more thing

* more verbose

* order might matter

* removed commented code

* cleaned up linting block in test.sh

* grouped conda install calls and fixed a few integer array things
parent 11f9682b
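Most of the diff below applies the new linting rules mechanically: integer-valued literals gain an explicit L suffix, floating-point literals are written as 1.0 rather than 1, single quotes become double quotes, and = assignments become <-. As a small illustration (not part of this commit) of the distinction the implicit-integer rule enforces:

storage.mode(63)     # "double"  -- a bare 63 is a numeric (double) literal in R
storage.mode(63L)    # "integer" -- the L suffix makes it a true integer literal
identical(63, 63L)   # FALSE: same value, but different storage types

Explicit integer literals make it obvious which parameters (for example num_leaves or nrounds) are counts rather than continuous values.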
@@ -32,12 +32,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(params, dtrain, 10)
model <- lgb.train(params, dtrain, 10L)
tree_imp1 <- lgb.importance(model, percentage = TRUE)
tree_imp2 <- lgb.importance(model, percentage = FALSE)
@@ -16,19 +16,21 @@ lgb.interprete(model, data, idxset, num_iteration = NULL)
\item{num_iteration}{number of iterations to predict with, NULL or <= 0 means use the best iteration.}
}
\value{
For regression, binary classification and lambdarank model, a \code{list} of \code{data.table} with the following columns:
\itemize{
\item \code{Feature} Feature names in the model.
\item \code{Contribution} The total contribution of this feature's splits.
}
For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class.
For regression, binary classification and lambdarank model, a \code{list} of \code{data.table}
with the following columns:
\itemize{
\item \code{Feature} Feature names in the model.
\item \code{Contribution} The total contribution of this feature's splits.
}
For multiclass classification, a \code{list} of \code{data.table} with the Feature column and
Contribution columns to each class.
}
\description{
Computes feature contribution components of rawscore prediction.
}
\examples{
Sigmoid <- function(x) 1 / (1 + exp(-x))
Logit <- function(x) log(x / (1 - x))
Sigmoid <- function(x) 1.0 / (1.0 + exp(-x))
Logit <- function(x) log(x / (1.0 - x))
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -39,13 +41,13 @@ test <- agaricus.test
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(params, dtrain, 10)
model <- lgb.train(params, dtrain, 10L)
tree_interpretation <- lgb.interprete(model, test$data, 1:5)
tree_interpretation <- lgb.interprete(model, test$data, 1L:5L)
}
@@ -32,11 +32,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
lgb.save(model, "model.txt")
load_booster <- lgb.load(filename = "model.txt")
@@ -47,12 +47,12 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(params, dtrain, 10)
model <- lgb.train(params, dtrain, 10L)
tree_dt <- lgb.model.dt.tree(model)
@@ -6,9 +6,9 @@
\usage{
lgb.plot.importance(
tree_imp,
top_n = 10,
top_n = 10L,
measure = "Gain",
left_margin = 10,
left_margin = 10L,
cex = NULL
)
}
@@ -42,14 +42,14 @@ dtrain <- lgb.Dataset(train$data, label = train$label)
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(params, dtrain, 10)
tree_imp <- lgb.importance(model, percentage = TRUE)
lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain")
lgb.plot.importance(tree_imp, top_n = 10L, measure = "Gain")
}
@@ -6,9 +6,9 @@
\usage{
lgb.plot.interpretation(
tree_interpretation_dt,
top_n = 10,
cols = 1,
left_margin = 10,
top_n = 10L,
cols = 1L,
left_margin = 10L,
cex = NULL
)
}
@@ -35,8 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio
}
\examples{
library(lightgbm)
Sigmoid <- function(x) {1 / (1 + exp(-x))}
Logit <- function(x) {log(x / (1 - x))}
Sigmoid <- function(x) {1.0 / (1.0 + exp(-x))}
Logit <- function(x) {log(x / (1.0 - x))}
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -47,13 +47,13 @@ test <- agaricus.test
params <- list(
objective = "binary"
, learning_rate = 0.01
, num_leaves = 63
, max_depth = -1
, min_data_in_leaf = 1
, min_sum_hessian_in_leaf = 1
, num_leaves = 63L
, max_depth = -1L
, min_data_in_leaf = 1L
, min_sum_hessian_in_leaf = 1.0
)
model <- lgb.train(params, dtrain, 10)
model <- lgb.train(params, dtrain, 10L)
tree_interpretation <- lgb.interprete(model, test$data, 1:5)
lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
tree_interpretation <- lgb.interprete(model, test$data, 1L:5L)
lgb.plot.interpretation(tree_interpretation[[1L]], top_n = 10L)
}
@@ -31,16 +31,16 @@ new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
str(new_iris$data)
data(iris) # Erase iris dataset
iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
iris$Species[1L] <- "NEW FACTOR" # Introduce junk factor (NA)
# Use conversion using known rules
# Unknown factors become 0, excellent for sparse datasets
newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
# Unknown factor is now zero, perfect for sparse datasets
newer_iris$data[1, ] # Species became 0 as it is an unknown factor
newer_iris$data[1L, ] # Species became 0 as it is an unknown factor
newer_iris$data[1, 5] <- 1 # Put back real initial value
newer_iris$data[1L, 5L] <- 1.0 # Put back real initial value
# Is the newly created dataset equal? YES!
all.equal(new_iris$data, newer_iris$data)
@@ -49,9 +49,9 @@ all.equal(new_iris$data, newer_iris$data)
data(iris) # Erase iris dataset
# We remapped values differently
personal_rules <- list(Species = c("setosa" = 3,
"versicolor" = 2,
"virginica" = 1))
personal_rules <- list(Species = c("setosa" = 3L,
"versicolor" = 2L,
"virginica" = 1L))
newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
str(newest_iris$data) # SUCCESS!
@@ -32,11 +32,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
lgb.save(model, "model.txt")
@@ -7,11 +7,11 @@
lgb.train(
params = list(),
data,
nrounds = 10,
nrounds = 10L,
valids = list(),
obj = NULL,
eval = NULL,
verbose = 1,
verbose = 1L,
record = TRUE,
eval_freq = 1L,
init_model = NULL,
@@ -93,10 +93,10 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
}
@@ -21,7 +21,9 @@ environment. Defaults to \code{FALSE} which means to not remove them.}
NULL invisibly.
}
\description{
Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object.
Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R.
This is useful for instance if an object becomes stuck for no apparent reason and you do not want
to restart R to fix the lost object.
}
\examples{
library(lightgbm)
@@ -36,11 +38,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
\dontrun{
@@ -9,8 +9,8 @@ lightgbm(
label = NULL,
weight = NULL,
params = list(),
nrounds = 10,
verbose = 1,
nrounds = 10L,
verbose = 1L,
eval_freq = 1L,
early_stopping_rounds = NULL,
save_name = "lightgbm.model",
@@ -41,12 +41,12 @@ the \code{lgb.Booster} object passed to \code{object}.}
}
\value{
For regression or binary classification, it returns a vector of length \code{nrows(data)}.
For multiclass classification, either a \code{num_class * nrows(data)} vector or
a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
For multiclass classification, either a \code{num_class * nrows(data)} vector or
a \code{(nrows(data), num_class)} dimension matrix is returned, depending on
the \code{reshape} value.
When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees.
When \code{predleaf = TRUE}, the output is a matrix object with the
number of columns corresponding to the number of trees.
}
\description{
Predicted values based on class \code{lgb.Booster}
@@ -64,11 +64,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
preds <- predict(model, test$data)
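(An aside, not part of this diff: a minimal sketch of the reshape behaviour described in the \value section above, using a multiclass booster similar to the iris example in the tests further down; all object names here are illustrative only.)

library(lightgbm)
lb <- as.numeric(iris$Species) - 1.0
mc_model <- lightgbm(
    data = as.matrix(iris[, -5L])
    , label = lb
    , num_leaves = 4L
    , nrounds = 5L
    , objective = "multiclass"
    , num_class = 3L
)
preds_vec <- predict(mc_model, as.matrix(iris[, -5L]))                   # flat vector of length nrow(iris) * 3L
preds_mat <- predict(mc_model, as.matrix(iris[, -5L]), reshape = TRUE)   # nrow(iris) x 3L matrix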
@@ -30,11 +30,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
saveRDS.lgb.Booster(model, "model.rds")
new_model <- readRDS.lgb.Booster("model.rds")
@@ -54,11 +54,11 @@ valids <- list(test = dtest)
model <- lgb.train(
params = params
, data = dtrain
, nrounds = 10
, nrounds = 10L
, valids = valids
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 5
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 5L
)
saveRDS.lgb.Booster(model, "model.rds")
}
@@ -12,11 +12,11 @@ setinfo(dataset, ...)
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{...}{other parameters}
\item{name}{the name of the field to get}
\item{info}{the specific field of information to set}
\item{...}{other parameters}
}
\value{
passed object
@@ -12,9 +12,9 @@ slice(dataset, ...)
\arguments{
\item{dataset}{Object of class \code{lgb.Dataset}}
\item{idxset}{an integer vector of indices of rows needed}
\item{...}{other parameters (currently not used)}
\item{idxset}{an integer vector of indices of rows needed}
}
\value{
constructed sub dataset
@@ -29,7 +29,7 @@ data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label)
dsub <- lightgbm::slice(dtrain, 1:42)
dsub <- lightgbm::slice(dtrain, seq_len(42L))
lgb.Dataset.construct(dsub)
labels <- lightgbm::getinfo(dsub, "label")
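(An aside, not part of this diff: a likely reason seq_len(42L) is preferred over 1:42 by the new linting rules is that the 1:n idiom misbehaves when n happens to be zero.)

n <- 0L
1:n          # c(1L, 0L) -- counts backwards, a common source of bugs
seq_len(n)   # integer(0) -- an empty sequence, which is usually what was meant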
@@ -3,20 +3,20 @@ use_precompile <- FALSE
use_gpu <- FALSE
use_mingw <- FALSE
if (.Machine$sizeof.pointer != 8){
if (.Machine$sizeof.pointer != 8L) {
stop("Only support 64-bit R, please check your the version of your R and Rtools.")
}
R_int_UUID <- .Internal(internalsID())
R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10
R_ver <- as.double(R.Version()$major) + as.double(R.Version()$minor) / 10.0
if (!(R_int_UUID == "0310d4b8-ccb1-4bb8-ba94-d36a55f60262"
|| R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")){
|| R_int_UUID == "2fdf6c18-697a-4ba7-b8ef-11c0d92f1327")) {
print("Warning: unmatched R_INTERNALS_UUID, may cannot run normally.")
}
# Move in CMakeLists.txt
if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)){
if (!file.copy("../inst/bin/CMakeLists.txt", "CMakeLists.txt", overwrite = TRUE)) {
stop("Copying CMakeLists failed")
}
@@ -48,14 +48,14 @@ if (!use_precompile) {
# Using this kind-of complicated pattern to avoid matching to
# things like "pgcc"
using_gcc <- grepl(
pattern = '^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$'
, x = Sys.getenv('CC', '')
pattern = "^gcc$|[/\\]+gcc$|^gcc\\-[0-9]+$|[/\\]+gcc\\-[0-9]+$"
, x = Sys.getenv("CC", "")
)
using_gpp <- grepl(
pattern = '^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$'
, x = Sys.getenv('CXX', '')
pattern = "^g\\+\\+$|[/\\]+g\\+\\+$|^g\\+\\+\\-[0-9]+$|[/\\]+g\\+\\+\\-[0-9]+$"
, x = Sys.getenv("CXX", "")
)
on_mac <- Sys.info()['sysname'] == 'Darwin'
on_mac <- Sys.info()["sysname"] == "Darwin"
if (on_mac && !(using_gcc & using_gpp)) {
cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" ')
cmake_cmd <- paste(cmake_cmd, ' -DOpenMP_C_LIB_NAMES="omp" ')
@@ -71,21 +71,21 @@ if (!use_precompile) {
build_cmd <- "mingw32-make.exe _lightgbm"
system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due to sh.exe in Rtools
} else {
try_vs <- 0
try_vs <- 0L
local_vs_def <- ""
vs_versions <- c("Visual Studio 16 2019", "Visual Studio 15 2017", "Visual Studio 14 2015")
for (vs in vs_versions){
for (vs in vs_versions) {
vs_def <- paste0(" -G \"", vs, "\" -A x64")
tmp_cmake_cmd <- paste0(cmake_cmd, vs_def)
try_vs <- system(paste0(tmp_cmake_cmd, " .."))
if (try_vs == 0) {
local_vs_def = vs_def
if (try_vs == 0L) {
local_vs_def <- vs_def
break
} else {
unlink("./*", recursive = TRUE) # Clean up build directory
}
}
if (try_vs == 1) {
if (try_vs == 1L) {
cmake_cmd <- paste0(cmake_cmd, " -G \"MinGW Makefiles\" ") # Switch to MinGW on failure, try build once
system(paste0(cmake_cmd, " ..")) # Must build twice for Windows due to sh.exe in Rtools
build_cmd <- "mingw32-make.exe _lightgbm"
context("basic functions")
data(agaricus.train, package = 'lightgbm')
data(agaricus.test, package = 'lightgbm')
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
train <- agaricus.train
test <- agaricus.test
windows_flag = grepl('Windows', Sys.info()[['sysname']])
windows_flag <- grepl("Windows", Sys.info()[["sysname"]])
test_that("train and predict binary classification", {
nrounds = 10
nrounds <- 10L
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5
, num_leaves = 5L
, nrounds = nrounds
, objective = "binary"
, metric = "binary_error"
@@ -22,38 +22,38 @@ test_that("train and predict binary classification", {
expect_lt(min(record_results), 0.02)
pred <- predict(bst, test$data)
expect_equal(length(pred), 1611)
expect_equal(length(pred), 1611L)
pred1 <- predict(bst, train$data, num_iteration = 1)
expect_equal(length(pred1), 6513)
err_pred1 <- sum( (pred1 > 0.5) != train$label) / length(train$label)
err_log <- record_results[1]
pred1 <- predict(bst, train$data, num_iteration = 1L)
expect_equal(length(pred1), 6513L)
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
err_log <- record_results[1L]
expect_lt(abs(err_pred1 - err_log), 10e-6)
})
test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1
lb <- as.numeric(iris$Species) - 1L
bst <- lightgbm(
data = as.matrix(iris[, -5])
data = as.matrix(iris[, -5L])
, label = lb
, num_leaves = 4
, num_leaves = 4L
, learning_rate = 0.1
, nrounds = 20
, min_data = 20
, min_hess = 20
, nrounds = 20L
, min_data = 20L
, min_hess = 20.0
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3
, num_class = 3L
)
expect_false(is.null(bst$record_evals))
record_results <- lgb.get.eval.result(bst, "train", "multi_error")
expect_lt(min(record_results), 0.03)
pred <- predict(bst, as.matrix(iris[, -5]))
expect_equal(length(pred), nrow(iris) * 3)
pred <- predict(bst, as.matrix(iris[, -5L]))
expect_equal(length(pred), nrow(iris) * 3L)
})
@@ -61,11 +61,11 @@ test_that("use of multiple eval metrics works", {
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 4
, learning_rate = 1
, nrounds = 10
, num_leaves = 4L
, learning_rate = 1.0
, nrounds = 10L
, objective = "binary"
, metric = list("binary_error","auc","binary_logloss")
, metric = list("binary_error", "auc", "binary_logloss")
)
expect_false(is.null(bst$record_evals))
})
@@ -78,28 +78,28 @@ test_that("training continuation works", {
, label = train$label
, free_raw_data = FALSE
)
watchlist = list(train = dtrain)
watchlist <- list(train = dtrain)
param <- list(
objective = "binary"
, metric = "binary_logloss"
, num_leaves = 5
, learning_rate = 1
, num_leaves = 5L
, learning_rate = 1.0
)
# for the reference, use 10 iterations at once:
bst <- lgb.train(param, dtrain, nrounds = 10, watchlist)
err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10)
bst <- lgb.train(param, dtrain, nrounds = 10L, watchlist)
err_bst <- lgb.get.eval.result(bst, "train", "binary_logloss", 10L)
# first 5 iterations:
bst1 <- lgb.train(param, dtrain, nrounds = 5, watchlist)
bst1 <- lgb.train(param, dtrain, nrounds = 5L, watchlist)
# test continuing from a model in file
lgb.save(bst1, "lightgbm.model")
# continue for 5 more:
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = bst1)
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10)
bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = bst1)
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L)
expect_lt(abs(err_bst - err_bst2), 0.01)
bst2 <- lgb.train(param, dtrain, nrounds = 5, watchlist, init_model = "lightgbm.model")
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10)
bst2 <- lgb.train(param, dtrain, nrounds = 5L, watchlist, init_model = "lightgbm.model")
err_bst2 <- lgb.get.eval.result(bst2, "train", "binary_logloss", 10L)
expect_lt(abs(err_bst - err_bst2), 0.01)
})
@@ -110,11 +110,11 @@ test_that("cv works", {
bst <- lgb.cv(
params
, dtrain
, 10
, nfold = 5
, min_data = 1
, learning_rate = 1
, early_stopping_rounds = 10
, 10L
, nfold = 5L
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
)
expect_false(is.null(bst$record_evals))
})
context('Test models with custom objective')
context("Test models with custom objective")
data(agaricus.train, package = 'lightgbm')
data(agaricus.test, package = 'lightgbm')
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1 / (1 + exp(-preds))
preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
hess <- preds * (1.0 - preds)
return(list(grad = grad, hess = hess))
}
evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(
name = "error"
, value = err
@@ -25,12 +25,12 @@ evalerror <- function(preds, dtrain) {
}
param <- list(
num_leaves = 8
, learning_rate = 1
num_leaves = 8L
, learning_rate = 1.0
, objective = logregobj
, metric = "auc"
)
num_round <- 10
num_round <- 10L
test_that("custom objective works", {
bst <- lgb.train(param, dtrain, num_round, watchlist, eval = evalerror)
@@ -3,49 +3,49 @@ require(Matrix)
context("testing lgb.Dataset functionality")
data(agaricus.test, package = 'lightgbm')
test_data <- agaricus.test$data[1:100,]
test_label <- agaricus.test$label[1:100]
data(agaricus.test, package = "lightgbm")
test_data <- agaricus.test$data[1L:100L, ]
test_label <- agaricus.test$label[1L:100L]
test_that("lgb.Dataset: basic construction, saving, loading", {
# from sparse matrix
dtest1 <- lgb.Dataset(test_data, label = test_label)
# from dense matrix
dtest2 <- lgb.Dataset(as.matrix(test_data), label = test_label)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
# save to a local file
tmp_file <- tempfile('lgb.Dataset_')
tmp_file <- tempfile("lgb.Dataset_")
lgb.Dataset.save(dtest1, tmp_file)
# read from a local file
dtest3 <- lgb.Dataset(tmp_file)
lgb.Dataset.construct(dtest3)
unlink(tmp_file)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
expect_equal(getinfo(dtest1, "label"), getinfo(dtest3, "label"))
})
test_that("lgb.Dataset: getinfo & setinfo", {
dtest <- lgb.Dataset(test_data)
dtest$construct()
setinfo(dtest, 'label', test_label)
labels <- getinfo(dtest, 'label')
expect_equal(test_label, getinfo(dtest, 'label'))
setinfo(dtest, "label", test_label)
labels <- getinfo(dtest, "label")
expect_equal(test_label, getinfo(dtest, "label"))
expect_true(length(getinfo(dtest, 'weight')) == 0)
expect_true(length(getinfo(dtest, 'init_score')) == 0)
expect_true(length(getinfo(dtest, "weight")) == 0L)
expect_true(length(getinfo(dtest, "init_score")) == 0L)
# any other label should error
expect_error(setinfo(dtest, 'asdf', test_label))
expect_error(setinfo(dtest, "asdf", test_label))
})
test_that("lgb.Dataset: slice, dim", {
dtest <- lgb.Dataset(test_data, label = test_label)
lgb.Dataset.construct(dtest)
expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42)
dsub1 <- slice(dtest, seq_len(42L))
lgb.Dataset.construct(dsub1)
expect_equal(nrow(dsub1), 42)
expect_equal(nrow(dsub1), 42L)
expect_equal(ncol(dsub1), ncol(test_data))
})
@@ -54,15 +54,17 @@ test_that("lgb.Dataset: colnames", {
expect_equal(colnames(dtest), colnames(test_data))
lgb.Dataset.construct(dtest)
expect_equal(colnames(dtest), colnames(test_data))
expect_error( colnames(dtest) <- 'asdf')
new_names <- make.names(1:ncol(test_data))
expect_error({
colnames(dtest) <- "asdf"
})
new_names <- make.names(seq_len(ncol(test_data)))
expect_silent(colnames(dtest) <- new_names)
expect_equal(colnames(dtest), new_names)
})
test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
nr <- 1000
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
nr <- 1000L
x <- Matrix::rsparsematrix(nr, 100L, density = 0.0005)
# we want it very sparse, so that last rows are empty
expect_lt(max(x@i), nr)
dtest <- lgb.Dataset(x)
@@ -70,7 +72,7 @@ test_that("lgb.Dataset: nrow is correct for a very sparse matrix", {
})
test_that("lgb.Dataset: Dataset should be able to construct from matrix and return non-null handle", {
rawData <- matrix(runif(1000), ncol = 10)
rawData <- matrix(runif(1000L), ncol = 10L)
handle <- NA_real_
ref_handle <- NULL
handle <- lightgbm:::lgb.call(