Unverified Commit 3b1f16d1 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[tests][R-package] use one `#` symbol for comments (#4940)

* Update lgb.cv.R

* Update test_learning_to_rank.R

* Update test_basic.R
parent a6d1f198
......@@ -528,14 +528,14 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
#' @importFrom stats quantile
lgb.stratified.folds <- function(y, k) {
## Group the numeric data based on their magnitudes
## and sample within those groups.
## When the number of samples is low, we may have
## issues further slicing the numeric data into
## groups. The number of groups will depend on the
## ratio of the number of folds to the sample size.
## At most, we will use quantiles. If the sample
## is too small, we just do regular unstratified CV
# Group the numeric data based on their magnitudes
# and sample within those groups.
# When the number of samples is low, we may have
# issues further slicing the numeric data into
# groups. The number of groups will depend on the
# ratio of the number of folds to the sample size.
# At most, we will use quantiles. If the sample
# is too small, we just do regular unstratified CV
if (is.numeric(y)) {
cuts <- length(y) %/% k
......@@ -555,29 +555,28 @@ lgb.stratified.folds <- function(y, k) {
if (k < length(y)) {
## Reset levels so that the possible levels and
## the levels in the vector are the same
# Reset levels so that the possible levels and
# the levels in the vector are the same
y <- as.factor(as.character(y))
numInClass <- table(y)
foldVector <- vector(mode = "integer", length(y))
## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
# For each class, balance the fold allocation as far
# as possible, then resample the remainder.
# The final assignment of folds is also randomized.
for (i in seq_along(numInClass)) {
## Create a vector of integers from 1:k as many times as possible without
## going over the number of samples in the class. Note that if the number
## of samples in a class is less than k, nothing is produced here.
# Create a vector of integers from 1:k as many times as possible without
# going over the number of samples in the class. Note that if the number
# of samples in a class is less than k, nothing is produced here.
seqVector <- rep(seq_len(k), numInClass[i] %/% k)
## Add enough random integers to get length(seqVector) == numInClass[i]
# Add enough random integers to get length(seqVector) == numInClass[i]
if (numInClass[i] %% k > 0L) {
seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k))
}
## Shuffle the integers for fold assignment and assign to this classes's data
# Shuffle the integers for fold assignment and assign to this classes's data
foldVector[y == dimnames(numInClass)$y[i]] <- sample(seqVector)
}
......
......@@ -244,8 +244,8 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
)
top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
# check type just to be sure the subsetting didn't return a NULL
expect_true(is.numeric(top_level_l2))
......@@ -781,8 +781,8 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
)
top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
# check type just to be sure the subsetting didn't return a NULL
expect_true(is.numeric(top_level_l2))
......@@ -1003,7 +1003,7 @@ test_that("lgb.train() works with early stopping for classification", {
# train with early stopping #
#############################
early_stopping_rounds <- 5L
bst <- lgb.train(
bst <- lgb.train(
params = list(
objective = "binary"
, metric = "binary_error"
......@@ -1115,7 +1115,7 @@ test_that("lgb.train() works with early stopping for classification with a metri
#############################
early_stopping_rounds <- 5L
# the harsh max_depth guarantees that AUC improves over at least the first few iterations
bst_auc <- lgb.train(
bst_auc <- lgb.train(
params = list(
objective = "binary"
, metric = "auc"
......@@ -1129,7 +1129,7 @@ test_that("lgb.train() works with early stopping for classification with a metri
"valid1" = dvalid
)
)
bst_binary_error <- lgb.train(
bst_binary_error <- lgb.train(
params = list(
objective = "binary"
, metric = "binary_error"
......@@ -1213,7 +1213,7 @@ test_that("lgb.train() works with early stopping for regression", {
# train with early stopping #
#############################
early_stopping_rounds <- 5L
bst <- lgb.train(
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "rmse"
......@@ -1577,7 +1577,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th
# train with early stopping #
#############################
early_stopping_rounds <- 5L
bst <- lgb.train(
bst <- lgb.train(
params = list(
objective = "regression"
, metric = c(
......
......@@ -20,7 +20,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
, group = rep(150L, 40L)
)
ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
params <- list(
objective = "lambdarank"
, metric = "ndcg"
......@@ -74,7 +74,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
, group = rep(150L, 40L)
)
ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
params <- list(
objective = "lambdarank"
, metric = "ndcg"
......@@ -97,7 +97,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
# "valid" should contain results for each metric
eval_results <- cv_bst$record_evals[["valid"]]
eval_names <- c("ndcg@1", "ndcg@2", "ndcg@3")
eval_names <- c("ndcg@1", "ndcg@2", "ndcg@3")
expect_identical(names(eval_results), eval_names)
# check that best score and iter make sense (0.0 < nDCG < 1.0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment