Unverified commit 8359da63, authored by José Morales, committed by GitHub
Browse files

[R-package] allow use of `categorical_feature` in Dataset when raw data does...


[R-package] allow use of `categorical_feature` in Dataset when raw data does not have column names (fixes #4374) (#5184)

* check the number of columns if data is a matrix for the categorical indices check

* check for error when using a greater index than the number of columns

* apply suggestion
Co-authored-by: James Lamb <jaylamb20@gmail.com>

* revert whitespace change

* check if is filename instead of matrix
Co-authored-by: James Lamb <jaylamb20@gmail.com>
parent f53fa691
...@@ -169,12 +169,13 @@ Dataset <- R6::R6Class( ...@@ -169,12 +169,13 @@ Dataset <- R6::R6Class(
} else { } else {
# Check if more categorical features were output over the feature space # Check if more categorical features were output over the feature space
if (max(private$categorical_feature) > length(private$colnames)) { data_is_not_filename <- !is.character(private$raw_data)
if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) {
stop( stop(
"lgb.self.get.handle: supplied a too large value in categorical_feature: " "lgb.self.get.handle: supplied a too large value in categorical_feature: "
, max(private$categorical_feature) , max(private$categorical_feature)
, " but only " , " but only "
, length(private$colnames) , ncol(private$raw_data)
, " features" , " features"
) )
} }
......
...@@ -548,3 +548,18 @@ test_that("lgb.Dataset$get_feature_num_bin() works", { ...@@ -548,3 +548,18 @@ test_that("lgb.Dataset$get_feature_num_bin() works", {
actual_num_bins <- sapply(1L:5L, ds$get_feature_num_bin) actual_num_bins <- sapply(1L:5L, ds$get_feature_num_bin)
expect_identical(actual_num_bins, expected_num_bins) expect_identical(actual_num_bins, expected_num_bins)
}) })
test_that("lgb.Dataset can be constructed with categorical features and without colnames", {
    # Fixtures: one unnamed integer column of alternating 0/1 values,
    # once as a dense base matrix and once coerced to a sparse dgCMatrix.
    dense_x <- matrix(rep(c(0L, 1L), 50L), ncol = 1L)
    sparse_x <- as(dense_x, "dgCMatrix")

    # Referring to the single column by position must not prevent
    # construction, even though the data carries no column names.
    dense_ds <- lgb.Dataset(dense_x, categorical_feature = 1L)$construct()
    sparse_ds <- lgb.Dataset(sparse_x, categorical_feature = 1L)$construct()

    # Neither constructed Dataset should have column names stored in its
    # private environment.
    for (constructed in list(dense_ds, sparse_ds)) {
        expect_null(constructed$.__enclos_env__$private$colnames)
    }

    # A categorical index beyond the number of columns (2 > 1) must fail
    # with the "too large value" message.
    expect_error(
        lgb.Dataset(dense_x, categorical_feature = 2L)$construct()
        , regexp = "supplied a too large value in categorical_feature: 2 but only 1 features"
    )
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment