"vscode:/vscode.git/clone" did not exist on "e40c9e291898d9f2868c4075ccfd6b05b2414d19"
Unverified Commit 8359da63 authored by José Morales's avatar José Morales Committed by GitHub
Browse files

[R-package] allow use of `categorical_features` in Dataset when raw data does...


[R-package] allow use of `categorical_features` in Dataset when raw data does not have column names (fixes #4374) (#5184)

* check the number of columns if data is a matrix for the categorical indices check

* check for error when using a greater index than the number of columns

* apply suggestion
Co-authored-by: James Lamb <jaylamb20@gmail.com>

* revert whitespace change

* check whether the data is a filename instead of checking whether it is a matrix
Co-authored-by: James Lamb <jaylamb20@gmail.com>
parent f53fa691
......@@ -169,12 +169,13 @@ Dataset <- R6::R6Class(
} else {
# Check if more categorical features were output over the feature space
if (max(private$categorical_feature) > length(private$colnames)) {
data_is_not_filename <- !is.character(private$raw_data)
if (data_is_not_filename && max(private$categorical_feature) > ncol(private$raw_data)) {
stop(
"lgb.self.get.handle: supplied a too large value in categorical_feature: "
, max(private$categorical_feature)
, " but only "
, length(private$colnames)
, ncol(private$raw_data)
, " features"
)
}
......
......@@ -548,3 +548,18 @@ test_that("lgb.Dataset$get_feature_num_bin() works", {
actual_num_bins <- sapply(1L:5L, ds$get_feature_num_bin)
expect_identical(actual_num_bins, expected_num_bins)
})
test_that("lgb.Dataset can be constructed with categorical features and without colnames", {
  # Input: a 100 x 1 integer matrix of alternating 0/1 values, created without
  # dimnames, so there are no column names to resolve a categorical feature by.
  dense_input <- matrix(rep(c(0L, 1L), 50L), ncol = 1L)

  # Referring to the only column by position must work for the dense matrix ...
  dense_ds <- lgb.Dataset(dense_input, categorical_feature = 1L)$construct()

  # ... and for the same data coerced to a sparse "dgCMatrix".
  sparse_input <- as(dense_input, "dgCMatrix")
  sparse_ds <- lgb.Dataset(sparse_input, categorical_feature = 1L)$construct()

  # After construction, the private `colnames` field is still NULL for both.
  expect_null(dense_ds$.__enclos_env__$private$colnames)
  expect_null(sparse_ds$.__enclos_env__$private$colnames)

  # A categorical index past the last column (2 > 1) must fail with a message
  # that reports both the offending index and the number of features.
  expect_error(
    lgb.Dataset(dense_input, categorical_feature = 2L)$construct()
    , regexp = "supplied a too large value in categorical_feature: 2 but only 1 features"
  )
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment