Unverified commit aae4fe40, authored by James Lamb, committed by GitHub
Browse files

[R-package] add support for non-ASCII feature names (fixes #2983) (#3647)

* [R-package] add support for non-ASCII feature names (fixes #2983)

* fix Windows
parent 85b9daa9
context("lightgbm()") context("lightgbm()")
ON_WINDOWS <- .Platform$OS.type == "windows"
data(agaricus.train, package = "lightgbm") data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm") data(agaricus.test, package = "lightgbm")
train <- agaricus.train train <- agaricus.train
...@@ -1168,7 +1170,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th ...@@ -1168,7 +1170,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
test_that("lgb.train() supports non-ASCII feature names", { test_that("lgb.train() supports non-ASCII feature names", {
testthat::skip("UTF-8 feature names are not fully supported in the R package")
dtrain <- lgb.Dataset( dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L) data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L) , label = rnorm(100L)
...@@ -1185,10 +1186,21 @@ test_that("lgb.train() supports non-ASCII feature names", { ...@@ -1185,10 +1186,21 @@ test_that("lgb.train() supports non-ASCII feature names", {
) )
expect_true(lgb.is.Booster(bst)) expect_true(lgb.is.Booster(bst))
dumped_model <- jsonlite::fromJSON(bst$dump_model()) dumped_model <- jsonlite::fromJSON(bst$dump_model())
# UTF-8 strings are not well-supported on Windows
# * https://developer.r-project.org/Blog/public/2020/05/02/utf-8-support-on-windows/
# * https://developer.r-project.org/Blog/public/2020/07/30/windows/utf-8-build-of-r-and-cran-packages/index.html
if (!ON_WINDOWS) {
expect_identical( expect_identical(
dumped_model[["feature_names"]] dumped_model[["feature_names"]]
, feature_names , feature_names
) )
} else {
expect_identical(
dumped_model[["feature_names"]]
, iconv(feature_names, to = "UTF-8")
)
}
}) })
test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", { test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment