Unverified Commit 0d59859c authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Throw an error when non-ASCII characters are encountered (#2229)

* Throw an error when non-ASCII characters are encountered.

* check ascii for config strings.
parent 0d02499e
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <LightGBM/config.h> #include <LightGBM/config.h>
#include <LightGBM/feature_group.h> #include <LightGBM/feature_group.h>
#include <LightGBM/meta.h> #include <LightGBM/meta.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h> #include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h> #include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h> #include <LightGBM/utils/text_reader.h>
...@@ -555,6 +556,10 @@ class Dataset { ...@@ -555,6 +556,10 @@ class Dataset {
// replace ' ' in feature_names with '_' // replace ' ' in feature_names with '_'
bool spaceInFeatureName = false; bool spaceInFeatureName = false;
for (auto& feature_name : feature_names_) { for (auto& feature_name : feature_names_) {
// check ascii
if (!Common::CheckASCII(feature_name)) {
Log::Fatal("Do not support non-ascii characters in feature name.");
}
if (feature_name.find(' ') != std::string::npos) { if (feature_name.find(' ') != std::string::npos) {
spaceInFeatureName = true; spaceInFeatureName = true;
std::replace(feature_name.begin(), feature_name.end(), ' ', '_'); std::replace(feature_name.begin(), feature_name.end(), ' ', '_');
......
...@@ -895,6 +895,15 @@ static T SafeLog(T x) { ...@@ -895,6 +895,15 @@ static T SafeLog(T x) {
} }
} }
// Returns true when every byte of `s` lies in the 7-bit ASCII range (0..127).
// An empty string contains no offending byte, so it yields true.
inline bool CheckASCII(const std::string& s) {
  const std::string::size_type length = s.size();
  for (std::string::size_type pos = 0; pos < length; ++pos) {
    // Compare as unsigned char so the result does not depend on whether
    // plain char is signed on the target platform.
    const unsigned char byte = static_cast<unsigned char>(s[pos]);
    if (byte >= 128) {
      return false;
    }
  }
  return true;
}
} // namespace Common } // namespace Common
} // namespace LightGBM } // namespace LightGBM
......
...@@ -17,6 +17,9 @@ void Config::KV2Map(std::unordered_map<std::string, std::string>& params, const ...@@ -17,6 +17,9 @@ void Config::KV2Map(std::unordered_map<std::string, std::string>& params, const
if (tmp_strs.size() == 2) { if (tmp_strs.size() == 2) {
std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0])); std::string key = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[0]));
std::string value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1])); std::string value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
if (!Common::CheckASCII(key) || !Common::CheckASCII(value)) {
Log::Fatal("Do not support non-ascii characters in config.");
}
if (key.size() > 0) { if (key.size() > 0) {
auto value_search = params.find(key); auto value_search = params.find(key);
if (value_search == params.end()) { // not set if (value_search == params.end()) { // not set
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment