Commit f8318088 (unverified), authored by Chen Yufei, committed by GitHub
Browse files

Precise text file parsing (#4081)



* New build option: USE_PRECISE_TEXT_PARSER.

Use fast_double_parser for text file parsing. For each number, fall back
to strtod in case of parse failure.

* Add benchmark for CSVParser with Atof and AtofPrecise.

* Fix lint complaint.

* Fix typo in open result error message.

* Revert "Fix lint complaint."

This reverts commit 92ab0b6bce9f17d7be9eaeb20f19d4a0a36f0387.

* Revert "Add benchmark for CSVParser with Atof and AtofPrecise."

This reverts commit 4f8639abd06c679d4382eb715a1793afd94df3d2.

* Use AtofPrecise in Common::__StringToTHelper.

* [option] precise_float_parser: precise float number parsing for text input.

* Remove USE_PRECISE_TEXT_PARSER compile option.

* test: add test for Common::AtofPrecise.

* test: remove ChunkedArrayTest with 0 length.

This triggers Log::Fatal which aborts the test program.

* fix lint, add copyright.

* Revert "test: remove ChunkedArrayTest with 0 length."

This reverts commit 346c76affe9e78b6ca2738c4a56dbb9c00f31102.

* Use LightGBM::Common::Sign

* save precise_float_parser in model file.

* Fix error checking in AtofPrecise. Add more test cases.

* Remove test case that can't pass under macOS.

* Apply suggestions from code review
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 02467213
...@@ -820,6 +820,12 @@ Dataset Parameters ...@@ -820,6 +820,12 @@ Dataset Parameters
- **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent function - **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent function
- ``precise_float_parser`` :raw-html:`<a id="precise_float_parser" title="Permalink to this parameter" href="#precise_float_parser">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- use precise floating point number parsing for text parser (e.g. CSV, TSV, LibSVM input)
- **Note**: setting this to ``true`` may lead to much slower text parsing
Predict Parameters Predict Parameters
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
......
...@@ -714,6 +714,10 @@ struct Config { ...@@ -714,6 +714,10 @@ struct Config {
// desc = **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent function // desc = **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent function
bool save_binary = false; bool save_binary = false;
// desc = use precise floating point number parsing for text parser (e.g. CSV, TSV, LibSVM input)
// desc = **Note**: setting this to ``true`` may lead to much slower text parsing
bool precise_float_parser = false;
#pragma endregion #pragma endregion
#pragma region Predict Parameters #pragma region Predict Parameters
......
...@@ -252,6 +252,8 @@ class Metadata { ...@@ -252,6 +252,8 @@ class Metadata {
/*! \brief Interface for Parser */ /*! \brief Interface for Parser */
class Parser { class Parser {
public: public:
typedef const char* (*AtofFunc)(const char* p, double* out);
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~Parser() {} virtual ~Parser() {}
...@@ -271,9 +273,10 @@ class Parser { ...@@ -271,9 +273,10 @@ class Parser {
* \param filename One Filename of data * \param filename One Filename of data
* \param num_features Pass num_features of this data file if you know, <=0 means don't know * \param num_features Pass num_features of this data file if you know, <=0 means don't know
* \param label_idx index of label column * \param label_idx index of label column
* \param precise_float_parser using precise floating point number parsing if true
* \return Object of parser * \return Object of parser
*/ */
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx); static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx, bool precise_float_parser);
}; };
/*! \brief The main class of data set, /*! \brief The main class of data set,
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <cmath> #include <cmath>
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <cstring> #include <cstring>
#include <functional> #include <functional>
#include <iomanip> #include <iomanip>
...@@ -330,6 +331,27 @@ inline static const char* Atof(const char* p, double* out) { ...@@ -330,6 +331,27 @@ inline static const char* Atof(const char* p, double* out) {
return p; return p;
} }
/*!
 * \brief Parse a double from the start of p, trying fast_double_parser first and
 *        falling back to std::strtod when the fast parser rejects the input.
 * \param p Text to parse (handed to std::strtod on the fallback path, so it must be null-terminated)
 * \param out Receives the parsed value
 * \return Pointer to the first character after the parsed number
 * \note Calls Log::Fatal when strtod converts nothing or reports ERANGE.
 */
inline static const char* AtofPrecise(const char* p, double* out) {
  const char* fast_end = fast_double_parser::parse_number(p, out);
  if (fast_end == nullptr) {
    // Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc. Fallback to standard library:
    char* slow_end;
    errno = 0;  // Clear any stale value so ERANGE below can only come from this strtod call.
    *out = std::strtod(p, &slow_end);  // strtod is locale aware.
    if (slow_end == p) {
      // strtod left the end pointer at the start: nothing was converted.
      Log::Fatal("no conversion to double for: %s", p);
    }
    if (errno == ERANGE) {
      Log::Fatal("convert to double got underflow or overflow: %s", p);
    }
    return slow_end;
  }
  return fast_end;
}
inline static bool AtoiAndCheck(const char* p, int* out) { inline static bool AtoiAndCheck(const char* p, int* out) {
const char* after = Atoi(p, out); const char* after = Atoi(p, out);
if (*after != '\0') { if (*after != '\0') {
...@@ -1079,22 +1101,8 @@ struct __StringToTHelper<T, true> { ...@@ -1079,22 +1101,8 @@ struct __StringToTHelper<T, true> {
T operator()(const std::string& str) const { T operator()(const std::string& str) const {
double tmp; double tmp;
// Fast (common) path: For numeric inputs in RFC 7159 format: const char* end = Common::AtofPrecise(str.c_str(), &tmp);
const bool fast_parse_succeeded = fast_double_parser::parse_number(str.c_str(), &tmp); if (end == str.c_str()) {
// Rare path: Not in RFC 7159 format. Possible "inf", "nan", etc.
if (!fast_parse_succeeded) {
std::string strlower(str);
std::transform(strlower.begin(), strlower.end(), strlower.begin(), [](int c) -> char { return static_cast<char>(::tolower(c)); });
if (strlower == std::string("inf"))
tmp = std::numeric_limits<double>::infinity();
else if (strlower == std::string("-inf"))
tmp = -std::numeric_limits<double>::infinity();
else if (strlower == std::string("nan"))
tmp = std::numeric_limits<double>::quiet_NaN();
else if (strlower == std::string("-nan"))
tmp = -std::numeric_limits<double>::quiet_NaN();
else
Log::Fatal("Failed to parse double: %s", str.c_str()); Log::Fatal("Failed to parse double: %s", str.c_str());
} }
......
...@@ -221,7 +221,8 @@ void Application::Predict() { ...@@ -221,7 +221,8 @@ void Application::Predict() {
if (config_.task == TaskType::KRefitTree) { if (config_.task == TaskType::KRefitTree) {
// create predictor // create predictor
Predictor predictor(boosting_.get(), 0, -1, false, true, false, false, 1, 1); Predictor predictor(boosting_.get(), 0, -1, false, true, false, false, 1, 1);
predictor.Predict(config_.data.c_str(), config_.output_result.c_str(), config_.header, config_.predict_disable_shape_check); predictor.Predict(config_.data.c_str(), config_.output_result.c_str(), config_.header, config_.predict_disable_shape_check,
config_.precise_float_parser);
TextReader<int> result_reader(config_.output_result.c_str(), false); TextReader<int> result_reader(config_.output_result.c_str(), false);
result_reader.ReadAllLines(); result_reader.ReadAllLines();
std::vector<std::vector<int>> pred_leaf(result_reader.Lines().size()); std::vector<std::vector<int>> pred_leaf(result_reader.Lines().size());
...@@ -251,7 +252,8 @@ void Application::Predict() { ...@@ -251,7 +252,8 @@ void Application::Predict() {
config_.pred_early_stop, config_.pred_early_stop_freq, config_.pred_early_stop, config_.pred_early_stop_freq,
config_.pred_early_stop_margin); config_.pred_early_stop_margin);
predictor.Predict(config_.data.c_str(), predictor.Predict(config_.data.c_str(),
config_.output_result.c_str(), config_.header, config_.predict_disable_shape_check); config_.output_result.c_str(), config_.header, config_.predict_disable_shape_check,
config_.precise_float_parser);
Log::Info("Finished prediction"); Log::Info("Finished prediction");
} }
} }
......
...@@ -160,13 +160,14 @@ class Predictor { ...@@ -160,13 +160,14 @@ class Predictor {
* \param data_filename Filename of data * \param data_filename Filename of data
* \param result_filename Filename of output result * \param result_filename Filename of output result
*/ */
void Predict(const char* data_filename, const char* result_filename, bool header, bool disable_shape_check) { void Predict(const char* data_filename, const char* result_filename, bool header, bool disable_shape_check, bool precise_float_parser) {
auto writer = VirtualFileWriter::Make(result_filename); auto writer = VirtualFileWriter::Make(result_filename);
if (!writer->Init()) { if (!writer->Init()) {
Log::Fatal("Prediction results file %s cannot be found", result_filename); Log::Fatal("Prediction results file %s cannot be created", result_filename);
} }
auto label_idx = header ? -1 : boosting_->LabelIdx(); auto label_idx = header ? -1 : boosting_->LabelIdx();
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(data_filename, header, boosting_->MaxFeatureIdx() + 1, label_idx)); auto parser = std::unique_ptr<Parser>(Parser::CreateParser(data_filename, header, boosting_->MaxFeatureIdx() + 1, label_idx,
precise_float_parser));
if (parser == nullptr) { if (parser == nullptr) {
Log::Fatal("Could not recognize the data format of data file %s", data_filename); Log::Fatal("Could not recognize the data format of data file %s", data_filename);
......
...@@ -709,7 +709,8 @@ class Booster { ...@@ -709,7 +709,8 @@ class Booster {
Predictor predictor(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib, Predictor predictor(boosting_.get(), start_iteration, num_iteration, is_raw_score, is_predict_leaf, predict_contrib,
config.pred_early_stop, config.pred_early_stop_freq, config.pred_early_stop_margin); config.pred_early_stop, config.pred_early_stop_freq, config.pred_early_stop_margin);
bool bool_data_has_header = data_has_header > 0 ? true : false; bool bool_data_has_header = data_has_header > 0 ? true : false;
predictor.Predict(data_filename, result_filename, bool_data_has_header, config.predict_disable_shape_check); predictor.Predict(data_filename, result_filename, bool_data_has_header, config.predict_disable_shape_check,
config.precise_float_parser);
} }
void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) const { void GetPredictAt(int data_idx, double* out_result, int64_t* out_len) const {
......
...@@ -261,6 +261,7 @@ const std::unordered_set<std::string>& Config::parameter_set() { ...@@ -261,6 +261,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"categorical_feature", "categorical_feature",
"forcedbins_filename", "forcedbins_filename",
"save_binary", "save_binary",
"precise_float_parser",
"start_iteration_predict", "start_iteration_predict",
"num_iteration_predict", "num_iteration_predict",
"predict_raw_score", "predict_raw_score",
...@@ -527,6 +528,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str ...@@ -527,6 +528,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetBool(params, "save_binary", &save_binary); GetBool(params, "save_binary", &save_binary);
GetBool(params, "precise_float_parser", &precise_float_parser);
GetInt(params, "start_iteration_predict", &start_iteration_predict); GetInt(params, "start_iteration_predict", &start_iteration_predict);
GetInt(params, "num_iteration_predict", &num_iteration_predict); GetInt(params, "num_iteration_predict", &num_iteration_predict);
...@@ -709,6 +712,7 @@ std::string Config::SaveMembersToString() const { ...@@ -709,6 +712,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[ignore_column: " << ignore_column << "]\n"; str_buf << "[ignore_column: " << ignore_column << "]\n";
str_buf << "[categorical_feature: " << categorical_feature << "]\n"; str_buf << "[categorical_feature: " << categorical_feature << "]\n";
str_buf << "[forcedbins_filename: " << forcedbins_filename << "]\n"; str_buf << "[forcedbins_filename: " << forcedbins_filename << "]\n";
str_buf << "[precise_float_parser: " << precise_float_parser << "]\n";
str_buf << "[objective_seed: " << objective_seed << "]\n"; str_buf << "[objective_seed: " << objective_seed << "]\n";
str_buf << "[num_class: " << num_class << "]\n"; str_buf << "[num_class: " << num_class << "]\n";
str_buf << "[is_unbalance: " << is_unbalance << "]\n"; str_buf << "[is_unbalance: " << is_unbalance << "]\n";
......
...@@ -196,7 +196,8 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac ...@@ -196,7 +196,8 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
auto bin_filename = CheckCanLoadFromBin(filename); auto bin_filename = CheckCanLoadFromBin(filename);
bool is_load_from_binary = false; bool is_load_from_binary = false;
if (bin_filename.size() == 0) { if (bin_filename.size() == 0) {
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, config_.header, 0, label_idx_)); auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, config_.header, 0, label_idx_,
config_.precise_float_parser));
if (parser == nullptr) { if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename); Log::Fatal("Could not recognize data format of %s", filename);
} }
...@@ -267,7 +268,8 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, ...@@ -267,7 +268,8 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
} }
auto bin_filename = CheckCanLoadFromBin(filename); auto bin_filename = CheckCanLoadFromBin(filename);
if (bin_filename.size() == 0) { if (bin_filename.size() == 0) {
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, config_.header, 0, label_idx_)); auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, config_.header, 0, label_idx_,
config_.precise_float_parser));
if (parser == nullptr) { if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename); Log::Fatal("Could not recognize data format of %s", filename);
} }
......
...@@ -6,9 +6,6 @@ ...@@ -6,9 +6,6 @@
#include <string> #include <string>
#include <algorithm> #include <algorithm>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory> #include <memory>
namespace LightGBM { namespace LightGBM {
...@@ -232,7 +229,7 @@ DataType GetDataType(const char* filename, bool header, ...@@ -232,7 +229,7 @@ DataType GetDataType(const char* filename, bool header,
return type; return type;
} }
Parser* Parser::CreateParser(const char* filename, bool header, int num_features, int label_idx) { Parser* Parser::CreateParser(const char* filename, bool header, int num_features, int label_idx, bool precise_float_parser) {
const int n_read_line = 32; const int n_read_line = 32;
auto lines = ReadKLineFromFile(filename, header, n_read_line); auto lines = ReadKLineFromFile(filename, header, n_read_line);
int num_col = 0; int num_col = 0;
...@@ -242,15 +239,16 @@ Parser* Parser::CreateParser(const char* filename, bool header, int num_features ...@@ -242,15 +239,16 @@ Parser* Parser::CreateParser(const char* filename, bool header, int num_features
} }
std::unique_ptr<Parser> ret; std::unique_ptr<Parser> ret;
int output_label_index = -1; int output_label_index = -1;
AtofFunc atof = precise_float_parser ? Common::AtofPrecise : Common::Atof;
if (type == DataType::LIBSVM) { if (type == DataType::LIBSVM) {
output_label_index = GetLabelIdxForLibsvm(lines[0], num_features, label_idx); output_label_index = GetLabelIdxForLibsvm(lines[0], num_features, label_idx);
ret.reset(new LibSVMParser(output_label_index, num_col)); ret.reset(new LibSVMParser(output_label_index, num_col, atof));
} else if (type == DataType::TSV) { } else if (type == DataType::TSV) {
output_label_index = GetLabelIdxForTSV(lines[0], num_features, label_idx); output_label_index = GetLabelIdxForTSV(lines[0], num_features, label_idx);
ret.reset(new TSVParser(output_label_index, num_col)); ret.reset(new TSVParser(output_label_index, num_col, atof));
} else if (type == DataType::CSV) { } else if (type == DataType::CSV) {
output_label_index = GetLabelIdxForCSV(lines[0], num_features, label_idx); output_label_index = GetLabelIdxForCSV(lines[0], num_features, label_idx);
ret.reset(new CSVParser(output_label_index, num_col)); ret.reset(new CSVParser(output_label_index, num_col, atof));
} }
if (output_label_index < 0 && label_idx >= 0) { if (output_label_index < 0 && label_idx >= 0) {
......
...@@ -17,8 +17,8 @@ namespace LightGBM { ...@@ -17,8 +17,8 @@ namespace LightGBM {
class CSVParser: public Parser { class CSVParser: public Parser {
public: public:
explicit CSVParser(int label_idx, int total_columns) explicit CSVParser(int label_idx, int total_columns, AtofFunc atof)
:label_idx_(label_idx), total_columns_(total_columns) { :label_idx_(label_idx), total_columns_(total_columns), atof_(atof) {
} }
inline void ParseOneLine(const char* str, inline void ParseOneLine(const char* str,
std::vector<std::pair<int, double>>* out_features, double* out_label) const override { std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
...@@ -27,7 +27,7 @@ class CSVParser: public Parser { ...@@ -27,7 +27,7 @@ class CSVParser: public Parser {
int offset = 0; int offset = 0;
*out_label = 0.0f; *out_label = 0.0f;
while (*str != '\0') { while (*str != '\0') {
str = Common::Atof(str, &val); str = atof_(str, &val);
if (idx == label_idx_) { if (idx == label_idx_) {
*out_label = val; *out_label = val;
offset = -1; offset = -1;
...@@ -50,12 +50,13 @@ class CSVParser: public Parser { ...@@ -50,12 +50,13 @@ class CSVParser: public Parser {
private: private:
int label_idx_ = 0; int label_idx_ = 0;
int total_columns_ = -1; int total_columns_ = -1;
AtofFunc atof_;
}; };
class TSVParser: public Parser { class TSVParser: public Parser {
public: public:
explicit TSVParser(int label_idx, int total_columns) explicit TSVParser(int label_idx, int total_columns, AtofFunc atof)
:label_idx_(label_idx), total_columns_(total_columns) { :label_idx_(label_idx), total_columns_(total_columns), atof_(atof) {
} }
inline void ParseOneLine(const char* str, inline void ParseOneLine(const char* str,
std::vector<std::pair<int, double>>* out_features, double* out_label) const override { std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
...@@ -63,7 +64,7 @@ class TSVParser: public Parser { ...@@ -63,7 +64,7 @@ class TSVParser: public Parser {
double val = 0.0f; double val = 0.0f;
int offset = 0; int offset = 0;
while (*str != '\0') { while (*str != '\0') {
str = Common::Atof(str, &val); str = atof_(str, &val);
if (idx == label_idx_) { if (idx == label_idx_) {
*out_label = val; *out_label = val;
offset = -1; offset = -1;
...@@ -86,12 +87,13 @@ class TSVParser: public Parser { ...@@ -86,12 +87,13 @@ class TSVParser: public Parser {
private: private:
int label_idx_ = 0; int label_idx_ = 0;
int total_columns_ = -1; int total_columns_ = -1;
AtofFunc atof_;
}; };
class LibSVMParser: public Parser { class LibSVMParser: public Parser {
public: public:
explicit LibSVMParser(int label_idx, int total_columns) explicit LibSVMParser(int label_idx, int total_columns, AtofFunc atof)
:label_idx_(label_idx), total_columns_(total_columns) { :label_idx_(label_idx), total_columns_(total_columns), atof_(atof) {
if (label_idx > 0) { if (label_idx > 0) {
Log::Fatal("Label should be the first column in a LibSVM file"); Log::Fatal("Label should be the first column in a LibSVM file");
} }
...@@ -101,7 +103,7 @@ class LibSVMParser: public Parser { ...@@ -101,7 +103,7 @@ class LibSVMParser: public Parser {
int idx = 0; int idx = 0;
double val = 0.0f; double val = 0.0f;
if (label_idx_ == 0) { if (label_idx_ == 0) {
str = Common::Atof(str, &val); str = atof_(str, &val);
*out_label = val; *out_label = val;
str = Common::SkipSpaceAndTab(str); str = Common::SkipSpaceAndTab(str);
} }
...@@ -126,6 +128,7 @@ class LibSVMParser: public Parser { ...@@ -126,6 +128,7 @@ class LibSVMParser: public Parser {
private: private:
int label_idx_ = 0; int label_idx_ = 0;
int total_columns_ = -1; int total_columns_ = -1;
AtofFunc atof_;
}; };
} // namespace LightGBM } // namespace LightGBM
......
/*!
* Copyright (c) 2021 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#include <gtest/gtest.h>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include "../include/LightGBM/utils/common.h"
// This is a basic test for floating number parsing.
// Most of the test cases come from:
// https://github.com/dmlc/xgboost/blob/master/tests/cpp/common/test_charconv.cc
// https://github.com/Alexhuszagh/rust-lexical/blob/master/data/test-parse-unittests/strtod_tests.toml
// Test fixture holding the helpers shared by all AtofPrecise test cases.
class AtofPreciseTest : public testing::Test {
 public:
  // One table entry: the text to parse and the value it is expected to produce.
  struct AtofTestCase {
    const char* data;
    double expected;
  };

  // Parses `data` with Common::AtofPrecise and checks that some input was
  // consumed and that parsing stopped at the terminating '\0'.
  // When `test_eq` is true the parsed value is also compared with `expected`;
  // callers pass false for values such as NaN that cannot be checked with ==.
  // Returns the parsed value so callers can run further checks on it.
  static double TestAtofPrecise(
      const char* data, double expected, bool test_eq = true) {
    double got = 0;
    const char* end = LightGBM::Common::AtofPrecise(data, &got);
    EXPECT_TRUE(end != data) << "fail to parse: " << data;
    EXPECT_EQ(*end, '\0') << "not parsing to end: " << data;
    if (test_eq) {
      EXPECT_EQ(expected, got) << "parse string: " << data;
    }
    return got;
  }

  // Reinterprets the 64 bits of `v` as a double.
  // Uses memcpy rather than a union: reading a union member other than the one
  // last written is undefined behaviour in C++.
  static double Int64Bits2Double(uint64_t v) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "double must be 64 bits wide for bit-pattern tests");
    double d;
    std::memcpy(&d, &v, sizeof(d));
    return d;
  }
};
// Plain integer, decimal and exponent notations must parse to the exact
// double written on the right-hand side of each table entry.
TEST_F(AtofPreciseTest, Basic) {
  const AtofTestCase kCases[] = {
    { "0", 0.0 },
    { "0E0", 0.0 },
    { "-0E0", 0.0 },
    { "-0", -0.0 },
    { "1", 1.0 },
    { "1E0", 1.0 },
    { "-1", -1.0 },
    { "-1E0", -1.0 },
    { "123456.0", 123456.0 },
    { "432E1", 432E1 },
    { "1.2345678", 1.2345678 },
    { "2.4414062E-4", 2.4414062E-4 },
    { "3.0540412E5", 3.0540412E5 },
    { "3.355445E7", 3.355445E7 },
    { "1.1754944E-38", 1.1754944E-38 },
  };
  for (const AtofTestCase& tc : kCases) {
    TestAtofPrecise(tc.data, tc.expected);
  }
}
// Values at the edges of the float/double ranges: tiny inputs that round to
// zero, subnormals, FLT_MAX/FLT_MIN, DBL_MAX/DBL_MIN and the rounding
// boundary to infinity.
TEST_F(AtofPreciseTest, CornerCases) {
  const AtofTestCase kCases[] = {
    { "1e-400", 0.0 },
    { "2.4703282292062326e-324", 0.0 },
    { "4.9406564584124654e-324", Int64Bits2Double(0x0000000000000001LU) },
    { "8.44291197326099e-309", Int64Bits2Double(0x0006123400000001LU) },
    // FLT_MAX
    { "3.40282346638528859811704183484516925440e38",
      static_cast<double>(std::numeric_limits<float>::max()) },
    // FLT_MIN
    { "1.1754943508222875079687365372222456778186655567720875215087517062784172594547271728515625e-38",
      static_cast<double>(std::numeric_limits<float>::min()) },
    // DBL_MAX (1 + (1 - 2^-52)) * 2^1023 = (2^53 - 1) * 2^971
    { "17976931348623157081452742373170435679807056752584499659891747680315"
      "72607800285387605895586327668781715404589535143824642343213268894641"
      "82768467546703537516986049910576551282076245490090389328944075868508"
      "45513394230458323690322294816580855933212334827479782620414472316873"
      "8177180919299881250404026184124858368", std::numeric_limits<double>::max() },
    { "1.7976931348623158e+308", std::numeric_limits<double>::max() },
    // 2^971 * (2^53 - 1 + 1/2) : the smallest number resolving to inf
    {"179769313486231580793728971405303415079934132710037826936173778980444"
     "968292764750946649017977587207096330286416692887910946555547851940402"
     "630657488671505820681908902000708383676273854845817711531764475730270"
     "069855571366959622842914819860834936475292719074168444365510704342711"
     "559699508093042880177904174497792", std::numeric_limits<double>::infinity() },
    // Near DBL_MIN
    { "2.2250738585072009e-308", Int64Bits2Double(0x000fffffffffffffLU) },
    // DBL_MIN 2^-1022
    { "2.2250738585072012e-308", std::numeric_limits<double>::min() },
    { "2.2250738585072014e-308", std::numeric_limits<double>::min() },
  };
  for (const AtofTestCase& tc : kCases) {
    TestAtofPrecise(tc.data, tc.expected);
  }
}
// "1e+400" cannot be represented as a double; parsing it is expected to
// raise std::runtime_error instead of returning a value.
TEST_F(AtofPreciseTest, UnderOverFlow) {
  double parsed = 0;
  ASSERT_THROW(LightGBM::Common::AtofPrecise("1e+400", &parsed), std::runtime_error);
}
// Text that does not start with a number is expected to raise
// std::runtime_error.
TEST_F(AtofPreciseTest, ErrorInput) {
  double parsed = 0;
  ASSERT_THROW(LightGBM::Common::AtofPrecise("x1", &parsed), std::runtime_error);
}
// Every spelling of NaN must parse to a NaN. For results without the sign bit
// set, the bit pattern must also match std::numeric_limits<double>::quiet_NaN().
TEST_F(AtofPreciseTest, NaN) {
  AtofTestCase test_cases[] = {
    { "nan", std::numeric_limits<double>::quiet_NaN() },
    { "NaN", std::numeric_limits<double>::quiet_NaN() },
    { "NAN", std::numeric_limits<double>::quiet_NaN() },
    // The behavior for parsing -nan depends on implementation.
    // Thus we skip binary check for negative nan.
    { "-nan", -std::numeric_limits<double>::quiet_NaN() },
    { "-NaN", -std::numeric_limits<double>::quiet_NaN() },
    { "-NAN", -std::numeric_limits<double>::quiet_NaN() },
  };
  for (auto const& test : test_cases) {
    // NaN never compares equal, so skip the == check inside the helper.
    double got = TestAtofPrecise(test.data, test.expected, false);
    EXPECT_TRUE(std::isnan(got)) << "not parsed as NaN: " << test.data;
    // Any ordered comparison with NaN (e.g. `got > 0`) is always false, which
    // would silently disable the check below; inspect the sign bit instead.
    if (!std::signbit(got)) {
      // See comment in test_cases.
      EXPECT_EQ(memcmp(&got, &test.expected, sizeof(test.expected)), 0)
        << "parsed NaN is not the same for every bit: " << test.data;
    }
  }
}
// Every spelling of +/-infinity must parse to an infinite value with the
// expected sign and the same bit pattern as the reference constant.
TEST_F(AtofPreciseTest, Inf) {
  const double kPosInf = std::numeric_limits<double>::infinity();
  const double kNegInf = -std::numeric_limits<double>::infinity();
  const AtofTestCase kCases[] = {
    { "inf", kPosInf },
    { "Inf", kPosInf },
    { "INF", kPosInf },
    { "-inf", kNegInf },
    { "-Inf", kNegInf },
    { "-INF", kNegInf },
  };
  for (const AtofTestCase& tc : kCases) {
    // Value equality is covered by the sign/isinf/bitwise checks below.
    const double parsed = TestAtofPrecise(tc.data, tc.expected, false);
    EXPECT_EQ(LightGBM::Common::Sign(tc.expected), LightGBM::Common::Sign(parsed)) << "sign differs parsing: " << tc.data;
    EXPECT_TRUE(std::isinf(parsed)) << "not parsed as infinite: " << tc.data;
    EXPECT_EQ(memcmp(&parsed, &tc.expected, sizeof(tc.expected)), 0)
      << "parsed infinite is not the same for every bit: " << tc.data;
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment