Commit 308e6451 authored by Guolin Ke
Browse files

support nan and inf in parser.

parent f3e37b9e
...@@ -80,6 +80,7 @@ inline static const char* Atoi(const char* p, int* out) { ...@@ -80,6 +80,7 @@ inline static const char* Atoi(const char* p, int* out) {
inline static const char* Atof(const char* p, double* out) { inline static const char* Atof(const char* p, double* out) {
int frac; int frac;
double sign, value, scale; double sign, value, scale;
// Skip leading white space, if any. // Skip leading white space, if any.
while (*p == ' ') { while (*p == ' ') {
++p; ++p;
...@@ -95,6 +96,8 @@ inline static const char* Atof(const char* p, double* out) { ...@@ -95,6 +96,8 @@ inline static const char* Atof(const char* p, double* out) {
++p; ++p;
} }
// is a number
if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
// Get digits before decimal point or exponent, if any. // Get digits before decimal point or exponent, if any.
for (value = 0.0; *p >= '0' && *p <= '9'; ++p) { for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
value = value * 10.0 + (*p - '0'); value = value * 10.0 + (*p - '0');
...@@ -121,8 +124,7 @@ inline static const char* Atof(const char* p, double* out) { ...@@ -121,8 +124,7 @@ inline static const char* Atof(const char* p, double* out) {
if (*p == '-') { if (*p == '-') {
frac = 1; frac = 1;
++p; ++p;
} } else if (*p == '+') {
else if (*p == '+') {
++p; ++p;
} }
// Get digits of exponent, if any. // Get digits of exponent, if any.
...@@ -137,9 +139,42 @@ inline static const char* Atof(const char* p, double* out) { ...@@ -137,9 +139,42 @@ inline static const char* Atof(const char* p, double* out) {
} }
// Return signed and scaled floating point result. // Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale)); *out = sign * (frac ? (value / scale) : (value * scale));
} else {
if (*p == 'n' || *p == 'N') {
++p;
if (!(*p == 'a' || *p == 'A')) {
Log::Stderr("meet error while parsing string to float, expect a nan here");
}
++p;
if (!(*p == 'n' || *p == 'N')) {
Log::Stderr("meet error while parsing string to float, expect a nan here");
}
++p;
// default convert nan to 0
*out = 0;
} else if (*p == 'i' || *p == 'I') {
++p;
if (!(*p == 'n' || *p == 'N')) {
Log::Stderr("meet error while parsing string to float, expect a inf here");
}
++p;
if (!(*p == 'f' || *p == 'F')) {
Log::Stderr("meet error while parsing string to float, expect a inf here");
}
++p;
// default inf
*out = sign * 1e308;
} else {
if (*p != '\0') {
Log::Stderr("Meet unknow characters while parsing string to float");
}
}
}
while (*p == ' ') { while (*p == ' ') {
++p; ++p;
} }
return p; return p;
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
namespace LightGBM { namespace LightGBM {
void GetStatistic(const char* str, int* comma_cnt, int* tab_cnt, int *colon_cnt) { void GetStatistic(const char* str, int* comma_cnt, int* tab_cnt, int* colon_cnt) {
*comma_cnt = 0; *comma_cnt = 0;
*tab_cnt = 0; *tab_cnt = 0;
*colon_cnt = 0; *colon_cnt = 0;
......
...@@ -33,13 +33,11 @@ public: ...@@ -33,13 +33,11 @@ public:
double* out_label) const override { double* out_label) const override {
// first column is label // first column is label
str = Common::Atof(str, out_label); str = Common::Atof(str, out_label);
if (*str == ',') { if (*str == ',') {
++str; ++str;
} else if (*str != '\0') { } else if (*str != '\0') {
Log::Stderr("input format error, should be CSV"); Log::Stderr("input format error, should be CSV");
} }
return ParseOneLine(str, out_features); return ParseOneLine(str, out_features);
} }
}; };
...@@ -61,16 +59,14 @@ public: ...@@ -61,16 +59,14 @@ public:
} }
} }
inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features, inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
double* out_label) const override{ double* out_label) const override {
// first column is label // first column is label
str = Common::Atof(str, out_label); str = Common::Atof(str, out_label);
if (*str == '\t') { if (*str == '\t') {
++str; ++str;
} else if (*str != '\0') { } else if (*str != '\0') {
Log::Stderr("input format error, should be TSV"); Log::Stderr("input format error, should be TSV");
} }
return ParseOneLine(str, out_features); return ParseOneLine(str, out_features);
} }
}; };
...@@ -94,7 +90,7 @@ public: ...@@ -94,7 +90,7 @@ public:
} }
} }
inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features, inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features,
double* out_label) const override{ double* out_label) const override {
// first column is label // first column is label
str = Common::Atof(str, out_label); str = Common::Atof(str, out_label);
str = Common::SkipSpaceAndTab(str); str = Common::SkipSpaceAndTab(str);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment