Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
308e6451
Commit
308e6451
authored
Oct 11, 2016
by
Guolin Ke
Browse files
support nan and inf in parser.
parent
f3e37b9e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
83 additions
and
52 deletions
+83
-52
include/LightGBM/utils/common.h
include/LightGBM/utils/common.h
+68
-33
src/io/parser.cpp
src/io/parser.cpp
+1
-1
src/io/parser.hpp
src/io/parser.hpp
+14
-18
No files found.
include/LightGBM/utils/common.h
View file @
308e6451
...
...
@@ -80,6 +80,7 @@ inline static const char* Atoi(const char* p, int* out) {
inline
static
const
char
*
Atof
(
const
char
*
p
,
double
*
out
)
{
int
frac
;
double
sign
,
value
,
scale
;
// Skip leading white space, if any.
while
(
*
p
==
' '
)
{
++
p
;
...
...
@@ -95,51 +96,85 @@ inline static const char* Atof(const char* p, double* out) {
++
p
;
}
// Get digits before decimal point or exponent, if any.
for
(
value
=
0.0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
value
=
value
*
10.0
+
(
*
p
-
'0'
);
}
// is a number
if
((
*
p
>=
'0'
&&
*
p
<=
'9'
)
||
*
p
==
'.'
||
*
p
==
'e'
||
*
p
==
'E'
)
{
// Get digits before decimal point or exponent, if any.
for
(
value
=
0.0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
value
=
value
*
10.0
+
(
*
p
-
'0'
);
}
// Get digits after decimal point, if any.
if
(
*
p
==
'.'
)
{
double
pow10
=
10.0
;
++
p
;
while
(
*
p
>=
'0'
&&
*
p
<=
'9'
)
{
value
+=
(
*
p
-
'0'
)
/
pow10
;
pow10
*=
10.0
;
// Get digits after decimal point, if any.
if
(
*
p
==
'.'
)
{
double
pow10
=
10.0
;
++
p
;
while
(
*
p
>=
'0'
&&
*
p
<=
'9'
)
{
value
+=
(
*
p
-
'0'
)
/
pow10
;
pow10
*=
10.0
;
++
p
;
}
}
}
// Handle exponent, if any.
frac
=
0
;
scale
=
1.0
;
if
((
*
p
==
'e'
)
||
(
*
p
==
'E'
))
{
unsigned
int
expon
;
// Get sign of exponent, if any.
++
p
;
if
(
*
p
==
'-'
)
{
frac
=
1
;
// Handle exponent, if any.
frac
=
0
;
scale
=
1.0
;
if
((
*
p
==
'e'
)
||
(
*
p
==
'E'
))
{
unsigned
int
expon
;
// Get sign of exponent, if any.
++
p
;
if
(
*
p
==
'-'
)
{
frac
=
1
;
++
p
;
}
else
if
(
*
p
==
'+'
)
{
++
p
;
}
// Get digits of exponent, if any.
for
(
expon
=
0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
expon
=
expon
*
10
+
(
*
p
-
'0'
);
}
if
(
expon
>
308
)
expon
=
308
;
// Calculate scaling factor.
while
(
expon
>=
50
)
{
scale
*=
1E50
;
expon
-=
50
;
}
while
(
expon
>=
8
)
{
scale
*=
1E8
;
expon
-=
8
;
}
while
(
expon
>
0
)
{
scale
*=
10.0
;
expon
-=
1
;
}
}
else
if
(
*
p
==
'+'
)
{
// Return signed and scaled floating point result.
*
out
=
sign
*
(
frac
?
(
value
/
scale
)
:
(
value
*
scale
));
}
else
{
if
(
*
p
==
'n'
||
*
p
==
'N'
)
{
++
p
;
if
(
!
(
*
p
==
'a'
||
*
p
==
'A'
))
{
Log
::
Stderr
(
"meet error while parsing string to float, expect a nan here"
);
}
++
p
;
if
(
!
(
*
p
==
'n'
||
*
p
==
'N'
))
{
Log
::
Stderr
(
"meet error while parsing string to float, expect a nan here"
);
}
++
p
;
// default convert nan to 0
*
out
=
0
;
}
else
if
(
*
p
==
'i'
||
*
p
==
'I'
)
{
++
p
;
if
(
!
(
*
p
==
'n'
||
*
p
==
'N'
))
{
Log
::
Stderr
(
"meet error while parsing string to float, expect a inf here"
);
}
++
p
;
if
(
!
(
*
p
==
'f'
||
*
p
==
'F'
))
{
Log
::
Stderr
(
"meet error while parsing string to float, expect a inf here"
);
}
++
p
;
// default inf
*
out
=
sign
*
1e308
;
}
else
{
if
(
*
p
!=
'\0'
)
{
Log
::
Stderr
(
"Meet unknow characters while parsing string to float"
);
}
}
// Get digits of exponent, if any.
for
(
expon
=
0
;
*
p
>=
'0'
&&
*
p
<=
'9'
;
++
p
)
{
expon
=
expon
*
10
+
(
*
p
-
'0'
);
}
if
(
expon
>
308
)
expon
=
308
;
// Calculate scaling factor.
while
(
expon
>=
50
)
{
scale
*=
1E50
;
expon
-=
50
;
}
while
(
expon
>=
8
)
{
scale
*=
1E8
;
expon
-=
8
;
}
while
(
expon
>
0
)
{
scale
*=
10.0
;
expon
-=
1
;
}
}
// Return signed and scaled floating point result.
*
out
=
sign
*
(
frac
?
(
value
/
scale
)
:
(
value
*
scale
));
while
(
*
p
==
' '
)
{
++
p
;
}
return
p
;
}
...
...
src/io/parser.cpp
View file @
308e6451
...
...
@@ -5,7 +5,7 @@
namespace
LightGBM
{
void
GetStatistic
(
const
char
*
str
,
int
*
comma_cnt
,
int
*
tab_cnt
,
int
*
colon_cnt
)
{
void
GetStatistic
(
const
char
*
str
,
int
*
comma_cnt
,
int
*
tab_cnt
,
int
*
colon_cnt
)
{
*
comma_cnt
=
0
;
*
tab_cnt
=
0
;
*
colon_cnt
=
0
;
...
...
src/io/parser.hpp
View file @
308e6451
...
...
@@ -15,7 +15,7 @@ namespace LightGBM {
class
CSVParser
:
public
Parser
{
public:
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
)
const
override
{
int
idx
=
0
;
double
val
=
0.0
;
while
(
*
str
!=
'\0'
)
{
...
...
@@ -30,16 +30,14 @@ public:
}
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
double
*
out_label
)
const
override
{
// first column is label
str
=
Common
::
Atof
(
str
,
out_label
);
if
(
*
str
==
','
)
{
++
str
;
}
else
if
(
*
str
!=
'\0'
)
{
Log
::
Stderr
(
"input format error, should be CSV"
);
}
if
(
*
str
==
','
)
{
++
str
;
}
else
if
(
*
str
!=
'\0'
)
{
Log
::
Stderr
(
"input format error, should be CSV"
);
}
return
ParseOneLine
(
str
,
out_features
);
}
};
...
...
@@ -61,16 +59,14 @@ public:
}
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
double
*
out_label
)
const
override
{
// first column is label
str
=
Common
::
Atof
(
str
,
out_label
);
if
(
*
str
==
'\t'
)
{
++
str
;
}
else
if
(
*
str
!=
'\0'
)
{
Log
::
Stderr
(
"input format error, should be TSV"
);
}
if
(
*
str
==
'\t'
)
{
++
str
;
}
else
if
(
*
str
!=
'\0'
)
{
Log
::
Stderr
(
"input format error, should be TSV"
);
}
return
ParseOneLine
(
str
,
out_features
);
}
};
...
...
@@ -94,7 +90,7 @@ public:
}
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
double
*
out_label
)
const
override
{
// first column is label
str
=
Common
::
Atof
(
str
,
out_label
);
str
=
Common
::
SkipSpaceAndTab
(
str
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment