Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
dc584070
Unverified
Commit
dc584070
authored
Oct 09, 2019
by
Guolin Ke
Committed by
GitHub
Oct 09, 2019
Browse files
fix bug in parser
parent
fdb39237
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
27 additions
and
24 deletions
+27
-24
src/io/parser.cpp
src/io/parser.cpp
+27
-24
No files found.
src/io/parser.cpp
View file @
dc584070
...
...
@@ -112,7 +112,10 @@ std::vector<std::string> ReadKLineFromFile(const char* filename, bool header, in
for
(
int
i
=
0
;
i
<
k
;
++
i
)
{
if
(
!
tmp_file
.
eof
())
{
GetLine
(
&
tmp_file
,
&
cur_line
,
reader
.
get
(),
&
buffer
,
buffer_size
);
ret
.
push_back
(
cur_line
);
cur_line
=
Common
::
Trim
(
cur_line
);
if
(
!
cur_line
.
empty
())
{
ret
.
push_back
(
cur_line
);
}
}
else
{
break
;
}
...
...
@@ -142,32 +145,32 @@ DataType GetDataType(const std::vector<std::string>& lines, int* num_col) {
}
else
if
(
comma_cnt
>
0
)
{
type
=
DataType
::
CSV
;
}
}
int
comma_cnt2
=
0
;
int
tab_cnt2
=
0
;
int
colon_cnt2
=
0
;
GetStatistic
(
lines
[
1
].
c_str
(),
&
comma_cnt2
,
&
tab_cnt2
,
&
colon_cnt2
);
if
(
colon_cnt
>
0
||
colon_cnt2
>
0
)
{
type
=
DataType
::
LIBSVM
;
}
else
if
(
tab_cnt
==
tab_cnt2
&&
tab_cnt
>
0
)
{
type
=
DataType
::
TSV
;
}
else
if
(
comma_cnt
==
comma_cnt2
&&
comma_cnt
>
0
)
{
type
=
DataType
::
CSV
;
}
if
(
type
==
DataType
::
TSV
||
type
==
DataType
::
CSV
)
{
// valid the type
for
(
size_t
i
=
2
;
i
<
lines
.
size
();
++
i
)
{
GetStatistic
(
lines
[
i
].
c_str
(),
&
comma_cnt2
,
&
tab_cnt2
,
&
colon_cnt2
);
if
(
type
==
DataType
::
TSV
&&
tab_cnt2
!=
tab_cnt
)
{
type
=
DataType
::
INVALID
;
break
;
}
else
if
(
type
==
DataType
::
CSV
&&
comma_cnt
!=
comma_cnt2
)
{
type
=
DataType
::
INVALID
;
break
;
}
else
if
(
lines
.
size
()
>
1
)
{
int
comma_cnt2
=
0
;
int
tab_cnt2
=
0
;
int
colon_cnt2
=
0
;
GetStatistic
(
lines
[
1
].
c_str
(),
&
comma_cnt2
,
&
tab_cnt2
,
&
colon_cnt2
);
if
(
colon_cnt
>
0
||
colon_cnt2
>
0
)
{
type
=
DataType
::
LIBSVM
;
}
else
if
(
tab_cnt
==
tab_cnt2
&&
tab_cnt
>
0
)
{
type
=
DataType
::
TSV
;
}
else
if
(
comma_cnt
==
comma_cnt2
&&
comma_cnt
>
0
)
{
type
=
DataType
::
CSV
;
}
if
(
type
==
DataType
::
TSV
||
type
==
DataType
::
CSV
)
{
// valid the type
for
(
size_t
i
=
2
;
i
<
lines
.
size
();
++
i
)
{
GetStatistic
(
lines
[
i
].
c_str
(),
&
comma_cnt2
,
&
tab_cnt2
,
&
colon_cnt2
);
if
(
type
==
DataType
::
TSV
&&
tab_cnt2
!=
tab_cnt
)
{
type
=
DataType
::
INVALID
;
break
;
}
else
if
(
type
==
DataType
::
CSV
&&
comma_cnt
!=
comma_cnt2
)
{
type
=
DataType
::
INVALID
;
break
;
}
}
}
}
if
(
type
==
DataType
::
LIBSVM
)
{
int
max_col_idx
=
0
;
for
(
size_t
i
=
0
;
i
<
lines
.
size
();
++
i
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment