Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
c35ecfbf
Unverified
Commit
c35ecfbf
authored
Oct 11, 2022
by
Zhuyi Xue
Committed by
GitHub
Oct 11, 2022
Browse files
renamed cur_cat => cur_cat_idx and added some comments (#5522)
parent
8b720844
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
12 deletions
+12
-12
src/io/bin.cpp
src/io/bin.cpp
+12
-12
No files found.
src/io/bin.cpp
View file @
c35ecfbf
...
...
@@ -352,7 +352,7 @@ namespace LightGBM {
int
zero_cnt
=
static_cast
<
int
>
(
total_sample_cnt
-
num_sample_values
-
na_cnt
);
// find distinct_values first
std
::
vector
<
double
>
distinct_values
;
std
::
vector
<
int
>
counts
;
std
::
vector
<
int
>
counts
;
// count of data points for each distinct feature value.
std
::
stable_sort
(
values
,
values
+
num_sample_values
);
...
...
@@ -389,7 +389,7 @@ namespace LightGBM {
}
min_val_
=
distinct_values
.
front
();
max_val_
=
distinct_values
.
back
();
std
::
vector
<
int
>
cnt_in_bin
;
std
::
vector
<
int
>
cnt_in_bin
;
// count of data points in each bin.
int
num_distinct_values
=
static_cast
<
int
>
(
distinct_values
.
size
());
if
(
bin_type_
==
BinType
::
NumericalBin
)
{
if
(
missing_type_
==
MissingType
::
Zero
)
{
...
...
@@ -446,12 +446,12 @@ namespace LightGBM {
Log
::
Warning
(
"Met categorical feature which contains sparse values. "
"Consider renumbering to consecutive integers started from zero"
);
}
// sort by counts
// sort by counts
in descending order
Common
::
SortForPair
<
int
,
int
>
(
&
counts_int
,
&
distinct_values_int
,
0
,
true
);
// will ignore the categorical of small counts
int
cut_cnt
=
static_cast
<
int
>
(
Common
::
RoundInt
((
total_sample_cnt
-
na_cnt
)
*
0.99
f
));
size_t
cur_cat
=
0
;
size_t
cur_cat
_idx
=
0
;
// index of current category.
categorical_2_bin_
.
clear
();
bin_2_categorical_
.
clear
();
int
used_cnt
=
0
;
...
...
@@ -467,20 +467,20 @@ namespace LightGBM {
categorical_2_bin_
[
-
1
]
=
0
;
cnt_in_bin
.
push_back
(
0
);
num_bin_
=
1
;
while
(
cur_cat
<
distinct_values_int
.
size
()
while
(
cur_cat
_idx
<
distinct_values_int
.
size
()
&&
(
used_cnt
<
cut_cnt
||
num_bin_
<
max_bin
))
{
if
(
counts_int
[
cur_cat
]
<
min_data_in_bin
&&
cur_cat
>
1
)
{
if
(
counts_int
[
cur_cat
_idx
]
<
min_data_in_bin
&&
cur_cat
_idx
>
1
)
{
break
;
}
bin_2_categorical_
.
push_back
(
distinct_values_int
[
cur_cat
]);
categorical_2_bin_
[
distinct_values_int
[
cur_cat
]]
=
static_cast
<
unsigned
int
>
(
num_bin_
);
used_cnt
+=
counts_int
[
cur_cat
];
cnt_in_bin
.
push_back
(
counts_int
[
cur_cat
]);
bin_2_categorical_
.
push_back
(
distinct_values_int
[
cur_cat
_idx
]);
categorical_2_bin_
[
distinct_values_int
[
cur_cat
_idx
]]
=
static_cast
<
unsigned
int
>
(
num_bin_
);
used_cnt
+=
counts_int
[
cur_cat
_idx
];
cnt_in_bin
.
push_back
(
counts_int
[
cur_cat
_idx
]);
++
num_bin_
;
++
cur_cat
;
++
cur_cat
_idx
;
}
// Use MissingType::None to represent this bin contains all categoricals
if
(
cur_cat
==
distinct_values_int
.
size
()
&&
na_cnt
==
0
)
{
if
(
cur_cat
_idx
==
distinct_values_int
.
size
()
&&
na_cnt
==
0
)
{
missing_type_
=
MissingType
::
None
;
}
else
{
missing_type_
=
MissingType
::
NaN
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment