Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
d1e0bab5
Commit
d1e0bab5
authored
Nov 11, 2016
by
Guolin Ke
Browse files
fix some bugs in bin construction.
parent
0f4ea846
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
7 deletions
+12
-7
src/io/bin.cpp
src/io/bin.cpp
+11
-6
src/io/dataset_loader.cpp
src/io/dataset_loader.cpp
+1
-1
No files found.
src/io/bin.cpp
View file @
d1e0bab5
...
...
@@ -46,7 +46,7 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
// find distinct_values first
std
::
vector
<
double
>
distinct_values
;
std
::
vector
<
int
>
counts
;
std
::
sort
(
ref_values
.
begin
(),
ref_values
.
end
());
// push 0 first
if
(
zero_cnt
>
0
)
{
...
...
@@ -67,8 +67,8 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
}
int
num_values
=
static_cast
<
int
>
(
distinct_values
.
size
());
int
cnt_in_bin0
=
0
;
if
(
num_values
<=
max_bin
)
{
std
::
sort
(
distinct_values
.
begin
(),
distinct_values
.
end
());
// use distinct value is enough
num_bin_
=
num_values
;
bin_upper_bound_
=
new
double
[
num_values
];
...
...
@@ -78,12 +78,11 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
cnt_in_bin0
=
counts
[
0
];
bin_upper_bound_
[
num_values
-
1
]
=
std
::
numeric_limits
<
double
>::
infinity
();
}
else
{
double
min_lower_bound
=
std
::
numeric_limits
<
double
>::
infinity
();
// mean size for one bin
double
mean_bin_size
=
sample_size
/
static_cast
<
double
>
(
max_bin
);
int
rest_sample_cnt
=
static_cast
<
int
>
(
sample_size
);
int
bin_cnt
=
0
;
num_bin_
=
max_bin
;
std
::
vector
<
double
>
upper_bounds
(
max_bin
,
std
::
numeric_limits
<
double
>::
infinity
());
std
::
vector
<
double
>
lower_bounds
(
max_bin
,
std
::
numeric_limits
<
double
>::
infinity
());
// sort by count, descent
...
...
@@ -92,6 +91,10 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
while
(
counts
[
bin_cnt
]
>
mean_bin_size
)
{
upper_bounds
[
bin_cnt
]
=
distinct_values
[
bin_cnt
];
lower_bounds
[
bin_cnt
]
=
distinct_values
[
bin_cnt
];
if
(
lower_bounds
[
bin_cnt
]
<
min_lower_bound
)
{
min_lower_bound
=
lower_bounds
[
bin_cnt
];
cnt_in_bin0
=
counts
[
bin_cnt
];
}
rest_sample_cnt
-=
counts
[
bin_cnt
];
++
bin_cnt
;
}
...
...
@@ -108,7 +111,10 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
// need a new bin
if
(
cur_cnt_inbin
>=
mean_bin_size
)
{
upper_bounds
[
bin_cnt
]
=
distinct_values
[
i
];
if
(
bin_cnt
==
0
)
{
cnt_in_bin0
=
cur_cnt_inbin
;
}
if
(
lower_bounds
[
bin_cnt
]
<
min_lower_bound
)
{
min_lower_bound
=
lower_bounds
[
bin_cnt
];
cnt_in_bin0
=
cur_cnt_inbin
;
}
++
bin_cnt
;
lower_bounds
[
bin_cnt
]
=
distinct_values
[
i
+
1
];
if
(
bin_cnt
>=
max_bin
-
1
)
break
;
...
...
@@ -117,7 +123,6 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
}
}
cur_cnt_inbin
+=
counts
[
num_values
-
1
];
}
Common
::
SortForPair
<
double
,
double
>
(
lower_bounds
,
upper_bounds
,
0
,
false
);
// update bin upper bound
...
...
src/io/dataset_loader.cpp
View file @
d1e0bab5
...
...
@@ -9,7 +9,7 @@
namespace
LightGBM
{
DatasetLoader
::
DatasetLoader
(
const
IOConfig
&
io_config
,
const
PredictFunction
&
predict_fun
)
:
io_config_
(
io_config
),
predict_fun_
(
predict_fun
){
:
io_config_
(
io_config
),
predict_fun_
(
predict_fun
)
,
random_
(
io_config_
.
data_random_seed
)
{
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment