Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
699d4381
Unverified
Commit
699d4381
authored
Dec 05, 2017
by
Guolin Ke
Committed by
GitHub
Dec 05, 2017
Browse files
fix bug in feature fraction (#1099)
* fix feature fraction * fix bugs.
parent
a957bd62
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
6 deletions
+19
-6
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+9
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+9
-5
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+1
-0
No files found.
include/LightGBM/dataset.h
View file @
699d4381
...
@@ -361,7 +361,15 @@ public:
...
@@ -361,7 +361,15 @@ public:
inline
uint64_t
NumTotalBin
()
const
{
inline
uint64_t
NumTotalBin
()
const
{
return
group_bin_boundaries_
.
back
();
return
group_bin_boundaries_
.
back
();
}
}
inline
std
::
vector
<
int
>
ValidFeatureIndices
()
const
{
std
::
vector
<
int
>
ret
;
for
(
int
i
=
0
;
i
<
num_total_features_
;
++
i
)
{
if
(
used_feature_map_
[
i
]
>=
0
)
{
ret
.
push_back
(
i
);
}
}
return
ret
;
}
void
ReSize
(
data_size_t
num_data
);
void
ReSize
(
data_size_t
num_data
);
void
CopySubset
(
const
Dataset
*
fullset
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
need_meta_data
);
void
CopySubset
(
const
Dataset
*
fullset
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
need_meta_data
);
...
...
src/treelearner/serial_tree_learner.cpp
View file @
699d4381
...
@@ -78,6 +78,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
...
@@ -78,6 +78,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
// initialize data partition
// initialize data partition
data_partition_
.
reset
(
new
DataPartition
(
num_data_
,
tree_config_
->
num_leaves
));
data_partition_
.
reset
(
new
DataPartition
(
num_data_
,
tree_config_
->
num_leaves
));
is_feature_used_
.
resize
(
num_features_
);
is_feature_used_
.
resize
(
num_features_
);
valid_feature_indices_
=
train_data_
->
ValidFeatureIndices
();
// initialize ordered gradients and hessians
// initialize ordered gradients and hessians
ordered_gradients_
.
resize
(
num_data_
);
ordered_gradients_
.
resize
(
num_data_
);
ordered_hessians_
.
resize
(
num_data_
);
ordered_hessians_
.
resize
(
num_data_
);
...
@@ -237,16 +238,19 @@ void SerialTreeLearner::BeforeTrain() {
...
@@ -237,16 +238,19 @@ void SerialTreeLearner::BeforeTrain() {
histogram_pool_
.
ResetMap
();
histogram_pool_
.
ResetMap
();
if
(
tree_config_
->
feature_fraction
<
1
)
{
if
(
tree_config_
->
feature_fraction
<
1
)
{
int
used_feature_cnt
=
static_cast
<
int
>
(
train_data_
->
num_total_features
()
*
tree_config_
->
feature_fraction
);
int
used_feature_cnt
=
static_cast
<
int
>
(
valid_feature_indices_
.
size
()
*
tree_config_
->
feature_fraction
);
// at least use one feature
used_feature_cnt
=
std
::
max
(
used_feature_cnt
,
1
);
// initialize used features
// initialize used features
std
::
memset
(
is_feature_used_
.
data
(),
0
,
sizeof
(
int8_t
)
*
num_features_
);
std
::
memset
(
is_feature_used_
.
data
(),
0
,
sizeof
(
int8_t
)
*
num_features_
);
// Get used feature at current tree
// Get used feature at current tree
auto
used_feature
_indices
=
random_
.
Sample
(
train_data_
->
num_total_features
(),
used_feature_cnt
);
auto
sampled
_indices
=
random_
.
Sample
(
valid_feature_indices_
.
size
(),
used_feature_cnt
);
int
omp_loop_size
=
static_cast
<
int
>
(
used_feature
_indices
.
size
());
int
omp_loop_size
=
static_cast
<
int
>
(
sampled
_indices
.
size
());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature_indices
[
i
]);
int
used_feature
=
valid_feature_indices_
[
sampled_indices
[
i
]];
if
(
inner_feature_index
<
0
)
{
continue
;
}
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
CHECK
(
inner_feature_index
>=
0
);
is_feature_used_
[
inner_feature_index
]
=
1
;
is_feature_used_
[
inner_feature_index
]
=
1
;
}
}
}
else
{
}
else
{
...
...
src/treelearner/serial_tree_learner.h
View file @
699d4381
...
@@ -125,6 +125,7 @@ protected:
...
@@ -125,6 +125,7 @@ protected:
std
::
unique_ptr
<
LeafSplits
>
smaller_leaf_splits_
;
std
::
unique_ptr
<
LeafSplits
>
smaller_leaf_splits_
;
/*! \brief stores best thresholds for all feature for larger leaf */
/*! \brief stores best thresholds for all feature for larger leaf */
std
::
unique_ptr
<
LeafSplits
>
larger_leaf_splits_
;
std
::
unique_ptr
<
LeafSplits
>
larger_leaf_splits_
;
std
::
vector
<
int
>
valid_feature_indices_
;
#ifdef USE_GPU
#ifdef USE_GPU
/*! \brief gradients of current iteration, ordered for cache optimized, aligned to 4K page */
/*! \brief gradients of current iteration, ordered for cache optimized, aligned to 4K page */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment