Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
47313fb5
Commit
47313fb5
authored
Nov 01, 2016
by
Guolin Ke
Browse files
fixed sumup problem for float type
parent
aa796a85
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
25 deletions
+27
-25
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+10
-10
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+4
-2
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+4
-4
src/treelearner/split_info.hpp
src/treelearner/split_info.hpp
+9
-9
No files found.
src/treelearner/leaf_splits.hpp
View file @
47313fb5
...
@@ -33,7 +33,7 @@ public:
...
@@ -33,7 +33,7 @@ public:
* \param sum_gradients
* \param sum_gradients
* \param sum_hessians
* \param sum_hessians
*/
*/
void
Init
(
int
leaf
,
const
DataPartition
*
data_partition
,
score_t
sum_gradients
,
score_t
sum_hessians
)
{
void
Init
(
int
leaf
,
const
DataPartition
*
data_partition
,
double
sum_gradients
,
double
sum_hessians
)
{
leaf_index_
=
leaf
;
leaf_index_
=
leaf
;
num_data_in_leaf_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
data_indices_
);
num_data_in_leaf_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
data_indices_
);
sum_gradients_
=
sum_gradients
;
sum_gradients_
=
sum_gradients
;
...
@@ -52,8 +52,8 @@ public:
...
@@ -52,8 +52,8 @@ public:
num_data_in_leaf_
=
num_data_
;
num_data_in_leaf_
=
num_data_
;
leaf_index_
=
0
;
leaf_index_
=
0
;
data_indices_
=
nullptr
;
data_indices_
=
nullptr
;
score_t
tmp_sum_gradients
=
0.0
;
double
tmp_sum_gradients
=
0.0
f
;
score_t
tmp_sum_hessians
=
0.0
;
double
tmp_sum_hessians
=
0.0
f
;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
tmp_sum_gradients
+=
gradients
[
i
];
tmp_sum_gradients
+=
gradients
[
i
];
...
@@ -76,8 +76,8 @@ public:
...
@@ -76,8 +76,8 @@ public:
void
Init
(
int
leaf
,
const
DataPartition
*
data_partition
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
void
Init
(
int
leaf
,
const
DataPartition
*
data_partition
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
leaf_index_
=
leaf
;
leaf_index_
=
leaf
;
num_data_in_leaf_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
data_indices_
);
num_data_in_leaf_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
data_indices_
);
score_t
tmp_sum_gradients
=
0.0
;
double
tmp_sum_gradients
=
0.0
f
;
score_t
tmp_sum_hessians
=
0.0
;
double
tmp_sum_hessians
=
0.0
f
;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
data_size_t
idx
=
data_indices_
[
i
];
data_size_t
idx
=
data_indices_
[
i
];
...
@@ -97,7 +97,7 @@ public:
...
@@ -97,7 +97,7 @@ public:
* \param sum_gradients
* \param sum_gradients
* \param sum_hessians
* \param sum_hessians
*/
*/
void
Init
(
score_t
sum_gradients
,
score_t
sum_hessians
)
{
void
Init
(
double
sum_gradients
,
double
sum_hessians
)
{
leaf_index_
=
0
;
leaf_index_
=
0
;
sum_gradients_
=
sum_gradients
;
sum_gradients_
=
sum_gradients
;
sum_hessians_
=
sum_hessians
;
sum_hessians_
=
sum_hessians
;
...
@@ -126,10 +126,10 @@ public:
...
@@ -126,10 +126,10 @@ public:
data_size_t
num_data_in_leaf
()
const
{
return
num_data_in_leaf_
;
}
data_size_t
num_data_in_leaf
()
const
{
return
num_data_in_leaf_
;
}
/*! \brief Get sum of gradients of current leaf */
/*! \brief Get sum of gradients of current leaf */
score_t
sum_gradients
()
const
{
return
sum_gradients_
;
}
double
sum_gradients
()
const
{
return
sum_gradients_
;
}
/*! \brief Get sum of hessians of current leaf */
/*! \brief Get sum of hessians of current leaf */
score_t
sum_hessians
()
const
{
return
sum_hessians_
;
}
double
sum_hessians
()
const
{
return
sum_hessians_
;
}
/*! \brief Get indices of data of current leaf */
/*! \brief Get indices of data of current leaf */
data_size_t
*
data_indices
()
const
{
return
data_indices_
;
}
data_size_t
*
data_indices
()
const
{
return
data_indices_
;
}
...
@@ -147,9 +147,9 @@ private:
...
@@ -147,9 +147,9 @@ private:
/*! \brief number of features */
/*! \brief number of features */
int
num_features_
;
int
num_features_
;
/*! \brief sum of gradients of current leaf */
/*! \brief sum of gradients of current leaf */
score_t
sum_gradients_
;
double
sum_gradients_
;
/*! \brief sum of hessians of current leaf */
/*! \brief sum of hessians of current leaf */
score_t
sum_hessians_
;
double
sum_hessians_
;
/*! \brief indices of data of current leaf */
/*! \brief indices of data of current leaf */
data_size_t
*
data_indices_
;
data_size_t
*
data_indices_
;
};
};
...
...
src/treelearner/serial_tree_learner.cpp
View file @
47313fb5
...
@@ -15,7 +15,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
...
@@ -15,7 +15,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
// initialize with nullptr
// initialize with nullptr
num_leaves_
=
tree_config
.
num_leaves
;
num_leaves_
=
tree_config
.
num_leaves
;
min_num_data_one_leaf_
=
static_cast
<
data_size_t
>
(
tree_config
.
min_data_in_leaf
);
min_num_data_one_leaf_
=
static_cast
<
data_size_t
>
(
tree_config
.
min_data_in_leaf
);
min_sum_hessian_one_leaf_
=
static_cast
<
score_t
>
(
tree_config
.
min_sum_hessian_in_leaf
);
min_sum_hessian_one_leaf_
=
static_cast
<
double
>
(
tree_config
.
min_sum_hessian_in_leaf
);
feature_fraction_
=
tree_config
.
feature_fraction
;
feature_fraction_
=
tree_config
.
feature_fraction
;
random_
=
Random
(
tree_config
.
feature_fraction_seed
);
random_
=
Random
(
tree_config
.
feature_fraction_seed
);
histogram_pool_size_
=
tree_config
.
histogram_pool_size
;
histogram_pool_size_
=
tree_config
.
histogram_pool_size
;
...
@@ -415,7 +415,9 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
...
@@ -415,7 +415,9 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
*
right_leaf
=
tree
->
Split
(
best_Leaf
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
*
right_leaf
=
tree
->
Split
(
best_Leaf
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
feature_index
(),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
feature_index
(),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
BinToValue
(
best_split_info
.
threshold
),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
BinToValue
(
best_split_info
.
threshold
),
best_split_info
.
left_output
,
best_split_info
.
right_output
,
best_split_info
.
gain
);
static_cast
<
float
>
(
best_split_info
.
left_output
),
static_cast
<
float
>
(
best_split_info
.
right_output
),
static_cast
<
float
>
(
best_split_info
.
gain
));
// split data partition
// split data partition
data_partition_
->
Split
(
best_Leaf
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
bin_data
(),
data_partition_
->
Split
(
best_Leaf
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
bin_data
(),
...
...
src/treelearner/serial_tree_learner.h
View file @
47313fb5
...
@@ -41,7 +41,7 @@ public:
...
@@ -41,7 +41,7 @@ public:
void
AddPredictionToScore
(
score_t
*
out_score
)
const
override
{
void
AddPredictionToScore
(
score_t
*
out_score
)
const
override
{
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
score_
t
output
=
last_trained_tree_
->
LeafOutput
(
i
);
floa
t
output
=
last_trained_tree_
->
LeafOutput
(
i
);
data_size_t
*
tmp_idx
=
nullptr
;
data_size_t
*
tmp_idx
=
nullptr
;
data_size_t
cnt_leaf_data
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
tmp_idx
);
data_size_t
cnt_leaf_data
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
tmp_idx
);
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
...
@@ -114,7 +114,7 @@ protected:
...
@@ -114,7 +114,7 @@ protected:
/*! \brief minimal data on one leaf */
/*! \brief minimal data on one leaf */
data_size_t
min_num_data_one_leaf_
;
data_size_t
min_num_data_one_leaf_
;
/*! \brief minimal sum hessian on one leaf */
/*! \brief minimal sum hessian on one leaf */
score_t
min_sum_hessian_one_leaf_
;
double
min_sum_hessian_one_leaf_
;
/*! \brief sub-feature fraction rate */
/*! \brief sub-feature fraction rate */
float
feature_fraction_
;
float
feature_fraction_
;
/*! \brief training data partition on leaves */
/*! \brief training data partition on leaves */
...
@@ -186,11 +186,11 @@ inline void SerialTreeLearner::FindBestSplitForLeaf(LeafSplits* leaf_splits) {
...
@@ -186,11 +186,11 @@ inline void SerialTreeLearner::FindBestSplitForLeaf(LeafSplits* leaf_splits) {
if
(
leaf_splits
==
nullptr
||
leaf_splits
->
LeafIndex
()
<
0
)
{
if
(
leaf_splits
==
nullptr
||
leaf_splits
->
LeafIndex
()
<
0
)
{
return
;
return
;
}
}
std
::
vector
<
float
>
gains
;
std
::
vector
<
double
>
gains
;
for
(
size_t
i
=
0
;
i
<
leaf_splits
->
BestSplitPerFeature
().
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
leaf_splits
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
leaf_splits
->
BestSplitPerFeature
()[
i
].
gain
);
gains
.
push_back
(
leaf_splits
->
BestSplitPerFeature
()[
i
].
gain
);
}
}
int
best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
int
best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
int
leaf
=
leaf_splits
->
LeafIndex
();
int
leaf
=
leaf_splits
->
LeafIndex
();
best_split_per_leaf_
[
leaf
]
=
leaf_splits
->
BestSplitPerFeature
()[
best_feature
];
best_split_per_leaf_
[
leaf
]
=
leaf_splits
->
BestSplitPerFeature
()[
best_feature
];
best_split_per_leaf_
[
leaf
].
feature
=
best_feature
;
best_split_per_leaf_
[
leaf
].
feature
=
best_feature
;
...
...
src/treelearner/split_info.hpp
View file @
47313fb5
...
@@ -21,23 +21,23 @@ public:
...
@@ -21,23 +21,23 @@ public:
/*! \brief Split threshold */
/*! \brief Split threshold */
unsigned
int
threshold
;
unsigned
int
threshold
;
/*! \brief Left output after split */
/*! \brief Left output after split */
score_t
left_output
;
double
left_output
;
/*! \brief Right output after split */
/*! \brief Right output after split */
score_t
right_output
;
double
right_output
;
/*! \brief Split gain */
/*! \brief Split gain */
score_t
gain
;
double
gain
;
/*! \brief Left number of data after split */
/*! \brief Left number of data after split */
data_size_t
left_count
;
data_size_t
left_count
;
/*! \brief Right number of data after split */
/*! \brief Right number of data after split */
data_size_t
right_count
;
data_size_t
right_count
;
/*! \brief Left sum gradient after split */
/*! \brief Left sum gradient after split */
score_t
left_sum_gradient
;
double
left_sum_gradient
;
/*! \brief Left sum hessian after split */
/*! \brief Left sum hessian after split */
score_t
left_sum_hessian
;
double
left_sum_hessian
;
/*! \brief Right sum gradient after split */
/*! \brief Right sum gradient after split */
score_t
right_sum_gradient
;
double
right_sum_gradient
;
/*! \brief Right sum hessian after split */
/*! \brief Right sum hessian after split */
score_t
right_sum_hessian
;
double
right_sum_hessian
;
SplitInfo
()
{
SplitInfo
()
{
// initialize with -1 and -inf gain
// initialize with -1 and -inf gain
...
@@ -75,8 +75,8 @@ public:
...
@@ -75,8 +75,8 @@ public:
inline
bool
SplitInfo
::
operator
>
(
const
SplitInfo
&
si
)
const
{
inline
bool
SplitInfo
::
operator
>
(
const
SplitInfo
&
si
)
const
{
score_t
local_gain
=
this
->
gain
;
double
local_gain
=
this
->
gain
;
score_t
other_gain
=
si
.
gain
;
double
other_gain
=
si
.
gain
;
// replace nan with -inf
// replace nan with -inf
if
(
local_gain
==
NAN
)
{
if
(
local_gain
==
NAN
)
{
local_gain
=
kMinScore
;
local_gain
=
kMinScore
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment