Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
8a6bd5ec
"src/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "5f5c61f31806dcdd2ef87f7d8a7420c7bfdfe369"
Commit
8a6bd5ec
authored
Mar 25, 2017
by
Guolin Ke
Browse files
add FitByExistingTree.
parent
32ef85da
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
95 additions
and
55 deletions
+95
-55
include/LightGBM/tree_learner.h
include/LightGBM/tree_learner.h
+6
-1
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+1
-1
src/boosting/score_updater.hpp
src/boosting/score_updater.hpp
+5
-5
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+47
-33
src/treelearner/parallel_tree_learner.h
src/treelearner/parallel_tree_learner.h
+1
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+26
-5
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+7
-7
src/treelearner/voting_parallel_tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
+2
-2
No files found.
include/LightGBM/tree_learner.h
View file @
8a6bd5ec
...
...
@@ -43,6 +43,11 @@ public:
*/
virtual
Tree
*
Train
(
const
score_t
*
gradients
,
const
score_t
*
hessians
)
=
0
;
/*!
* \brief Use an existing tree to fit the new gradients and hessians.
*/
virtual
Tree
*
FitByExistingTree
(
const
Tree
*
old_tree
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
const
=
0
;
/*!
* \brief Set bagging data
* \param used_indices Used data indices
...
...
@@ -55,7 +60,7 @@ public:
* \brief Using last trained tree to predict score then adding to out_score;
* \param out_score output score
*/
virtual
void
AddPredictionToScore
(
double
*
out_score
)
const
=
0
;
virtual
void
AddPredictionToScore
(
const
Tree
*
tree
,
double
*
out_score
)
const
=
0
;
TreeLearner
()
=
default
;
/*! \brief Disable copy */
...
...
src/boosting/gbdt.cpp
View file @
8a6bd5ec
...
...
@@ -452,7 +452,7 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
#endif
// update training score
if
(
!
is_use_subset_
)
{
train_score_updater_
->
AddScore
(
tree_learner_
.
get
(),
curr_class
);
train_score_updater_
->
AddScore
(
tree_learner_
.
get
(),
tree
,
curr_class
);
}
else
{
train_score_updater_
->
AddScore
(
tree
,
curr_class
);
}
...
...
src/boosting/score_updater.hpp
View file @
8a6bd5ec
...
...
@@ -70,19 +70,19 @@ public:
/*!
* \brief Adding prediction score, only used for training data.
* The training data is partitioned into tree leaves after training
* Based on which We can get prediction quck
i
ly.
* Based on which We can get prediction qu
i
ckly.
* \param tree_learner
* \param curr_class Current class for multiclass training
*/
inline
void
AddScore
(
const
TreeLearner
*
tree_learner
,
int
curr_class
)
{
tree_learner
->
AddPredictionToScore
(
score_
.
data
()
+
curr_class
*
num_data_
);
inline
void
AddScore
(
const
TreeLearner
*
tree_learner
,
const
Tree
*
tree
,
int
curr_class
)
{
tree_learner
->
AddPredictionToScore
(
tree
,
score_
.
data
()
+
curr_class
*
num_data_
);
}
/*!
* \brief Using tree model to get prediction number, then adding to scores for parts of data
* Used for prediction of training out-of-bag data
* \param tree Trained tree model
* \param data_indices Indices of data that will be proc
c
essed
* \param data_cnt Number of data that will be proc
c
essed
* \param data_indices Indices of data that will be processed
* \param data_cnt Number of data that will be processed
* \param curr_class Current class for multiclass training
*/
inline
void
AddScore
(
const
Tree
*
tree
,
const
data_size_t
*
data_indices
,
...
...
src/treelearner/feature_histogram.hpp
View file @
8a6bd5ec
...
...
@@ -8,7 +8,7 @@
#include <cstring>
namespace
LightGBM
namespace
LightGBM
{
class
FeatureMetainfo
{
...
...
@@ -45,10 +45,10 @@ public:
data_
=
data
;
if
(
bin_type
==
BinType
::
NumericalBin
)
{
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdNumerical
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
);
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
);
}
else
{
find_best_threshold_fun_
=
std
::
bind
(
&
FeatureHistogram
::
FindBestThresholdCategorical
,
this
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
);
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
);
}
}
...
...
@@ -68,12 +68,12 @@ public:
}
void
FindBestThreshold
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
SplitInfo
*
output
)
{
SplitInfo
*
output
)
{
find_best_threshold_fun_
(
sum_gradient
,
sum_hessian
+
2
*
kEpsilon
,
num_data
,
output
);
}
void
FindBestThresholdNumerical
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
SplitInfo
*
output
)
{
SplitInfo
*
output
)
{
double
best_sum_left_gradient
=
NAN
;
double
best_sum_left_hessian
=
NAN
;
double
best_gain
=
kMinScore
;
...
...
@@ -82,7 +82,8 @@ public:
double
sum_right_gradient
=
0.0
f
;
double
sum_right_hessian
=
kEpsilon
;
data_size_t
right_count
=
0
;
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
);
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
double
min_gain_shift
=
gain_shift
+
meta_
->
tree_config
->
min_gain_to_split
;
is_splittable_
=
false
;
const
int
bias
=
meta_
->
bias
;
...
...
@@ -95,7 +96,7 @@ public:
right_count
+=
data_
[
t
].
cnt
;
// if data not enough, or sum hessian too small
if
(
right_count
<
meta_
->
tree_config
->
min_data_in_leaf
||
sum_right_hessian
<
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
continue
;
||
sum_right_hessian
<
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
continue
;
data_size_t
left_count
=
num_data
-
right_count
;
// if data not enough
if
(
left_count
<
meta_
->
tree_config
->
min_data_in_leaf
)
break
;
...
...
@@ -106,8 +107,10 @@ public:
double
sum_left_gradient
=
sum_gradient
-
sum_right_gradient
;
// current split gain
double
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
);
double
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
// gain with split is worse than without split
if
(
current_gain
<=
min_gain_shift
)
continue
;
...
...
@@ -126,12 +129,14 @@ public:
if
(
is_splittable_
)
{
// update split information
output
->
threshold
=
best_threshold
;
output
->
left_output
=
CalculateSplittedLeafOutput
(
best_sum_left_gradient
,
best_sum_left_hessian
);
output
->
left_output
=
CalculateSplittedLeafOutput
(
best_sum_left_gradient
,
best_sum_left_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
output
->
left_count
=
best_left_count
;
output
->
left_sum_gradient
=
best_sum_left_gradient
;
output
->
left_sum_hessian
=
best_sum_left_hessian
-
kEpsilon
;
output
->
right_output
=
CalculateSplittedLeafOutput
(
sum_gradient
-
best_sum_left_gradient
,
sum_hessian
-
best_sum_left_hessian
);
sum_hessian
-
best_sum_left_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
output
->
right_count
=
num_data
-
best_left_count
;
output
->
right_sum_gradient
=
sum_gradient
-
best_sum_left_gradient
;
output
->
right_sum_hessian
=
sum_hessian
-
best_sum_left_hessian
-
kEpsilon
;
...
...
@@ -142,13 +147,14 @@ public:
}
void
FindBestThresholdCategorical
(
double
sum_gradient
,
double
sum_hessian
,
data_size_t
num_data
,
SplitInfo
*
output
)
{
SplitInfo
*
output
)
{
double
best_gain
=
kMinScore
;
uint32_t
best_threshold
=
static_cast
<
uint32_t
>
(
meta_
->
num_bin
);
data_size_t
best_left_count
=
0
;
double
best_sum_left_gradient
=
0.0
f
;
double
best_sum_left_hessian
=
0.0
f
;
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
);
double
gain_shift
=
GetLeafSplitGain
(
sum_gradient
,
sum_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
double
min_gain_shift
=
gain_shift
+
meta_
->
tree_config
->
min_gain_to_split
;
is_splittable_
=
false
;
const
int
bias
=
meta_
->
bias
;
...
...
@@ -158,7 +164,7 @@ public:
for
(;
t
>=
t_end
;
--
t
)
{
// if data not enough, or sum hessian too small
if
(
data_
[
t
].
cnt
<
meta_
->
tree_config
->
min_data_in_leaf
||
data_
[
t
].
sum_hessians
<
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
continue
;
||
data_
[
t
].
sum_hessians
<
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
continue
;
data_size_t
other_count
=
num_data
-
data_
[
t
].
cnt
;
// if data not enough
if
(
other_count
<
meta_
->
tree_config
->
min_data_in_leaf
)
continue
;
...
...
@@ -169,8 +175,10 @@ public:
double
sum_other_gradient
=
sum_gradient
-
data_
[
t
].
sum_gradients
;
// current split gain
double
current_gain
=
GetLeafSplitGain
(
sum_other_gradient
,
sum_other_hessian
)
+
GetLeafSplitGain
(
data_
[
t
].
sum_gradients
,
data_
[
t
].
sum_hessians
+
kEpsilon
);
double
current_gain
=
GetLeafSplitGain
(
sum_other_gradient
,
sum_other_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
)
+
GetLeafSplitGain
(
data_
[
t
].
sum_gradients
,
data_
[
t
].
sum_hessians
+
kEpsilon
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
// gain with split is worse than without split
if
(
current_gain
<=
min_gain_shift
)
continue
;
...
...
@@ -199,12 +207,14 @@ public:
data_size_t
other_count
=
num_data
-
cnt_bin0
;
double
sum_other_hessian
=
sum_hessian
-
sum_bin0_hessian
-
kEpsilon
;
if
(
cnt_bin0
>=
meta_
->
tree_config
->
min_data_in_leaf
&&
sum_bin0_hessian
>=
meta_
->
tree_config
->
min_sum_hessian_in_leaf
&&
other_count
>=
meta_
->
tree_config
->
min_data_in_leaf
&&
sum_other_hessian
>=
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
{
&&
sum_bin0_hessian
>=
meta_
->
tree_config
->
min_sum_hessian_in_leaf
&&
other_count
>=
meta_
->
tree_config
->
min_data_in_leaf
&&
sum_other_hessian
>=
meta_
->
tree_config
->
min_sum_hessian_in_leaf
)
{
double
sum_other_gradient
=
sum_gradient
-
sum_bin0_gradient
;
double
current_gain
=
GetLeafSplitGain
(
sum_other_gradient
,
sum_other_hessian
)
+
GetLeafSplitGain
(
sum_bin0_gradient
,
sum_bin0_hessian
+
kEpsilon
);
double
current_gain
=
GetLeafSplitGain
(
sum_other_gradient
,
sum_other_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
)
+
GetLeafSplitGain
(
sum_bin0_gradient
,
sum_bin0_hessian
+
kEpsilon
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
if
(
current_gain
>
min_gain_shift
)
{
is_splittable_
=
true
;
// better split point
...
...
@@ -221,12 +231,14 @@ public:
if
(
is_splittable_
)
{
// update split information
output
->
threshold
=
best_threshold
;
output
->
left_output
=
CalculateSplittedLeafOutput
(
best_sum_left_gradient
,
best_sum_left_hessian
);
output
->
left_output
=
CalculateSplittedLeafOutput
(
best_sum_left_gradient
,
best_sum_left_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
output
->
left_count
=
best_left_count
;
output
->
left_sum_gradient
=
best_sum_left_gradient
;
output
->
left_sum_hessian
=
best_sum_left_hessian
-
kEpsilon
;
output
->
right_output
=
CalculateSplittedLeafOutput
(
sum_gradient
-
best_sum_left_gradient
,
sum_hessian
-
best_sum_left_hessian
);
sum_hessian
-
best_sum_left_hessian
,
meta_
->
tree_config
->
lambda_l1
,
meta_
->
tree_config
->
lambda_l2
);
output
->
right_count
=
num_data
-
best_left_count
;
output
->
right_sum_gradient
=
sum_gradient
-
best_sum_left_gradient
;
output
->
right_sum_hessian
=
sum_hessian
-
best_sum_left_hessian
-
kEpsilon
;
...
...
@@ -260,18 +272,17 @@ public:
*/
void
set_is_splittable
(
bool
val
)
{
is_splittable_
=
val
;
}
private:
/*!
* \brief Calculate the split gain based on regularized sum_gradients and sum_hessians
* \param sum_gradients
* \param sum_hessians
* \return split gain
*/
double
GetLeafSplitGain
(
double
sum_gradients
,
double
sum_hessians
)
const
{
static
double
GetLeafSplitGain
(
double
sum_gradients
,
double
sum_hessians
,
double
l1
,
double
l2
)
{
double
abs_sum_gradients
=
std
::
fabs
(
sum_gradients
);
double
reg_abs_sum_gradients
=
std
::
max
(
0.0
,
abs_sum_gradients
-
meta_
->
tree_config
->
lambda_
l1
);
double
reg_abs_sum_gradients
=
std
::
max
(
0.0
,
abs_sum_gradients
-
l1
);
return
(
reg_abs_sum_gradients
*
reg_abs_sum_gradients
)
/
(
sum_hessians
+
meta_
->
tree_config
->
lambda_
l2
);
/
(
sum_hessians
+
l2
);
}
...
...
@@ -281,12 +292,15 @@ private:
* \param sum_hessians
* \return leaf output
*/
double
CalculateSplittedLeafOutput
(
double
sum_gradients
,
double
sum_hessians
)
const
{
static
double
CalculateSplittedLeafOutput
(
double
sum_gradients
,
double
sum_hessians
,
double
l1
,
double
l2
)
{
double
abs_sum_gradients
=
std
::
fabs
(
sum_gradients
);
double
reg_abs_sum_gradients
=
std
::
max
(
0.0
,
abs_sum_gradients
-
meta_
->
tree_config
->
lambda_
l1
);
double
reg_abs_sum_gradients
=
std
::
max
(
0.0
,
abs_sum_gradients
-
l1
);
return
-
std
::
copysign
(
reg_abs_sum_gradients
,
sum_gradients
)
/
(
sum_hessians
+
meta_
->
tree_config
->
lambda_
l2
);
/
(
sum_hessians
+
l2
);
}
private:
const
FeatureMetainfo
*
meta_
;
/*! \brief sum of gradient of each bin */
HistogramBinEntry
*
data_
;
...
...
@@ -346,7 +360,7 @@ public:
void
DynamicChangeSize
(
const
Dataset
*
train_data
,
const
TreeConfig
*
tree_config
,
int
cache_size
,
int
total_size
)
{
if
(
feature_metas_
.
empty
())
{
feature_metas_
.
resize
(
train_data
->
num_features
());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
train_data
->
num_features
();
++
i
)
{
feature_metas_
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
...
...
@@ -363,7 +377,7 @@ public:
Reset
(
cache_size
,
total_size
);
pool_
.
resize
(
cache_size
);
data_
.
resize
(
cache_size
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
old_cache_size
;
i
<
cache_size_
;
++
i
)
{
pool_
[
i
].
reset
(
new
FeatureHistogram
[
train_data
->
num_features
()]);
data_
[
i
].
resize
(
num_total_bin
);
...
...
@@ -382,7 +396,7 @@ public:
}
void
ResetConfig
(
const
TreeConfig
*
tree_config
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
feature_metas_
.
size
());
++
i
)
{
feature_metas_
[
i
].
tree_config
=
tree_config
;
}
...
...
src/treelearner/parallel_tree_learner.h
View file @
8a6bd5ec
...
...
@@ -103,7 +103,7 @@ public:
void
ResetConfig
(
const
TreeConfig
*
tree_config
)
override
;
protected:
void
BeforeTrain
()
override
;
bool
BeforeFindBestSplit
(
int
left_leaf
,
int
right_leaf
)
override
;
bool
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
)
override
;
void
FindBestThresholds
()
override
;
void
FindBestSplitsForLeaves
()
override
;
void
Split
(
Tree
*
tree
,
int
best_Leaf
,
int
*
left_leaf
,
int
*
right_leaf
)
override
;
...
...
src/treelearner/serial_tree_learner.cpp
View file @
8a6bd5ec
...
...
@@ -179,8 +179,6 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
#endif
auto
tree
=
std
::
unique_ptr
<
Tree
>
(
new
Tree
(
tree_config_
->
num_leaves
));
// save pointer to last trained tree
last_trained_tree_
=
tree
.
get
();
// root leaf
int
left_leaf
=
0
;
int
cur_depth
=
1
;
...
...
@@ -191,7 +189,7 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
start_time
=
std
::
chrono
::
steady_clock
::
now
();
#endif
// some initial works before finding best split
if
(
BeforeFindBestSplit
(
left_leaf
,
right_leaf
))
{
if
(
BeforeFindBestSplit
(
tree
.
get
(),
left_leaf
,
right_leaf
))
{
#ifdef TIMETAG
init_split_time
+=
std
::
chrono
::
steady_clock
::
now
()
-
start_time
;
#endif
...
...
@@ -223,6 +221,29 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
return
tree
.
release
();
}
/*!
 * \brief Re-fit the leaf outputs of an existing tree to new gradients and hessians.
 *
 * A copy of old_tree is made; for every leaf of that copy the gradients and
 * hessians of the data indices that data_partition_ reports for the leaf are
 * summed, and the leaf output is overwritten (via SetLeafOutput) with the value
 * returned by FeatureHistogram::CalculateSplittedLeafOutput using the
 * lambda_l1 / lambda_l2 settings from tree_config_.
 *
 * \param old_tree Tree to copy; it is not modified
 * \param gradients Gradients, indexed by the data indices from data_partition_
 * \param hessians Hessians, indexed by the data indices from data_partition_
 * \return Newly allocated tree, released from its unique_ptr (the caller receives the raw pointer)
 */
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const {
  auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
  CHECK(data_partition_->num_leaves() >= tree->num_leaves());
  // Iterate over the leaves of the tree being re-fitted, not over the partition:
  // the CHECK above only guarantees the partition has at least as many leaves,
  // so using the partition's count could address leaves the tree does not have.
  const int num_tree_leaves = tree->num_leaves();
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < num_tree_leaves; ++i) {
    data_size_t cnt_leaf_data = 0;
    auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
    double sum_grad = 0.0;
    double sum_hess = 0.0;
    for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
      auto idx = tmp_idx[j];
      sum_grad += gradients[idx];
      sum_hess += hessians[idx];
    }
    // avoid zero hessians.
    if (sum_hess <= 0) sum_hess = kEpsilon;
    double output = FeatureHistogram::CalculateSplittedLeafOutput(sum_grad, sum_hess,
                                                                  tree_config_->lambda_l1,
                                                                  tree_config_->lambda_l2);
    tree->SetLeafOutput(i, output);
  }
  return tree.release();
}
void
SerialTreeLearner
::
BeforeTrain
()
{
// reset histogram pool
...
...
@@ -305,11 +326,11 @@ void SerialTreeLearner::BeforeTrain() {
}
}
bool
SerialTreeLearner
::
BeforeFindBestSplit
(
int
left_leaf
,
int
right_leaf
)
{
bool
SerialTreeLearner
::
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
)
{
// check depth of current leaf
if
(
tree_config_
->
max_depth
>
0
)
{
// only need to check left leaf, since right leaf is in same level of left leaf
if
(
last_trained_
tree
_
->
leaf_depth
(
left_leaf
)
>=
tree_config_
->
max_depth
)
{
if
(
tree
->
leaf_depth
(
left_leaf
)
>=
tree_config_
->
max_depth
)
{
best_split_per_leaf_
[
left_leaf
].
gain
=
kMinScore
;
if
(
right_leaf
>=
0
)
{
best_split_per_leaf_
[
right_leaf
].
gain
=
kMinScore
;
...
...
src/treelearner/serial_tree_learner.h
View file @
8a6bd5ec
...
...
@@ -38,15 +38,18 @@ public:
Tree
*
Train
(
const
score_t
*
gradients
,
const
score_t
*
hessians
)
override
;
Tree
*
FitByExistingTree
(
const
Tree
*
old_tree
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
const
override
;
void
SetBaggingData
(
const
data_size_t
*
used_indices
,
data_size_t
num_data
)
override
{
data_partition_
->
SetUsedDataIndices
(
used_indices
,
num_data
);
}
void
AddPredictionToScore
(
double
*
out_score
)
const
override
{
if
(
last_trained_tree_
->
num_leaves
()
<=
1
)
{
return
;
}
void
AddPredictionToScore
(
const
Tree
*
tree
,
double
*
out_score
)
const
override
{
if
(
tree
->
num_leaves
()
<=
1
)
{
return
;
}
CHECK
(
tree
->
num_leaves
()
<=
data_partition_
->
num_leaves
());
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
double
output
=
static_cast
<
double
>
(
last_trained_
tree
_
->
LeafOutput
(
i
));
double
output
=
static_cast
<
double
>
(
tree
->
LeafOutput
(
i
));
data_size_t
cnt_leaf_data
=
0
;
auto
tmp_idx
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
cnt_leaf_data
);
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
...
...
@@ -64,7 +67,7 @@ protected:
/*!
* \brief Some initial works before FindBestSplit
*/
virtual
bool
BeforeFindBestSplit
(
int
left_leaf
,
int
right_leaf
);
virtual
bool
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
);
/*!
...
...
@@ -95,9 +98,6 @@ protected:
* \return The number of data in the leaf_idx leaf
*/
inline
virtual
data_size_t
GetGlobalDataCountInLeaf
(
int
leaf_idx
)
const
;
/*! \brief Last trained decision tree */
const
Tree
*
last_trained_tree_
;
/*! \brief number of data */
data_size_t
num_data_
;
/*! \brief number of features */
...
...
src/treelearner/voting_parallel_tree_learner.cpp
View file @
8a6bd5ec
...
...
@@ -133,8 +133,8 @@ void VotingParallelTreeLearner::BeforeTrain() {
global_data_count_in_leaf_
[
0
]
=
std
::
get
<
0
>
(
data
);
}
bool
VotingParallelTreeLearner
::
BeforeFindBestSplit
(
int
left_leaf
,
int
right_leaf
)
{
if
(
SerialTreeLearner
::
BeforeFindBestSplit
(
left_leaf
,
right_leaf
))
{
bool
VotingParallelTreeLearner
::
BeforeFindBestSplit
(
const
Tree
*
tree
,
int
left_leaf
,
int
right_leaf
)
{
if
(
SerialTreeLearner
::
BeforeFindBestSplit
(
tree
,
left_leaf
,
right_leaf
))
{
data_size_t
num_data_in_left_child
=
GetGlobalDataCountInLeaf
(
left_leaf
);
data_size_t
num_data_in_right_child
=
GetGlobalDataCountInLeaf
(
right_leaf
);
if
(
right_leaf
<
0
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment