Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
47313fb5
"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "2d4654a18f970c10d35e1e8e0e60dd102310c4ca"
Commit
47313fb5
authored
Nov 01, 2016
by
Guolin Ke
Browse files
fixed sumup problem for float type
parent
aa796a85
Changes
24
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
115 additions
and
107 deletions
+115
-107
include/LightGBM/bin.h
include/LightGBM/bin.h
+2
-2
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+3
-3
include/LightGBM/tree.h
include/LightGBM/tree.h
+7
-7
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+2
-2
src/boosting/score_updater.hpp
src/boosting/score_updater.hpp
+1
-1
src/io/dataset.cpp
src/io/dataset.cpp
+9
-7
src/io/metadata.cpp
src/io/metadata.cpp
+12
-6
src/io/tree.cpp
src/io/tree.cpp
+4
-4
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+9
-9
src/metric/metric.cpp
src/metric/metric.cpp
+8
-8
src/metric/multiclass_metric.hpp
src/metric/multiclass_metric.hpp
+8
-8
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+1
-1
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+1
-1
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+3
-3
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+6
-6
src/objective/objective_function.cpp
src/objective/objective_function.cpp
+4
-4
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+2
-2
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/data_parallel_tree_learner.cpp
+9
-9
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+21
-21
src/treelearner/feature_parallel_tree_learner.cpp
src/treelearner/feature_parallel_tree_learner.cpp
+3
-3
No files found.
include/LightGBM/bin.h
View file @
47313fb5
...
@@ -13,9 +13,9 @@ namespace LightGBM {
...
@@ -13,9 +13,9 @@ namespace LightGBM {
struct
HistogramBinEntry
{
struct
HistogramBinEntry
{
public:
public:
/*! \brief Sum of gradients on this bin */
/*! \brief Sum of gradients on this bin */
score_t
sum_gradients
=
0.0
;
double
sum_gradients
=
0.0
;
/*! \brief Sum of hessians on this bin */
/*! \brief Sum of hessians on this bin */
score_t
sum_hessians
=
0.0
;
double
sum_hessians
=
0.0
;
/*! \brief Number of data on this bin */
/*! \brief Number of data on this bin */
data_size_t
cnt
=
0
;
data_size_t
cnt
=
0
;
...
...
include/LightGBM/dataset.h
View file @
47313fb5
...
@@ -83,7 +83,7 @@ public:
...
@@ -83,7 +83,7 @@ public:
* \brief Set initial scores
* \brief Set initial scores
* \param init_score Initial scores, this class will manage memory for init_score.
* \param init_score Initial scores, this class will manage memory for init_score.
*/
*/
void
SetInitScore
(
s
co
re_
t
*
init_score
);
void
SetInitScore
(
co
nst
floa
t
*
init_score
,
data_size_t
len
);
/*!
/*!
...
@@ -166,7 +166,7 @@ public:
...
@@ -166,7 +166,7 @@ public:
* \brief Get initial scores, if not exists, will return nullptr
* \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores
* \return Pointer of initial scores
*/
*/
inline
const
score_
t
*
init_score
()
const
{
return
init_score_
;
}
inline
const
floa
t
*
init_score
()
const
{
return
init_score_
;
}
/*! \brief Load initial scores from file */
/*! \brief Load initial scores from file */
void
LoadInitialScore
();
void
LoadInitialScore
();
...
@@ -201,7 +201,7 @@ private:
...
@@ -201,7 +201,7 @@ private:
/*! \brief Number of Initial score, used to check correct weight file */
/*! \brief Number of Initial score, used to check correct weight file */
data_size_t
num_init_score_
;
data_size_t
num_init_score_
;
/*! \brief Initial score */
/*! \brief Initial score */
score_
t
*
init_score_
;
floa
t
*
init_score_
;
/*! \brief Queries data */
/*! \brief Queries data */
data_size_t
*
queries_
;
data_size_t
*
queries_
;
};
};
...
...
include/LightGBM/tree.h
View file @
47313fb5
...
@@ -43,11 +43,11 @@ public:
...
@@ -43,11 +43,11 @@ public:
* \return The index of new leaf.
* \return The index of new leaf.
*/
*/
int
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold
,
int
real_feature
,
int
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold
,
int
real_feature
,
float
threshold_float
,
score_
t
left_value
,
float
threshold_float
,
floa
t
left_value
,
score_
t
right_value
,
float
gain
);
floa
t
right_value
,
float
gain
);
/*! \brief Get the output of one leave */
/*! \brief Get the output of one leave */
inline
score_
t
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
inline
floa
t
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
/*!
/*!
* \brief Adding prediction value of this tree model to scores
* \brief Adding prediction value of this tree model to scores
...
@@ -74,7 +74,7 @@ public:
...
@@ -74,7 +74,7 @@ public:
* \param feature_values Feature value of this record
* \param feature_values Feature value of this record
* \return Prediction result
* \return Prediction result
*/
*/
inline
score_
t
Predict
(
const
float
*
feature_values
)
const
;
inline
floa
t
Predict
(
const
float
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
float
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
float
*
feature_values
)
const
;
/*! \brief Get Number of leaves*/
/*! \brief Get Number of leaves*/
...
@@ -93,7 +93,7 @@ public:
...
@@ -93,7 +93,7 @@ public:
*/
*/
inline
void
Shrinkage
(
float
rate
)
{
inline
void
Shrinkage
(
float
rate
)
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
leaf_value_
[
i
]
=
static_cast
<
score_t
>
(
leaf_value_
[
i
]
*
rate
)
;
leaf_value_
[
i
]
=
leaf_value_
[
i
]
*
rate
;
}
}
}
}
...
@@ -144,13 +144,13 @@ private:
...
@@ -144,13 +144,13 @@ private:
/*! \brief The parent of leaf */
/*! \brief The parent of leaf */
int
*
leaf_parent_
;
int
*
leaf_parent_
;
/*! \brief Output of leaves */
/*! \brief Output of leaves */
score_
t
*
leaf_value_
;
floa
t
*
leaf_value_
;
/*! \brief Depth for leaves */
/*! \brief Depth for leaves */
int
*
leaf_depth_
;
int
*
leaf_depth_
;
};
};
inline
score_
t
Tree
::
Predict
(
const
float
*
feature_values
)
const
{
inline
floa
t
Tree
::
Predict
(
const
float
*
feature_values
)
const
{
int
leaf
=
GetLeaf
(
feature_values
);
int
leaf
=
GetLeaf
(
feature_values
);
return
LeafOutput
(
leaf
);
return
LeafOutput
(
leaf
);
}
}
...
...
src/boosting/gbdt.cpp
View file @
47313fb5
...
@@ -217,8 +217,8 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
...
@@ -217,8 +217,8 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
// update training score
// update training score
train_score_updater_
->
AddScore
(
tree_learner_
[
curr_class
],
curr_class
);
train_score_updater_
->
AddScore
(
tree_learner_
[
curr_class
],
curr_class
);
// update validation score
// update validation score
for
(
auto
&
score_
track
er
:
valid_score_updater_
)
{
for
(
auto
&
score_
updat
er
:
valid_score_updater_
)
{
score_
track
er
->
AddScore
(
tree
,
curr_class
);
score_
updat
er
->
AddScore
(
tree
,
curr_class
);
}
}
}
}
...
...
src/boosting/score_updater.hpp
View file @
47313fb5
...
@@ -24,7 +24,7 @@ public:
...
@@ -24,7 +24,7 @@ public:
score_
=
new
score_t
[
num_data_
*
num_class
];
score_
=
new
score_t
[
num_data_
*
num_class
];
// default start score is zero
// default start score is zero
std
::
memset
(
score_
,
0
,
sizeof
(
score_t
)
*
num_data_
*
num_class
);
std
::
memset
(
score_
,
0
,
sizeof
(
score_t
)
*
num_data_
*
num_class
);
const
score_
t
*
init_score
=
data
->
metadata
().
init_score
();
const
floa
t
*
init_score
=
data
->
metadata
().
init_score
();
// if exists initial score, will start from it
// if exists initial score, will start from it
if
(
init_score
!=
nullptr
)
{
if
(
init_score
!=
nullptr
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
src/io/dataset.cpp
View file @
47313fb5
...
@@ -541,7 +541,7 @@ void Dataset::ExtractFeaturesFromMemory() {
...
@@ -541,7 +541,7 @@ void Dataset::ExtractFeaturesFromMemory() {
}
}
}
else
{
}
else
{
// if need to prediction with initial model
// if need to prediction with initial model
score_
t
*
init_score
=
new
score_
t
[
num_data_
];
floa
t
*
init_score
=
new
floa
t
[
num_data_
];
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
const
int
tid
=
omp_get_thread_num
();
...
@@ -549,7 +549,7 @@ void Dataset::ExtractFeaturesFromMemory() {
...
@@ -549,7 +549,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// parser
// parser
parser_
->
ParseOneLine
(
text_reader_
->
Lines
()[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
parser_
->
ParseOneLine
(
text_reader_
->
Lines
()[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
// set initial score
// set initial score
init_score
[
i
]
=
static_cast
<
score_
t
>
(
predict_fun_
(
oneline_features
));
init_score
[
i
]
=
static_cast
<
floa
t
>
(
predict_fun_
(
oneline_features
));
// set label
// set label
metadata_
.
SetLabelAt
(
i
,
tmp_label
);
metadata_
.
SetLabelAt
(
i
,
tmp_label
);
// free processed line:
// free processed line:
...
@@ -573,7 +573,8 @@ void Dataset::ExtractFeaturesFromMemory() {
...
@@ -573,7 +573,8 @@ void Dataset::ExtractFeaturesFromMemory() {
}
}
}
}
// metadata_ will manage space of init_score
// metadata_ will manage space of init_score
metadata_
.
SetInitScore
(
init_score
);
metadata_
.
SetInitScore
(
init_score
,
num_data_
);
delete
[]
init_score
;
}
}
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(guided)
...
@@ -586,9 +587,9 @@ void Dataset::ExtractFeaturesFromMemory() {
...
@@ -586,9 +587,9 @@ void Dataset::ExtractFeaturesFromMemory() {
void
Dataset
::
ExtractFeaturesFromFile
()
{
void
Dataset
::
ExtractFeaturesFromFile
()
{
score_
t
*
init_score
=
nullptr
;
floa
t
*
init_score
=
nullptr
;
if
(
predict_fun_
!=
nullptr
)
{
if
(
predict_fun_
!=
nullptr
)
{
init_score
=
new
score_
t
[
num_data_
];
init_score
=
new
floa
t
[
num_data_
];
}
}
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
[
this
,
&
init_score
]
[
this
,
&
init_score
]
...
@@ -603,7 +604,7 @@ void Dataset::ExtractFeaturesFromFile() {
...
@@ -603,7 +604,7 @@ void Dataset::ExtractFeaturesFromFile() {
parser_
->
ParseOneLine
(
lines
[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
parser_
->
ParseOneLine
(
lines
[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
// set initial score
// set initial score
if
(
init_score
!=
nullptr
)
{
if
(
init_score
!=
nullptr
)
{
init_score
[
start_idx
+
i
]
=
static_cast
<
score_
t
>
(
predict_fun_
(
oneline_features
));
init_score
[
start_idx
+
i
]
=
static_cast
<
floa
t
>
(
predict_fun_
(
oneline_features
));
}
}
// set label
// set label
metadata_
.
SetLabelAt
(
start_idx
+
i
,
tmp_label
);
metadata_
.
SetLabelAt
(
start_idx
+
i
,
tmp_label
);
...
@@ -635,7 +636,8 @@ void Dataset::ExtractFeaturesFromFile() {
...
@@ -635,7 +636,8 @@ void Dataset::ExtractFeaturesFromFile() {
// metadata_ will manage space of init_score
// metadata_ will manage space of init_score
if
(
init_score
!=
nullptr
)
{
if
(
init_score
!=
nullptr
)
{
metadata_
.
SetInitScore
(
init_score
);
metadata_
.
SetInitScore
(
init_score
,
num_data_
);
delete
[]
init_score
;
}
}
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(guided)
...
...
src/io/metadata.cpp
View file @
47313fb5
...
@@ -196,9 +196,9 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
...
@@ -196,9 +196,9 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// get local initial scores
// get local initial scores
if
(
init_score_
!=
nullptr
)
{
if
(
init_score_
!=
nullptr
)
{
score_
t
*
old_scores
=
init_score_
;
floa
t
*
old_scores
=
init_score_
;
num_init_score_
=
num_data_
;
num_init_score_
=
num_data_
;
init_score_
=
new
score_
t
[
num_init_score_
];
init_score_
=
new
floa
t
[
num_init_score_
];
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
init_score_
[
i
]
=
old_scores
[
used_data_indices
[
i
]];
init_score_
[
i
]
=
old_scores
[
used_data_indices
[
i
]];
}
}
...
@@ -211,10 +211,16 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
...
@@ -211,10 +211,16 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
}
}
void
Metadata
::
SetInitScore
(
score_t
*
init_score
)
{
void
Metadata
::
SetInitScore
(
const
float
*
init_score
,
data_size_t
len
)
{
if
(
num_data_
!=
len
)
{
Log
::
Fatal
(
"len of initial score is not same with #data"
);
}
if
(
init_score_
!=
nullptr
)
{
delete
[]
init_score_
;
}
if
(
init_score_
!=
nullptr
)
{
delete
[]
init_score_
;
}
num_init_score_
=
num_data_
;
num_init_score_
=
num_data_
;
init_score_
=
init_score
;
init_score_
=
new
float
[
num_init_score_
];
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
init_score_
[
i
]
=
init_score
[
i
];
}
}
}
void
Metadata
::
LoadWeights
()
{
void
Metadata
::
LoadWeights
()
{
...
@@ -245,11 +251,11 @@ void Metadata::LoadInitialScore() {
...
@@ -245,11 +251,11 @@ void Metadata::LoadInitialScore() {
Log
::
Info
(
"Start loading initial scores"
);
Log
::
Info
(
"Start loading initial scores"
);
num_init_score_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
num_init_score_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
init_score_
=
new
score_
t
[
num_init_score_
];
init_score_
=
new
floa
t
[
num_init_score_
];
float
tmp
=
0.0
f
;
float
tmp
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
init_score_
[
i
]
=
static_cast
<
score_t
>
(
tmp
)
;
init_score_
[
i
]
=
tmp
;
}
}
}
}
...
...
src/io/tree.cpp
View file @
47313fb5
...
@@ -27,7 +27,7 @@ Tree::Tree(int max_leaves)
...
@@ -27,7 +27,7 @@ Tree::Tree(int max_leaves)
split_gain_
=
new
float
[
max_leaves_
-
1
];
split_gain_
=
new
float
[
max_leaves_
-
1
];
leaf_parent_
=
new
int
[
max_leaves_
];
leaf_parent_
=
new
int
[
max_leaves_
];
leaf_value_
=
new
score_
t
[
max_leaves_
];
leaf_value_
=
new
floa
t
[
max_leaves_
];
leaf_depth_
=
new
int
[
max_leaves_
];
leaf_depth_
=
new
int
[
max_leaves_
];
// root is in the depth 1
// root is in the depth 1
leaf_depth_
[
0
]
=
1
;
leaf_depth_
[
0
]
=
1
;
...
@@ -48,7 +48,7 @@ Tree::~Tree() {
...
@@ -48,7 +48,7 @@ Tree::~Tree() {
}
}
int
Tree
::
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold_bin
,
int
real_feature
,
int
Tree
::
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold_bin
,
int
real_feature
,
float
threshold
,
score_
t
left_value
,
score_
t
right_value
,
float
gain
)
{
float
threshold
,
floa
t
left_value
,
floa
t
right_value
,
float
gain
)
{
int
new_node_idx
=
num_leaves_
-
1
;
int
new_node_idx
=
num_leaves_
-
1
;
// update parent info
// update parent info
int
parent
=
leaf_parent_
[
leaf
];
int
parent
=
leaf_parent_
[
leaf
];
...
@@ -124,7 +124,7 @@ std::string Tree::ToString() {
...
@@ -124,7 +124,7 @@ std::string Tree::ToString() {
ss
<<
"leaf_parent="
ss
<<
"leaf_parent="
<<
Common
::
ArrayToString
<
int
>
(
leaf_parent_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
int
>
(
leaf_parent_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
"leaf_value="
ss
<<
"leaf_value="
<<
Common
::
ArrayToString
<
score_
t
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
floa
t
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
std
::
endl
;
ss
<<
std
::
endl
;
return
ss
.
str
();
return
ss
.
str
();
}
}
...
@@ -157,7 +157,7 @@ Tree::Tree(const std::string& str) {
...
@@ -157,7 +157,7 @@ Tree::Tree(const std::string& str) {
threshold_
=
new
float
[
num_leaves_
-
1
];
threshold_
=
new
float
[
num_leaves_
-
1
];
split_gain_
=
new
float
[
num_leaves_
-
1
];
split_gain_
=
new
float
[
num_leaves_
-
1
];
leaf_parent_
=
new
int
[
num_leaves_
];
leaf_parent_
=
new
int
[
num_leaves_
];
leaf_value_
=
new
score_
t
[
num_leaves_
];
leaf_value_
=
new
floa
t
[
num_leaves_
];
split_feature_
=
nullptr
;
split_feature_
=
nullptr
;
threshold_in_bin_
=
nullptr
;
threshold_in_bin_
=
nullptr
;
...
...
src/metric/binary_metric.hpp
View file @
47313fb5
...
@@ -58,7 +58,7 @@ public:
...
@@ -58,7 +58,7 @@ public:
return
false
;
return
false
;
}
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
score_t
sum_loss
=
0.0
f
;
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
...
@@ -78,7 +78,7 @@ public:
...
@@ -78,7 +78,7 @@ public:
}
}
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
score_t
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
}
private:
private:
...
@@ -181,7 +181,7 @@ public:
...
@@ -181,7 +181,7 @@ public:
}
}
}
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
// get indices sorted by score, descent order
// get indices sorted by score, descent order
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
@@ -189,13 +189,13 @@ public:
...
@@ -189,13 +189,13 @@ public:
}
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
// temp sum of postive label
// temp sum of postive label
floa
t
cur_pos
=
0.0
f
;
score_
t
cur_pos
=
0.0
f
;
// total sum of postive label
// total sum of postive label
floa
t
sum_pos
=
0.0
f
;
score_
t
sum_pos
=
0.0
f
;
// accumlate of auc
// accumlate of auc
floa
t
accum
=
0.0
f
;
score_
t
accum
=
0.0
f
;
// temp sum of negative label
// temp sum of negative label
floa
t
cur_neg
=
0.0
f
;
score_
t
cur_neg
=
0.0
f
;
score_t
threshold
=
score
[
sorted_idx
[
0
]];
score_t
threshold
=
score
[
sorted_idx
[
0
]];
if
(
weights_
==
nullptr
)
{
// no weights
if
(
weights_
==
nullptr
)
{
// no weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
@@ -233,11 +233,11 @@ public:
...
@@ -233,11 +233,11 @@ public:
}
}
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
sum_pos
+=
cur_pos
;
floa
t
auc
=
1.0
f
;
score_
t
auc
=
1.0
f
;
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
}
}
return
std
::
vector
<
score_t
>
(
1
,
auc
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
auc
)
)
;
}
}
private:
private:
...
...
src/metric/metric.cpp
View file @
47313fb5
...
@@ -7,21 +7,21 @@
...
@@ -7,21 +7,21 @@
namespace
LightGBM
{
namespace
LightGBM
{
Metric
*
Metric
::
CreateMetric
(
const
std
::
string
&
type
,
const
MetricConfig
&
config
)
{
Metric
*
Metric
::
CreateMetric
(
const
std
::
string
&
type
,
const
MetricConfig
&
config
)
{
if
(
type
==
"l2"
)
{
if
(
type
==
std
::
string
(
"l2"
)
)
{
return
new
L2Metric
(
config
);
return
new
L2Metric
(
config
);
}
else
if
(
type
==
"l1"
)
{
}
else
if
(
type
==
std
::
string
(
"l1"
)
)
{
return
new
L1Metric
(
config
);
return
new
L1Metric
(
config
);
}
else
if
(
type
==
"binary_logloss"
)
{
}
else
if
(
type
==
std
::
string
(
"binary_logloss"
)
)
{
return
new
BinaryLoglossMetric
(
config
);
return
new
BinaryLoglossMetric
(
config
);
}
else
if
(
type
==
"binary_error"
)
{
}
else
if
(
type
==
std
::
string
(
"binary_error"
)
)
{
return
new
BinaryErrorMetric
(
config
);
return
new
BinaryErrorMetric
(
config
);
}
else
if
(
type
==
"auc"
)
{
}
else
if
(
type
==
std
::
string
(
"auc"
)
)
{
return
new
AUCMetric
(
config
);
return
new
AUCMetric
(
config
);
}
else
if
(
type
==
"ndcg"
)
{
}
else
if
(
type
==
std
::
string
(
"ndcg"
)
)
{
return
new
NDCGMetric
(
config
);
return
new
NDCGMetric
(
config
);
}
else
if
(
type
==
"multi_logloss"
){
}
else
if
(
type
==
std
::
string
(
"multi_logloss"
)
)
{
return
new
MultiLoglossMetric
(
config
);
return
new
MultiLoglossMetric
(
config
);
}
else
if
(
type
==
"multi_error"
){
}
else
if
(
type
==
std
::
string
(
"multi_error"
)
)
{
return
new
MultiErrorMetric
(
config
);
return
new
MultiErrorMetric
(
config
);
}
}
return
nullptr
;
return
nullptr
;
...
...
src/metric/multiclass_metric.hpp
View file @
47313fb5
...
@@ -50,14 +50,14 @@ public:
...
@@ -50,14 +50,14 @@ public:
return
false
;
return
false
;
}
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
;
score_t
sum_loss
=
0.0
;
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
}
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
);
...
@@ -65,16 +65,16 @@ public:
...
@@ -65,16 +65,16 @@ public:
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
}
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
)
*
weights_
[
i
];
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
)
*
weights_
[
i
];
}
}
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
score_t
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
}
private:
private:
...
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
...
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
public:
public:
explicit
MultiErrorMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiErrorMetric
>
(
config
)
{}
explicit
MultiErrorMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiErrorMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
score_
t
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
floa
t
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
size_t
k
=
static_cast
<
size_t
>
(
label
);
for
(
size_t
i
=
0
;
i
<
score
.
size
();
++
i
){
for
(
size_t
i
=
0
;
i
<
score
.
size
();
++
i
){
if
(
i
!=
k
&&
score
[
i
]
>
score
[
k
])
{
if
(
i
!=
k
&&
score
[
i
]
>
score
[
k
])
{
...
@@ -119,7 +119,7 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
...
@@ -119,7 +119,7 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
public:
public:
explicit
MultiLoglossMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiLoglossMetric
>
(
config
)
{}
explicit
MultiLoglossMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiLoglossMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
score_
t
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
floa
t
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
size_t
k
=
static_cast
<
size_t
>
(
label
);
Common
::
Softmax
(
&
score
);
Common
::
Softmax
(
&
score
);
if
(
score
[
k
]
>
kEpsilon
)
{
if
(
score
[
k
]
>
kEpsilon
)
{
...
...
src/metric/rank_metric.hpp
View file @
47313fb5
...
@@ -84,7 +84,7 @@ public:
...
@@ -84,7 +84,7 @@ public:
return
true
;
return
true
;
}
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
// some buffers for multi-threading sum up
// some buffers for multi-threading sum up
std
::
vector
<
std
::
vector
<
float
>>
result_buffer_
;
std
::
vector
<
std
::
vector
<
float
>>
result_buffer_
;
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
...
...
src/metric/regression_metric.hpp
View file @
47313fb5
...
@@ -67,7 +67,7 @@ public:
...
@@ -67,7 +67,7 @@ public:
}
}
}
}
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
return
std
::
vector
<
float
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
}
...
...
src/objective/binary_objective.hpp
View file @
47313fb5
...
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
public:
public:
explicit
BinaryLogloss
(
const
ObjectiveConfig
&
config
)
{
explicit
BinaryLogloss
(
const
ObjectiveConfig
&
config
)
{
is_unbalance_
=
config
.
is_unbalance
;
is_unbalance_
=
config
.
is_unbalance
;
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
if
(
sigmoid_
<=
0.0
)
{
if
(
sigmoid_
<=
0.0
)
{
Log
::
Fatal
(
"Sigmoid parameter %f :should greater than zero"
,
sigmoid_
);
Log
::
Fatal
(
"Sigmoid parameter %f :should greater than zero"
,
sigmoid_
);
}
}
...
@@ -92,11 +92,11 @@ private:
...
@@ -92,11 +92,11 @@ private:
/*! \brief True if using unbalance training */
/*! \brief True if using unbalance training */
bool
is_unbalance_
;
bool
is_unbalance_
;
/*! \brief Sigmoid parameter */
/*! \brief Sigmoid parameter */
score_
t
sigmoid_
;
floa
t
sigmoid_
;
/*! \brief Values for positive and negative labels */
/*! \brief Values for positive and negative labels */
int
label_val_
[
2
];
int
label_val_
[
2
];
/*! \brief Weights for positive and negative labels */
/*! \brief Weights for positive and negative labels */
score_
t
label_weights_
[
2
];
floa
t
label_weights_
[
2
];
/*! \brief Weights for data */
/*! \brief Weights for data */
const
float
*
weights_
;
const
float
*
weights_
;
};
};
...
...
src/objective/multiclass_objective.hpp
View file @
47313fb5
...
@@ -38,13 +38,13 @@ public:
...
@@ -38,13 +38,13 @@ public:
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
}
Common
::
Softmax
(
&
rec
);
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
score_t
p
=
rec
[
k
];
score_t
p
=
static_cast
<
score_t
>
(
rec
[
k
]
)
;
if
(
label_int_
[
i
]
==
k
)
{
if
(
label_int_
[
i
]
==
k
)
{
gradients
[
k
*
num_data_
+
i
]
=
p
-
1.0
f
;
gradients
[
k
*
num_data_
+
i
]
=
p
-
1.0
f
;
}
else
{
}
else
{
...
@@ -56,13 +56,13 @@ public:
...
@@ -56,13 +56,13 @@ public:
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
}
Common
::
Softmax
(
&
rec
);
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
float
p
=
rec
[
k
];
score_t
p
=
static_cast
<
score_t
>
(
rec
[
k
]
)
;
if
(
label_int_
[
i
]
==
k
)
{
if
(
label_int_
[
i
]
==
k
)
{
gradients
[
k
*
num_data_
+
i
]
=
(
p
-
1.0
f
)
*
weights_
[
i
];
gradients
[
k
*
num_data_
+
i
]
=
(
p
-
1.0
f
)
*
weights_
[
i
];
}
else
{
}
else
{
...
...
src/objective/objective_function.cpp
View file @
47313fb5
...
@@ -7,13 +7,13 @@
...
@@ -7,13 +7,13 @@
namespace
LightGBM
{
namespace
LightGBM
{
ObjectiveFunction
*
ObjectiveFunction
::
CreateObjectiveFunction
(
const
std
::
string
&
type
,
const
ObjectiveConfig
&
config
)
{
ObjectiveFunction
*
ObjectiveFunction
::
CreateObjectiveFunction
(
const
std
::
string
&
type
,
const
ObjectiveConfig
&
config
)
{
if
(
type
==
"regression"
)
{
if
(
type
==
std
::
string
(
"regression"
)
)
{
return
new
RegressionL2loss
(
config
);
return
new
RegressionL2loss
(
config
);
}
else
if
(
type
==
"binary"
)
{
}
else
if
(
type
==
std
::
string
(
"binary"
)
)
{
return
new
BinaryLogloss
(
config
);
return
new
BinaryLogloss
(
config
);
}
else
if
(
type
==
"lambdarank"
)
{
}
else
if
(
type
==
std
::
string
(
"lambdarank"
)
)
{
return
new
LambdarankNDCG
(
config
);
return
new
LambdarankNDCG
(
config
);
}
else
if
(
type
==
"multiclass"
)
{
}
else
if
(
type
==
std
::
string
(
"multiclass"
)
)
{
return
new
MulticlassLogloss
(
config
);
return
new
MulticlassLogloss
(
config
);
}
}
return
nullptr
;
return
nullptr
;
...
...
src/objective/rank_objective.hpp
View file @
47313fb5
...
@@ -19,7 +19,7 @@ namespace LightGBM {
...
@@ -19,7 +19,7 @@ namespace LightGBM {
class
LambdarankNDCG
:
public
ObjectiveFunction
{
class
LambdarankNDCG
:
public
ObjectiveFunction
{
public:
public:
explicit
LambdarankNDCG
(
const
ObjectiveConfig
&
config
)
{
explicit
LambdarankNDCG
(
const
ObjectiveConfig
&
config
)
{
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
// initialize DCG calculator
// initialize DCG calculator
DCGCalculator
::
Init
(
config
.
label_gain
);
DCGCalculator
::
Init
(
config
.
label_gain
);
// copy lable gain to local
// copy lable gain to local
...
@@ -207,7 +207,7 @@ private:
...
@@ -207,7 +207,7 @@ private:
/*! \brief Cache inverse max DCG, speed up calculation */
/*! \brief Cache inverse max DCG, speed up calculation */
score_t
*
inverse_max_dcgs_
;
score_t
*
inverse_max_dcgs_
;
/*! \brief Simgoid param */
/*! \brief Simgoid param */
score_
t
sigmoid_
;
floa
t
sigmoid_
;
/*! \brief Optimized NDCG@ */
/*! \brief Optimized NDCG@ */
int
optimize_pos_at_
;
int
optimize_pos_at_
;
/*! \brief Number of queries */
/*! \brief Number of queries */
...
...
src/treelearner/data_parallel_tree_learner.cpp
View file @
47313fb5
...
@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
...
@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
}
}
// sync global data sumup info
// sync global data sumup info
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
data
(
smaller_leaf_splits_
->
num_data_in_leaf
(),
std
::
tuple
<
data_size_t
,
double
,
double
>
data
(
smaller_leaf_splits_
->
num_data_in_leaf
(),
smaller_leaf_splits_
->
sum_gradients
(),
smaller_leaf_splits_
->
sum_hessians
());
smaller_leaf_splits_
->
sum_gradients
(),
smaller_leaf_splits_
->
sum_hessians
());
int
size
=
sizeof
(
data
);
int
size
=
sizeof
(
data
);
std
::
memcpy
(
input_buffer_
,
&
data
,
size
);
std
::
memcpy
(
input_buffer_
,
&
data
,
size
);
// global sumup reduce
// global sumup reduce
Network
::
Allreduce
(
input_buffer_
,
size
,
size
,
output_buffer_
,
[](
const
char
*
src
,
char
*
dst
,
int
len
)
{
Network
::
Allreduce
(
input_buffer_
,
size
,
size
,
output_buffer_
,
[](
const
char
*
src
,
char
*
dst
,
int
len
)
{
int
used_size
=
0
;
int
used_size
=
0
;
int
type_size
=
sizeof
(
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
);
int
type_size
=
sizeof
(
std
::
tuple
<
data_size_t
,
double
,
double
>
);
const
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*
p1
;
const
std
::
tuple
<
data_size_t
,
double
,
double
>
*
p1
;
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*
p2
;
std
::
tuple
<
data_size_t
,
double
,
double
>
*
p2
;
while
(
used_size
<
len
)
{
while
(
used_size
<
len
)
{
p1
=
reinterpret_cast
<
const
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*>
(
src
);
p1
=
reinterpret_cast
<
const
std
::
tuple
<
data_size_t
,
double
,
double
>
*>
(
src
);
p2
=
reinterpret_cast
<
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*>
(
dst
);
p2
=
reinterpret_cast
<
std
::
tuple
<
data_size_t
,
double
,
double
>
*>
(
dst
);
std
::
get
<
0
>
(
*
p2
)
=
std
::
get
<
0
>
(
*
p2
)
+
std
::
get
<
0
>
(
*
p1
);
std
::
get
<
0
>
(
*
p2
)
=
std
::
get
<
0
>
(
*
p2
)
+
std
::
get
<
0
>
(
*
p1
);
std
::
get
<
1
>
(
*
p2
)
=
std
::
get
<
1
>
(
*
p2
)
+
std
::
get
<
1
>
(
*
p1
);
std
::
get
<
1
>
(
*
p2
)
=
std
::
get
<
1
>
(
*
p2
)
+
std
::
get
<
1
>
(
*
p1
);
std
::
get
<
2
>
(
*
p2
)
=
std
::
get
<
2
>
(
*
p2
)
+
std
::
get
<
2
>
(
*
p1
);
std
::
get
<
2
>
(
*
p2
)
=
std
::
get
<
2
>
(
*
p2
)
+
std
::
get
<
2
>
(
*
p1
);
...
@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
...
@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
void
DataParallelTreeLearner
::
FindBestSplitsForLeaves
()
{
void
DataParallelTreeLearner
::
FindBestSplitsForLeaves
()
{
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
SplitInfo
smaller_best
,
larger_best
;
std
::
vector
<
float
>
gains
;
std
::
vector
<
double
>
gains
;
// find local best split for smaller leaf
// find local best split for smaller leaf
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// find local best split for larger leaf
// find local best split for larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
...
@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
}
...
...
src/treelearner/feature_histogram.hpp
View file @
47313fb5
...
@@ -26,7 +26,7 @@ public:
...
@@ -26,7 +26,7 @@ public:
* \param min_num_data_one_leaf minimal number of data in one leaf
* \param min_num_data_one_leaf minimal number of data in one leaf
*/
*/
void
Init
(
const
Feature
*
feature
,
int
feature_idx
,
data_size_t
min_num_data_one_leaf
,
void
Init
(
const
Feature
*
feature
,
int
feature_idx
,
data_size_t
min_num_data_one_leaf
,
score_t
min_sum_hessian_one_leaf
)
{
double
min_sum_hessian_one_leaf
)
{
feature_idx_
=
feature_idx
;
feature_idx_
=
feature_idx
;
min_num_data_one_leaf_
=
min_num_data_one_leaf
;
min_num_data_one_leaf_
=
min_num_data_one_leaf
;
min_sum_hessian_one_leaf_
=
min_sum_hessian_one_leaf
;
min_sum_hessian_one_leaf_
=
min_sum_hessian_one_leaf
;
...
@@ -45,8 +45,8 @@ public:
...
@@ -45,8 +45,8 @@ public:
* \param ordered_hessians Ordered hessians
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
* \param data_indices data indices of current leaf
*/
*/
void
Construct
(
data_size_t
*
data_indices
,
data_size_t
num_data
,
score_t
sum_gradients
,
void
Construct
(
data_size_t
*
data_indices
,
data_size_t
num_data
,
double
sum_gradients
,
score_t
sum_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
)
{
double
sum_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
)
{
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
num_data_
=
num_data
;
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
sum_gradients_
=
sum_gradients
;
...
@@ -63,8 +63,8 @@ public:
...
@@ -63,8 +63,8 @@ public:
* \param gradients
* \param gradients
* \param hessian
* \param hessian
*/
*/
void
Construct
(
const
OrderedBin
*
ordered_bin
,
int
leaf
,
data_size_t
num_data
,
score_t
sum_gradients
,
void
Construct
(
const
OrderedBin
*
ordered_bin
,
int
leaf
,
data_size_t
num_data
,
double
sum_gradients
,
score_t
sum_hessians
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
double
sum_hessians
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
num_data_
=
num_data
;
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
sum_gradients_
=
sum_gradients
;
...
@@ -78,7 +78,7 @@ public:
...
@@ -78,7 +78,7 @@ public:
* \param sum_gradients sum of gradients of current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hessians of current leaf
* \param sum_hessians sum of hessians of current leaf
*/
*/
void
SetSumup
(
data_size_t
num_data
,
score_t
sum_gradients
,
score_t
sum_hessians
)
{
void
SetSumup
(
data_size_t
num_data
,
double
sum_gradients
,
double
sum_hessians
)
{
num_data_
=
num_data
;
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
sum_gradients_
=
sum_gradients
;
sum_hessians_
=
sum_hessians
+
2
*
kEpsilon
;
sum_hessians_
=
sum_hessians
+
2
*
kEpsilon
;
...
@@ -104,15 +104,15 @@ public:
...
@@ -104,15 +104,15 @@ public:
* \param output The best split result
* \param output The best split result
*/
*/
void
FindBestThreshold
(
SplitInfo
*
output
)
{
void
FindBestThreshold
(
SplitInfo
*
output
)
{
score_t
best_sum_left_gradient
=
NAN
;
double
best_sum_left_gradient
=
NAN
;
score_t
best_sum_left_hessian
=
NAN
;
double
best_sum_left_hessian
=
NAN
;
score_t
best_gain
=
kMinScore
;
double
best_gain
=
kMinScore
;
data_size_t
best_left_count
=
0
;
data_size_t
best_left_count
=
0
;
unsigned
int
best_threshold
=
static_cast
<
unsigned
int
>
(
num_bins_
);
unsigned
int
best_threshold
=
static_cast
<
unsigned
int
>
(
num_bins_
);
score_t
sum_right_gradient
=
0.0
f
;
double
sum_right_gradient
=
0.0
f
;
score_t
sum_right_hessian
=
kEpsilon
;
double
sum_right_hessian
=
kEpsilon
;
data_size_t
right_count
=
0
;
data_size_t
right_count
=
0
;
score_t
gain_shift
=
GetLeafSplitGain
(
sum_gradients_
,
sum_hessians_
);
double
gain_shift
=
GetLeafSplitGain
(
sum_gradients_
,
sum_hessians_
);
is_splittable_
=
false
;
is_splittable_
=
false
;
// from right to left, and we don't need data in bin0
// from right to left, and we don't need data in bin0
for
(
unsigned
int
t
=
num_bins_
-
1
;
t
>
0
;
--
t
)
{
for
(
unsigned
int
t
=
num_bins_
-
1
;
t
>
0
;
--
t
)
{
...
@@ -125,14 +125,14 @@ public:
...
@@ -125,14 +125,14 @@ public:
// if data not enough
// if data not enough
if
(
left_count
<
min_num_data_one_leaf_
)
break
;
if
(
left_count
<
min_num_data_one_leaf_
)
break
;
score_t
sum_left_hessian
=
sum_hessians_
-
sum_right_hessian
;
double
sum_left_hessian
=
sum_hessians_
-
sum_right_hessian
;
// if sum hessian too small
// if sum hessian too small
if
(
sum_left_hessian
<
min_sum_hessian_one_leaf_
)
{
if
(
sum_left_hessian
<
min_sum_hessian_one_leaf_
)
{
break
;
break
;
}
}
score_t
sum_left_gradient
=
sum_gradients_
-
sum_right_gradient
;
double
sum_left_gradient
=
sum_gradients_
-
sum_right_gradient
;
// current split gain
// current split gain
score_t
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
);
double
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
);
// gain is worst than no perform split
// gain is worst than no perform split
if
(
current_gain
<
gain_shift
)
{
if
(
current_gain
<
gain_shift
)
{
continue
;
continue
;
...
@@ -195,7 +195,7 @@ public:
...
@@ -195,7 +195,7 @@ public:
/*!
/*!
* \brief Set min sum hessian in one leaf
* \brief Set min sum hessian in one leaf
*/
*/
void
SetMinSumHessianOneLeaf
(
score_t
new_val
)
{
void
SetMinSumHessianOneLeaf
(
double
new_val
)
{
min_sum_hessian_one_leaf_
=
new_val
;
min_sum_hessian_one_leaf_
=
new_val
;
}
}
...
@@ -216,7 +216,7 @@ private:
...
@@ -216,7 +216,7 @@ private:
* \param sum_hessians
* \param sum_hessians
* \return split gain
* \return split gain
*/
*/
score_t
GetLeafSplitGain
(
score_t
sum_gradients
,
score_t
sum_hessians
)
const
{
double
GetLeafSplitGain
(
double
sum_gradients
,
double
sum_hessians
)
const
{
return
(
sum_gradients
*
sum_gradients
)
/
(
sum_hessians
);
return
(
sum_gradients
*
sum_gradients
)
/
(
sum_hessians
);
}
}
...
@@ -226,7 +226,7 @@ private:
...
@@ -226,7 +226,7 @@ private:
* \param sum_hessians
* \param sum_hessians
* \return leaf output
* \return leaf output
*/
*/
score_t
CalculateSplittedLeafOutput
(
score_t
sum_gradients
,
score_t
sum_hessians
)
const
{
double
CalculateSplittedLeafOutput
(
double
sum_gradients
,
double
sum_hessians
)
const
{
return
-
(
sum_gradients
)
/
(
sum_hessians
);
return
-
(
sum_gradients
)
/
(
sum_hessians
);
}
}
...
@@ -234,7 +234,7 @@ private:
...
@@ -234,7 +234,7 @@ private:
/*! \brief minimal number of data in one leaf */
/*! \brief minimal number of data in one leaf */
data_size_t
min_num_data_one_leaf_
;
data_size_t
min_num_data_one_leaf_
;
/*! \brief minimal sum hessian of data in one leaf */
/*! \brief minimal sum hessian of data in one leaf */
score_t
min_sum_hessian_one_leaf_
;
double
min_sum_hessian_one_leaf_
;
/*! \brief the bin data of current feature */
/*! \brief the bin data of current feature */
const
Bin
*
bin_data_
;
const
Bin
*
bin_data_
;
/*! \brief number of bin of histogram */
/*! \brief number of bin of histogram */
...
@@ -244,9 +244,9 @@ private:
...
@@ -244,9 +244,9 @@ private:
/*! \brief number of all data */
/*! \brief number of all data */
data_size_t
num_data_
;
data_size_t
num_data_
;
/*! \brief sum of gradient of current leaf */
/*! \brief sum of gradient of current leaf */
score_t
sum_gradients_
;
double
sum_gradients_
;
/*! \brief sum of hessians of current leaf */
/*! \brief sum of hessians of current leaf */
score_t
sum_hessians_
;
double
sum_hessians_
;
/*! \brief False if this histogram cannot split */
/*! \brief False if this histogram cannot split */
bool
is_splittable_
=
true
;
bool
is_splittable_
=
true
;
};
};
...
...
src/treelearner/feature_parallel_tree_learner.cpp
View file @
47313fb5
...
@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
...
@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
SplitInfo
smaller_best
,
larger_best
;
// get best split at smaller leaf
// get best split at smaller leaf
std
::
vector
<
float
>
gains
;
std
::
vector
<
double
>
gains
;
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// get best split at larger leaf
// get best split at larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
...
@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
}
// sync global best info
// sync global best info
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment