tianlh / LightGBM-DCU

Commit 47313fb5, authored Nov 01, 2016 by Guolin Ke (parent aa796a85)

fixed sumup problem for float type

Showing 20 of 24 changed files with 115 additions and 107 deletions (+115 −107).
include/LightGBM/bin.h                                 +2  −2
include/LightGBM/dataset.h                             +3  −3
include/LightGBM/tree.h                                +7  −7
src/boosting/gbdt.cpp                                  +2  −2
src/boosting/score_updater.hpp                         +1  −1
src/io/dataset.cpp                                     +9  −7
src/io/metadata.cpp                                   +12  −6
src/io/tree.cpp                                        +4  −4
src/metric/binary_metric.hpp                           +9  −9
src/metric/metric.cpp                                  +8  −8
src/metric/multiclass_metric.hpp                       +8  −8
src/metric/rank_metric.hpp                             +1  −1
src/metric/regression_metric.hpp                       +1  −1
src/objective/binary_objective.hpp                     +3  −3
src/objective/multiclass_objective.hpp                 +6  −6
src/objective/objective_function.cpp                   +4  −4
src/objective/rank_objective.hpp                       +2  −2
src/treelearner/data_parallel_tree_learner.cpp         +9  −9
src/treelearner/feature_histogram.hpp                 +21 −21
src/treelearner/feature_parallel_tree_learner.cpp      +3  −3
include/LightGBM/bin.h

@@ -13,9 +13,9 @@ namespace LightGBM {
 struct HistogramBinEntry {
  public:
   /*! \brief Sum of gradients on this bin */
-  score_t sum_gradients = 0.0;
+  double sum_gradients = 0.0;
   /*! \brief Sum of hessians on this bin */
-  score_t sum_hessians = 0.0;
+  double sum_hessians = 0.0;
   /*! \brief Number of data on this bin */
   data_size_t cnt = 0;
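This hunk is the core of the fix: the per-bin accumulators move from score_t (a single-precision float in this build) to double, so summing gradients and hessians over many rows no longer drops low-order bits. A minimal standalone sketch of the failure mode the commit title refers to (illustration only, not LightGBM code):

    #include <cstdio>

    int main() {
      // Accumulate ten million small gradient-like values. Once the float
      // running total dwarfs the addend, each addition rounds away part of
      // the increment; the double total stays near the exact 1,000,000.
      float sum_f = 0.0f;
      double sum_d = 0.0;
      for (int i = 0; i < 10000000; ++i) {
        sum_f += 0.1f;
        sum_d += 0.1f;  // same float addend, wider accumulator
      }
      std::printf("float accumulator:  %.1f\n", sum_f);
      std::printf("double accumulator: %.1f\n", sum_d);
      return 0;
    }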
include/LightGBM/dataset.h

@@ -83,7 +83,7 @@ public:
  * \brief Set initial scores
  * \param init_score Initial scores, this class will manage memory for init_score.
  */
-  void SetInitScore(score_t* init_score);
+  void SetInitScore(const float* init_score, data_size_t len);
 /*!

@@ -166,7 +166,7 @@ public:
  * \brief Get initial scores, if not exists, will return nullptr
  * \return Pointer of initial scores
  */
-  inline const score_t* init_score() const { return init_score_; }
+  inline const float* init_score() const { return init_score_; }
   /*! \brief Load initial scores from file */
   void LoadInitialScore();

@@ -201,7 +201,7 @@ private:
   /*! \brief Number of Initial score, used to check correct weight file */
   data_size_t num_init_score_;
   /*! \brief Initial score */
-  score_t* init_score_;
+  float* init_score_;
   /*! \brief Queries data */
   data_size_t* queries_;
 };
include/LightGBM/tree.h

@@ -43,11 +43,11 @@ public:
  * \return The index of new leaf.
  */
 int Split(int leaf, int feature, unsigned int threshold, int real_feature,
-          float threshold_float, score_t left_value, score_t right_value, float gain);
+          float threshold_float, float left_value, float right_value, float gain);
 /*! \brief Get the output of one leave */
-inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; }
+inline float LeafOutput(int leaf) const { return leaf_value_[leaf]; }
 /*!
  * \brief Adding prediction value of this tree model to scores

@@ -74,7 +74,7 @@ public:
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
-inline score_t Predict(const float* feature_values) const;
+inline float Predict(const float* feature_values) const;
 inline int PredictLeafIndex(const float* feature_values) const;
 /*! \brief Get Number of leaves*/

@@ -93,7 +93,7 @@ public:
  */
 inline void Shrinkage(float rate) {
   for (int i = 0; i < num_leaves_; ++i) {
-    leaf_value_[i] = static_cast<score_t>(leaf_value_[i] * rate);
+    leaf_value_[i] = leaf_value_[i] * rate;
   }
 }

@@ -144,13 +144,13 @@ private:
   /*! \brief The parent of leaf */
   int* leaf_parent_;
   /*! \brief Output of leaves */
-  score_t* leaf_value_;
+  float* leaf_value_;
   /*! \brief Depth for leaves */
   int* leaf_depth_;
 };

-inline score_t Tree::Predict(const float* feature_values) const {
+inline float Tree::Predict(const float* feature_values) const {
   int leaf = GetLeaf(feature_values);
   return LeafOutput(leaf);
 }
src/boosting/gbdt.cpp

@@ -217,8 +217,8 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
   // update training score
   train_score_updater_->AddScore(tree_learner_[curr_class], curr_class);
   // update validation score
-  for (auto& score_tracker : valid_score_updater_) {
-    score_tracker->AddScore(tree, curr_class);
+  for (auto& score_updater : valid_score_updater_) {
+    score_updater->AddScore(tree, curr_class);
   }
 }
src/boosting/score_updater.hpp

@@ -24,7 +24,7 @@ public:
     score_ = new score_t[num_data_ * num_class];
     // default start score is zero
     std::memset(score_, 0, sizeof(score_t) * num_data_ * num_class);
-    const score_t* init_score = data->metadata().init_score();
+    const float* init_score = data->metadata().init_score();
     // if exists initial score, will start from it
     if (init_score != nullptr) {
       for (data_size_t i = 0; i < num_data_; ++i) {
src/io/dataset.cpp

@@ -541,7 +541,7 @@ void Dataset::ExtractFeaturesFromMemory() {
     }
   } else {
     // if need to prediction with initial model
-    score_t* init_score = new score_t[num_data_];
+    float* init_score = new float[num_data_];
 #pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
     for (data_size_t i = 0; i < num_data_; ++i) {
       const int tid = omp_get_thread_num();

@@ -549,7 +549,7 @@ void Dataset::ExtractFeaturesFromMemory() {
       // parser
       parser_->ParseOneLine(text_reader_->Lines()[i].c_str(), &oneline_features, &tmp_label);
       // set initial score
-      init_score[i] = static_cast<score_t>(predict_fun_(oneline_features));
+      init_score[i] = static_cast<float>(predict_fun_(oneline_features));
       // set label
       metadata_.SetLabelAt(i, tmp_label);
       // free processed line:

@@ -573,7 +573,8 @@ void Dataset::ExtractFeaturesFromMemory() {
     }
   }
   // metadata_ will manage space of init_score
-  metadata_.SetInitScore(init_score);
+  metadata_.SetInitScore(init_score, num_data_);
+  delete[] init_score;
 }
 #pragma omp parallel for schedule(guided)

@@ -586,9 +587,9 @@ void Dataset::ExtractFeaturesFromMemory() {
 void Dataset::ExtractFeaturesFromFile() {
-  score_t* init_score = nullptr;
+  float* init_score = nullptr;
   if (predict_fun_ != nullptr) {
-    init_score = new score_t[num_data_];
+    init_score = new float[num_data_];
   }
   std::function<void(data_size_t, const std::vector<std::string>&)> process_fun = [this, &init_score]

@@ -603,7 +604,7 @@ void Dataset::ExtractFeaturesFromFile() {
       parser_->ParseOneLine(lines[i].c_str(), &oneline_features, &tmp_label);
       // set initial score
       if (init_score != nullptr) {
-        init_score[start_idx + i] = static_cast<score_t>(predict_fun_(oneline_features));
+        init_score[start_idx + i] = static_cast<float>(predict_fun_(oneline_features));
       }
       // set label
       metadata_.SetLabelAt(start_idx + i, tmp_label);

@@ -635,7 +636,8 @@ void Dataset::ExtractFeaturesFromFile() {
   // metadata_ will manage space of init_score
   if (init_score != nullptr) {
-    metadata_.SetInitScore(init_score);
+    metadata_.SetInitScore(init_score, num_data_);
+    delete[] init_score;
   }
 #pragma omp parallel for schedule(guided)
src/io/metadata.cpp

@@ -196,9 +196,9 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
   // get local initial scores
   if (init_score_ != nullptr) {
-    score_t* old_scores = init_score_;
+    float* old_scores = init_score_;
     num_init_score_ = num_data_;
-    init_score_ = new score_t[num_init_score_];
+    init_score_ = new float[num_init_score_];
     for (size_t i = 0; i < used_data_indices.size(); ++i) {
       init_score_[i] = old_scores[used_data_indices[i]];
     }

@@ -211,10 +211,16 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
 }

-void Metadata::SetInitScore(score_t* init_score) {
+void Metadata::SetInitScore(const float* init_score, data_size_t len) {
+  if (num_data_ != len) {
+    Log::Fatal("len of initial score is not same with #data");
+  }
   if (init_score_ != nullptr) { delete[] init_score_; }
   num_init_score_ = num_data_;
-  init_score_ = init_score;
+  init_score_ = new float[num_init_score_];
+  for (data_size_t i = 0; i < num_init_score_; ++i) {
+    init_score_[i] = init_score[i];
+  }
 }

 void Metadata::LoadWeights() {

@@ -245,11 +251,11 @@ void Metadata::LoadInitialScore() {
   Log::Info("Start loading initial scores");
   num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
-  init_score_ = new score_t[num_init_score_];
+  init_score_ = new float[num_init_score_];
   float tmp = 0.0f;
   for (data_size_t i = 0; i < num_init_score_; ++i) {
     Common::Atof(reader.Lines()[i].c_str(), &tmp);
-    init_score_[i] = static_cast<score_t>(tmp);
+    init_score_[i] = tmp;
   }
 }
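The ownership contract of SetInitScore changes here: the old version kept the caller's score_t pointer, while the new version checks the length against num_data_ and stores its own copy. That is why the call sites in src/io/dataset.cpp above now pass num_data_ and delete[] the buffer afterwards. A sketch of the post-commit calling pattern, using the names from this diff:

    // Caller-side pattern after this commit (sketch): the caller owns its
    // buffer both before and after the call, because Metadata copies it.
    float* init_score = new float[num_data_];
    for (data_size_t i = 0; i < num_data_; ++i) {
      init_score[i] = static_cast<float>(predict_fun_(oneline_features));
    }
    metadata_.SetInitScore(init_score, num_data_);  // validates len, then copies
    delete[] init_score;  // safe: Metadata holds an independent copy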
src/io/tree.cpp

@@ -27,7 +27,7 @@ Tree::Tree(int max_leaves)
   split_gain_ = new float[max_leaves_ - 1];
   leaf_parent_ = new int[max_leaves_];
-  leaf_value_ = new score_t[max_leaves_];
+  leaf_value_ = new float[max_leaves_];
   leaf_depth_ = new int[max_leaves_];
   // root is in the depth 1
   leaf_depth_[0] = 1;

@@ -48,7 +48,7 @@ Tree::~Tree() {
 }

 int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
-                float threshold, score_t left_value, score_t right_value, float gain) {
+                float threshold, float left_value, float right_value, float gain) {
   int new_node_idx = num_leaves_ - 1;
   // update parent info
   int parent = leaf_parent_[leaf];

@@ -124,7 +124,7 @@ std::string Tree::ToString() {
   ss << "leaf_parent=" << Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
-  ss << "leaf_value=" << Common::ArrayToString<score_t>(leaf_value_, num_leaves_, ' ') << std::endl;
+  ss << "leaf_value=" << Common::ArrayToString<float>(leaf_value_, num_leaves_, ' ') << std::endl;
   ss << std::endl;
   return ss.str();
 }

@@ -157,7 +157,7 @@ Tree::Tree(const std::string& str) {
   threshold_ = new float[num_leaves_ - 1];
   split_gain_ = new float[num_leaves_ - 1];
   leaf_parent_ = new int[num_leaves_];
-  leaf_value_ = new score_t[num_leaves_];
+  leaf_value_ = new float[num_leaves_];
   split_feature_ = nullptr;
   threshold_in_bin_ = nullptr;
src/metric/binary_metric.hpp

@@ -58,7 +58,7 @@ public:
     return false;
   }

-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
     score_t sum_loss = 0.0f;
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static) reduction(+:sum_loss)

@@ -78,7 +78,7 @@ public:
       }
     }
     score_t loss = sum_loss / sum_weights_;
-    return std::vector<score_t>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
   }

 private:

@@ -181,7 +181,7 @@ public:
     }
   }

-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
     // get indices sorted by score, descent order
     std::vector<data_size_t> sorted_idx;
     for (data_size_t i = 0; i < num_data_; ++i) {

@@ -189,13 +189,13 @@ public:
     }
     std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
     // temp sum of postive label
-    float cur_pos = 0.0f;
+    score_t cur_pos = 0.0f;
     // total sum of postive label
-    float sum_pos = 0.0f;
+    score_t sum_pos = 0.0f;
     // accumlate of auc
-    float accum = 0.0f;
+    score_t accum = 0.0f;
     // temp sum of negative label
-    float cur_neg = 0.0f;
+    score_t cur_neg = 0.0f;
     score_t threshold = score[sorted_idx[0]];
     if (weights_ == nullptr) {
       // no weights
       for (data_size_t i = 0; i < num_data_; ++i) {

@@ -233,11 +233,11 @@ public:
     }
     accum += cur_neg * (cur_pos * 0.5f + sum_pos);
     sum_pos += cur_pos;
-    float auc = 1.0f;
+    score_t auc = 1.0f;
     if (sum_pos > 0.0f && sum_pos != sum_weights_) {
       auc = accum / (sum_pos * (sum_weights_ - sum_pos));
     }
-    return std::vector<score_t>(1, auc);
+    return std::vector<float>(1, static_cast<float>(auc));
   }

 private:
src/metric/metric.cpp

@@ -7,21 +7,21 @@
 namespace LightGBM {

 Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config) {
-  if (type == "l2") {
+  if (type == std::string("l2")) {
     return new L2Metric(config);
-  } else if (type == "l1") {
+  } else if (type == std::string("l1")) {
     return new L1Metric(config);
-  } else if (type == "binary_logloss") {
+  } else if (type == std::string("binary_logloss")) {
     return new BinaryLoglossMetric(config);
-  } else if (type == "binary_error") {
+  } else if (type == std::string("binary_error")) {
     return new BinaryErrorMetric(config);
-  } else if (type == "auc") {
+  } else if (type == std::string("auc")) {
     return new AUCMetric(config);
-  } else if (type == "ndcg") {
+  } else if (type == std::string("ndcg")) {
     return new NDCGMetric(config);
-  } else if (type == "multi_logloss"){
+  } else if (type == std::string("multi_logloss")) {
     return new MultiLoglossMetric(config);
-  } else if (type == "multi_error"){
+  } else if (type == std::string("multi_error")) {
     return new MultiErrorMetric(config);
   }
   return nullptr;
src/metric/multiclass_metric.hpp

@@ -50,14 +50,14 @@ public:
     return false;
   }

-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
     score_t sum_loss = 0.0;
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static) reduction(+:sum_loss)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
         for (int k = 0; k < num_class_; ++k) {
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
         }
         // add loss
         sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec);

@@ -65,16 +65,16 @@ public:
     } else {
 #pragma omp parallel for schedule(static) reduction(+:sum_loss)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
         for (int k = 0; k < num_class_; ++k) {
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
         }
         // add loss
         sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec) * weights_[i];
       }
     }
     score_t loss = sum_loss / sum_weights_;
-    return std::vector<score_t>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
   }

 private:

@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
 public:
   explicit MultiErrorMetric(const MetricConfig& config) : MulticlassMetric<MultiErrorMetric>(config) {}

-  inline static score_t LossOnPoint(float label, std::vector<score_t> score) {
+  inline static score_t LossOnPoint(float label, std::vector<float> score) {
     size_t k = static_cast<size_t>(label);
     for (size_t i = 0; i < score.size(); ++i){
       if (i != k && score[i] > score[k]) {

@@ -119,7 +119,7 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
 public:
   explicit MultiLoglossMetric(const MetricConfig& config) : MulticlassMetric<MultiLoglossMetric>(config) {}

-  inline static score_t LossOnPoint(float label, std::vector<score_t> score) {
+  inline static score_t LossOnPoint(float label, std::vector<float> score) {
     size_t k = static_cast<size_t>(label);
     Common::Softmax(&score);
     if (score[k] > kEpsilon) {
src/metric/rank_metric.hpp

@@ -84,7 +84,7 @@ public:
     return true;
   }

-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
     // some buffers for multi-threading sum up
     std::vector<std::vector<float>> result_buffer_;
     for (int i = 0; i < num_threads_; ++i) {
src/metric/regression_metric.hpp

@@ -67,7 +67,7 @@ public:
       }
     }
     score_t loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
-    return std::vector<float>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
   }
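The metric files in this commit all converge on the same interface: accumulate the loss in score_t, and return std::vector<float> with an explicit static_cast at the boundary, so the public Eval signature no longer depends on how score_t is typedef'd. A minimal sketch of that shape with a hypothetical squared-error loss (the typedefs and names here are assumptions for illustration, not LightGBM's):

    #include <vector>

    typedef float score_t;    // assumed: the score type in this build
    typedef int data_size_t;  // assumed typedef

    // Hypothetical point-wise metric following the post-commit Eval shape.
    std::vector<float> Eval(const score_t* score, const float* label,
                            data_size_t num_data) {
      score_t sum_loss = 0.0f;
      for (data_size_t i = 0; i < num_data; ++i) {
        const score_t diff = score[i] - label[i];
        sum_loss += diff * diff;  // stand-in for LossOnPoint
      }
      score_t loss = sum_loss / num_data;
      // narrow to float only when packaging the result
      return std::vector<float>(1, static_cast<float>(loss));
    }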
src/objective/binary_objective.hpp

@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
 public:
   explicit BinaryLogloss(const ObjectiveConfig& config) {
     is_unbalance_ = config.is_unbalance;
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<float>(config.sigmoid);
     if (sigmoid_ <= 0.0) {
       Log::Fatal("Sigmoid parameter %f :should greater than zero", sigmoid_);
     }

@@ -92,11 +92,11 @@ private:
   /*! \brief True if using unbalance training */
   bool is_unbalance_;
   /*! \brief Sigmoid parameter */
-  score_t sigmoid_;
+  float sigmoid_;
   /*! \brief Values for positive and negative labels */
   int label_val_[2];
   /*! \brief Weights for positive and negative labels */
-  score_t label_weights_[2];
+  float label_weights_[2];
   /*! \brief Weights for data */
   const float* weights_;
 };
src/objective/multiclass_objective.hpp

@@ -38,13 +38,13 @@ public:
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
         for (int k = 0; k < num_class_; ++k){
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
         }
         Common::Softmax(&rec);
         for (int k = 0; k < num_class_; ++k) {
-          score_t p = rec[k];
+          score_t p = static_cast<score_t>(rec[k]);
           if (label_int_[i] == k) {
             gradients[k * num_data_ + i] = p - 1.0f;
           } else {

@@ -56,13 +56,13 @@ public:
     } else {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
         for (int k = 0; k < num_class_; ++k){
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
         }
         Common::Softmax(&rec);
         for (int k = 0; k < num_class_; ++k) {
-          float p = rec[k];
+          score_t p = static_cast<score_t>(rec[k]);
           if (label_int_[i] == k) {
             gradients[k * num_data_ + i] = (p - 1.0f) * weights_[i];
           } else {
src/objective/objective_function.cpp

@@ -7,13 +7,13 @@
 namespace LightGBM {

 ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
-  if (type == "regression") {
+  if (type == std::string("regression")) {
     return new RegressionL2loss(config);
-  } else if (type == "binary") {
+  } else if (type == std::string("binary")) {
     return new BinaryLogloss(config);
-  } else if (type == "lambdarank") {
+  } else if (type == std::string("lambdarank")) {
     return new LambdarankNDCG(config);
-  } else if (type == "multiclass") {
+  } else if (type == std::string("multiclass")) {
     return new MulticlassLogloss(config);
   }
   return nullptr;
src/objective/rank_objective.hpp

@@ -19,7 +19,7 @@ namespace LightGBM {
 class LambdarankNDCG: public ObjectiveFunction {
 public:
   explicit LambdarankNDCG(const ObjectiveConfig& config) {
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<float>(config.sigmoid);
     // initialize DCG calculator
     DCGCalculator::Init(config.label_gain);
     // copy lable gain to local

@@ -207,7 +207,7 @@ private:
   /*! \brief Cache inverse max DCG, speed up calculation */
   score_t* inverse_max_dcgs_;
   /*! \brief Simgoid param */
-  score_t sigmoid_;
+  float sigmoid_;
   /*! \brief Optimized NDCG@ */
   int optimize_pos_at_;
   /*! \brief Number of queries */
src/treelearner/data_parallel_tree_learner.cpp

@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
   }
   // sync global data sumup info
-  std::tuple<data_size_t, score_t, score_t> data(smaller_leaf_splits_->num_data_in_leaf(),
+  std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
     smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
   int size = sizeof(data);
   std::memcpy(input_buffer_, &data, size);
   // global sumup reduce
   Network::Allreduce(input_buffer_, size, size, output_buffer_, [](const char* src, char* dst, int len) {
     int used_size = 0;
-    int type_size = sizeof(std::tuple<data_size_t, score_t, score_t>);
-    const std::tuple<data_size_t, score_t, score_t>* p1;
-    std::tuple<data_size_t, score_t, score_t>* p2;
+    int type_size = sizeof(std::tuple<data_size_t, double, double>);
+    const std::tuple<data_size_t, double, double>* p1;
+    std::tuple<data_size_t, double, double>* p2;
     while (used_size < len) {
-      p1 = reinterpret_cast<const std::tuple<data_size_t, score_t, score_t>*>(src);
-      p2 = reinterpret_cast<std::tuple<data_size_t, score_t, score_t>*>(dst);
+      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double>*>(src);
+      p2 = reinterpret_cast<std::tuple<data_size_t, double, double>*>(dst);
       std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
       std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
       std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);

@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
 void DataParallelTreeLearner::FindBestSplitsForLeaves() {
   int smaller_best_feature = -1, larger_best_feature = -1;
   SplitInfo smaller_best, larger_best;
-  std::vector<float> gains;
+  std::vector<double> gains;
   // find local best split for smaller leaf
   for (size_t i = 0; i < smaller_leaf_splits_->BestSplitPerFeature().size(); ++i) {
     gains.push_back(smaller_leaf_splits_->BestSplitPerFeature()[i].gain);
   }
-  smaller_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+  smaller_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
   smaller_best = smaller_leaf_splits_->BestSplitPerFeature()[smaller_best_feature];
   // find local best split for larger leaf
   if (larger_leaf_splits_->LeafIndex() >= 0) {

@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
     for (size_t i = 0; i < larger_leaf_splits_->BestSplitPerFeature().size(); ++i) {
       gains.push_back(larger_leaf_splits_->BestSplitPerFeature()[i].gain);
     }
-    larger_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+    larger_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
     larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
   }
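Note that the tuple type inside the Allreduce callback has to match, byte for byte, the type that was memcpy'd into input_buffer_: changing the payload to (data_size_t, double, double) changes its sizeof, so the stride and every reinterpret_cast in the reducer are updated together. A standalone sketch of the same pairwise-reduce idea over a plain struct (an assumed layout for illustration, not the LightGBM Network API):

    typedef int data_size_t;  // assumed typedef for illustration

    // Assumed stand-in for the synced payload: leaf count plus double sums.
    struct SumupData {
      data_size_t cnt;
      double sum_gradients;
      double sum_hessians;
    };

    // Pairwise reducer in the style of the Allreduce callback above: walk
    // both raw buffers in sizeof(SumupData) strides, adding src into dst.
    void ReduceSumup(const char* src, char* dst, int len) {
      int used_size = 0;
      const int type_size = sizeof(SumupData);
      while (used_size < len) {
        const SumupData* p1 = reinterpret_cast<const SumupData*>(src);
        SumupData* p2 = reinterpret_cast<SumupData*>(dst);
        p2->cnt += p1->cnt;
        p2->sum_gradients += p1->sum_gradients;
        p2->sum_hessians += p1->sum_hessians;
        src += type_size;
        dst += type_size;
        used_size += type_size;
      }
    }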
src/treelearner/feature_histogram.hpp

@@ -26,7 +26,7 @@ public:
  * \param min_num_data_one_leaf minimal number of data in one leaf
  */
 void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf,
-          score_t min_sum_hessian_one_leaf) {
+          double min_sum_hessian_one_leaf) {
   feature_idx_ = feature_idx;
   min_num_data_one_leaf_ = min_num_data_one_leaf;
   min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf;

@@ -45,8 +45,8 @@ public:
  * \param ordered_hessians Ordered hessians
  * \param data_indices data indices of current leaf
  */
-void Construct(data_size_t* data_indices, data_size_t num_data, score_t sum_gradients,
-               score_t sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
+void Construct(data_size_t* data_indices, data_size_t num_data, double sum_gradients,
+               double sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
   std::memset(data_, 0, sizeof(HistogramBinEntry) * num_bins_);
   num_data_ = num_data;
   sum_gradients_ = sum_gradients;

@@ -63,8 +63,8 @@ public:
  * \param gradients
  * \param hessian
  */
-void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, score_t sum_gradients,
-               score_t sum_hessians, const score_t* gradients, const score_t* hessians) {
+void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, double sum_gradients,
+               double sum_hessians, const score_t* gradients, const score_t* hessians) {
   std::memset(data_, 0, sizeof(HistogramBinEntry) * num_bins_);
   num_data_ = num_data;
   sum_gradients_ = sum_gradients;

@@ -78,7 +78,7 @@ public:
  * \param sum_gradients sum of gradients of current leaf
  * \param sum_hessians sum of hessians of current leaf
  */
-void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
+void SetSumup(data_size_t num_data, double sum_gradients, double sum_hessians) {
   num_data_ = num_data;
   sum_gradients_ = sum_gradients;
   sum_hessians_ = sum_hessians + 2 * kEpsilon;

@@ -104,15 +104,15 @@ public:
  * \param output The best split result
  */
 void FindBestThreshold(SplitInfo* output) {
-  score_t best_sum_left_gradient = NAN;
-  score_t best_sum_left_hessian = NAN;
-  score_t best_gain = kMinScore;
+  double best_sum_left_gradient = NAN;
+  double best_sum_left_hessian = NAN;
+  double best_gain = kMinScore;
   data_size_t best_left_count = 0;
   unsigned int best_threshold = static_cast<unsigned int>(num_bins_);
-  score_t sum_right_gradient = 0.0f;
-  score_t sum_right_hessian = kEpsilon;
+  double sum_right_gradient = 0.0f;
+  double sum_right_hessian = kEpsilon;
   data_size_t right_count = 0;
-  score_t gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
+  double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
   is_splittable_ = false;
   // from right to left, and we don't need data in bin0
   for (unsigned int t = num_bins_ - 1; t > 0; --t) {

@@ -125,14 +125,14 @@ public:
     // if data not enough
     if (left_count < min_num_data_one_leaf_) break;
-    score_t sum_left_hessian = sum_hessians_ - sum_right_hessian;
+    double sum_left_hessian = sum_hessians_ - sum_right_hessian;
     // if sum hessian too small
     if (sum_left_hessian < min_sum_hessian_one_leaf_) { break; }
-    score_t sum_left_gradient = sum_gradients_ - sum_right_gradient;
+    double sum_left_gradient = sum_gradients_ - sum_right_gradient;
     // current split gain
-    score_t current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
+    double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
     // gain is worst than no perform split
     if (current_gain < gain_shift) {
       continue;

@@ -195,7 +195,7 @@ public:
 /*!
  * \brief Set min sum hessian in one leaf
  */
-void SetMinSumHessianOneLeaf(score_t new_val) {
+void SetMinSumHessianOneLeaf(double new_val) {
   min_sum_hessian_one_leaf_ = new_val;
 }

@@ -216,7 +216,7 @@ private:
  * \param sum_hessians
  * \return split gain
  */
-score_t GetLeafSplitGain(score_t sum_gradients, score_t sum_hessians) const {
+double GetLeafSplitGain(double sum_gradients, double sum_hessians) const {
   return (sum_gradients * sum_gradients) / (sum_hessians);
 }

@@ -226,7 +226,7 @@ private:
  * \param sum_hessians
  * \return leaf output
  */
-score_t CalculateSplittedLeafOutput(score_t sum_gradients, score_t sum_hessians) const {
+double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const {
   return -(sum_gradients) / (sum_hessians);
 }

@@ -234,7 +234,7 @@ private:
   /*! \brief minimal number of data in one leaf */
   data_size_t min_num_data_one_leaf_;
   /*! \brief minimal sum hessian of data in one leaf */
-  score_t min_sum_hessian_one_leaf_;
+  double min_sum_hessian_one_leaf_;
   /*! \brief the bin data of current feature */
   const Bin* bin_data_;
   /*! \brief number of bin of histogram */

@@ -244,9 +244,9 @@ private:
   /*! \brief number of all data */
   data_size_t num_data_;
   /*! \brief sum of gradient of current leaf */
-  score_t sum_gradients_;
+  double sum_gradients_;
   /*! \brief sum of hessians of current leaf */
-  score_t sum_hessians_;
+  double sum_hessians_;
   /*! \brief False if this histogram cannot split */
   bool is_splittable_ = true;
 };
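The arithmetic this file performs is the usual second-order split objective: a leaf with gradient sum G and hessian sum H scores G*G/H, and a split is kept only when the two children together beat the parent's score (gain_shift). The running sums and the subtractions like sum_left = sum_gradients_ - sum_right_gradient are sensitive to float rounding once many values have been accumulated, which is what these changes address by computing in double. A worked check of the split rule with made-up sums:

    #include <cstdio>

    // Leaf score G^2/H, computed in double as FindBestThreshold now does.
    double GetLeafSplitGain(double sum_gradients, double sum_hessians) {
      return (sum_gradients * sum_gradients) / sum_hessians;
    }

    int main() {
      const double G = -150.0, H = 800.0;         // parent sums (made up)
      const double G_L = -130.0, H_L = 500.0;     // left child sums (made up)
      const double G_R = G - G_L, H_R = H - H_L;  // right child by subtraction
      const double gain_shift = GetLeafSplitGain(G, H);             // 28.125
      const double current_gain =
          GetLeafSplitGain(G_L, H_L) + GetLeafSplitGain(G_R, H_R);  // ~35.133
      std::printf("split kept: %s\n", current_gain > gain_shift ? "yes" : "no");
      return 0;
    }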
src/treelearner/feature_parallel_tree_learner.cpp

@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
   int smaller_best_feature = -1, larger_best_feature = -1;
   SplitInfo smaller_best, larger_best;
   // get best split at smaller leaf
-  std::vector<float> gains;
+  std::vector<double> gains;
   for (size_t i = 0; i < smaller_leaf_splits_->BestSplitPerFeature().size(); ++i) {
     gains.push_back(smaller_leaf_splits_->BestSplitPerFeature()[i].gain);
   }
-  smaller_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+  smaller_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
   smaller_best = smaller_leaf_splits_->BestSplitPerFeature()[smaller_best_feature];
   // get best split at larger leaf
   if (larger_leaf_splits_->LeafIndex() >= 0) {

@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
     for (size_t i = 0; i < larger_leaf_splits_->BestSplitPerFeature().size(); ++i) {
       gains.push_back(larger_leaf_splits_->BestSplitPerFeature()[i].gain);
     }
-    larger_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+    larger_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
     larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
   }
   // sync global best info