Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
47313fb5
"googlemock/vscode:/vscode.git/clone" did not exist on "6c8c74eceec0f81754243da261174ef13108faa3"
Commit
47313fb5
authored
Nov 01, 2016
by
Guolin Ke
Browse files
fixed sumup problem for float type
parent
aa796a85
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
115 additions
and
107 deletions
+115
-107
include/LightGBM/bin.h
include/LightGBM/bin.h
+2
-2
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+3
-3
include/LightGBM/tree.h
include/LightGBM/tree.h
+7
-7
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+2
-2
src/boosting/score_updater.hpp
src/boosting/score_updater.hpp
+1
-1
src/io/dataset.cpp
src/io/dataset.cpp
+9
-7
src/io/metadata.cpp
src/io/metadata.cpp
+12
-6
src/io/tree.cpp
src/io/tree.cpp
+4
-4
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+9
-9
src/metric/metric.cpp
src/metric/metric.cpp
+8
-8
src/metric/multiclass_metric.hpp
src/metric/multiclass_metric.hpp
+8
-8
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+1
-1
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+1
-1
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+3
-3
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+6
-6
src/objective/objective_function.cpp
src/objective/objective_function.cpp
+4
-4
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+2
-2
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/data_parallel_tree_learner.cpp
+9
-9
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+21
-21
src/treelearner/feature_parallel_tree_learner.cpp
src/treelearner/feature_parallel_tree_learner.cpp
+3
-3
No files found.
include/LightGBM/bin.h
View file @
47313fb5
...
...
@@ -13,9 +13,9 @@ namespace LightGBM {
struct
HistogramBinEntry
{
public:
/*! \brief Sum of gradients on this bin */
score_t
sum_gradients
=
0.0
;
double
sum_gradients
=
0.0
;
/*! \brief Sum of hessians on this bin */
score_t
sum_hessians
=
0.0
;
double
sum_hessians
=
0.0
;
/*! \brief Number of data on this bin */
data_size_t
cnt
=
0
;
...
...
include/LightGBM/dataset.h
View file @
47313fb5
...
...
@@ -83,7 +83,7 @@ public:
* \brief Set initial scores
* \param init_score Initial scores, this class will manage memory for init_score.
*/
void
SetInitScore
(
s
co
re_
t
*
init_score
);
void
SetInitScore
(
co
nst
floa
t
*
init_score
,
data_size_t
len
);
/*!
...
...
@@ -166,7 +166,7 @@ public:
* \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores
*/
inline
const
score_
t
*
init_score
()
const
{
return
init_score_
;
}
inline
const
floa
t
*
init_score
()
const
{
return
init_score_
;
}
/*! \brief Load initial scores from file */
void
LoadInitialScore
();
...
...
@@ -201,7 +201,7 @@ private:
/*! \brief Number of Initial score, used to check correct weight file */
data_size_t
num_init_score_
;
/*! \brief Initial score */
score_
t
*
init_score_
;
floa
t
*
init_score_
;
/*! \brief Queries data */
data_size_t
*
queries_
;
};
...
...
include/LightGBM/tree.h
View file @
47313fb5
...
...
@@ -43,11 +43,11 @@ public:
* \return The index of new leaf.
*/
int
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold
,
int
real_feature
,
float
threshold_float
,
score_
t
left_value
,
score_
t
right_value
,
float
gain
);
float
threshold_float
,
floa
t
left_value
,
floa
t
right_value
,
float
gain
);
/*! \brief Get the output of one leave */
inline
score_
t
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
inline
floa
t
LeafOutput
(
int
leaf
)
const
{
return
leaf_value_
[
leaf
];
}
/*!
* \brief Adding prediction value of this tree model to scores
...
...
@@ -74,7 +74,7 @@ public:
* \param feature_values Feature value of this record
* \return Prediction result
*/
inline
score_
t
Predict
(
const
float
*
feature_values
)
const
;
inline
floa
t
Predict
(
const
float
*
feature_values
)
const
;
inline
int
PredictLeafIndex
(
const
float
*
feature_values
)
const
;
/*! \brief Get Number of leaves*/
...
...
@@ -93,7 +93,7 @@ public:
*/
inline
void
Shrinkage
(
float
rate
)
{
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
leaf_value_
[
i
]
=
static_cast
<
score_t
>
(
leaf_value_
[
i
]
*
rate
)
;
leaf_value_
[
i
]
=
leaf_value_
[
i
]
*
rate
;
}
}
...
...
@@ -144,13 +144,13 @@ private:
/*! \brief The parent of leaf */
int
*
leaf_parent_
;
/*! \brief Output of leaves */
score_
t
*
leaf_value_
;
floa
t
*
leaf_value_
;
/*! \brief Depth for leaves */
int
*
leaf_depth_
;
};
inline
score_
t
Tree
::
Predict
(
const
float
*
feature_values
)
const
{
inline
floa
t
Tree
::
Predict
(
const
float
*
feature_values
)
const
{
int
leaf
=
GetLeaf
(
feature_values
);
return
LeafOutput
(
leaf
);
}
...
...
src/boosting/gbdt.cpp
View file @
47313fb5
...
...
@@ -217,8 +217,8 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
// update training score
train_score_updater_
->
AddScore
(
tree_learner_
[
curr_class
],
curr_class
);
// update validation score
for
(
auto
&
score_
track
er
:
valid_score_updater_
)
{
score_
track
er
->
AddScore
(
tree
,
curr_class
);
for
(
auto
&
score_
updat
er
:
valid_score_updater_
)
{
score_
updat
er
->
AddScore
(
tree
,
curr_class
);
}
}
...
...
src/boosting/score_updater.hpp
View file @
47313fb5
...
...
@@ -24,7 +24,7 @@ public:
score_
=
new
score_t
[
num_data_
*
num_class
];
// default start score is zero
std
::
memset
(
score_
,
0
,
sizeof
(
score_t
)
*
num_data_
*
num_class
);
const
score_
t
*
init_score
=
data
->
metadata
().
init_score
();
const
floa
t
*
init_score
=
data
->
metadata
().
init_score
();
// if exists initial score, will start from it
if
(
init_score
!=
nullptr
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
src/io/dataset.cpp
View file @
47313fb5
...
...
@@ -541,7 +541,7 @@ void Dataset::ExtractFeaturesFromMemory() {
}
}
else
{
// if need to prediction with initial model
score_
t
*
init_score
=
new
score_
t
[
num_data_
];
floa
t
*
init_score
=
new
floa
t
[
num_data_
];
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
...
...
@@ -549,7 +549,7 @@ void Dataset::ExtractFeaturesFromMemory() {
// parser
parser_
->
ParseOneLine
(
text_reader_
->
Lines
()[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
// set initial score
init_score
[
i
]
=
static_cast
<
score_
t
>
(
predict_fun_
(
oneline_features
));
init_score
[
i
]
=
static_cast
<
floa
t
>
(
predict_fun_
(
oneline_features
));
// set label
metadata_
.
SetLabelAt
(
i
,
tmp_label
);
// free processed line:
...
...
@@ -573,7 +573,8 @@ void Dataset::ExtractFeaturesFromMemory() {
}
}
// metadata_ will manage space of init_score
metadata_
.
SetInitScore
(
init_score
);
metadata_
.
SetInitScore
(
init_score
,
num_data_
);
delete
[]
init_score
;
}
#pragma omp parallel for schedule(guided)
...
...
@@ -586,9 +587,9 @@ void Dataset::ExtractFeaturesFromMemory() {
void
Dataset
::
ExtractFeaturesFromFile
()
{
score_
t
*
init_score
=
nullptr
;
floa
t
*
init_score
=
nullptr
;
if
(
predict_fun_
!=
nullptr
)
{
init_score
=
new
score_
t
[
num_data_
];
init_score
=
new
floa
t
[
num_data_
];
}
std
::
function
<
void
(
data_size_t
,
const
std
::
vector
<
std
::
string
>&
)
>
process_fun
=
[
this
,
&
init_score
]
...
...
@@ -603,7 +604,7 @@ void Dataset::ExtractFeaturesFromFile() {
parser_
->
ParseOneLine
(
lines
[
i
].
c_str
(),
&
oneline_features
,
&
tmp_label
);
// set initial score
if
(
init_score
!=
nullptr
)
{
init_score
[
start_idx
+
i
]
=
static_cast
<
score_
t
>
(
predict_fun_
(
oneline_features
));
init_score
[
start_idx
+
i
]
=
static_cast
<
floa
t
>
(
predict_fun_
(
oneline_features
));
}
// set label
metadata_
.
SetLabelAt
(
start_idx
+
i
,
tmp_label
);
...
...
@@ -635,7 +636,8 @@ void Dataset::ExtractFeaturesFromFile() {
// metadata_ will manage space of init_score
if
(
init_score
!=
nullptr
)
{
metadata_
.
SetInitScore
(
init_score
);
metadata_
.
SetInitScore
(
init_score
,
num_data_
);
delete
[]
init_score
;
}
#pragma omp parallel for schedule(guided)
...
...
src/io/metadata.cpp
View file @
47313fb5
...
...
@@ -196,9 +196,9 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// get local initial scores
if
(
init_score_
!=
nullptr
)
{
score_
t
*
old_scores
=
init_score_
;
floa
t
*
old_scores
=
init_score_
;
num_init_score_
=
num_data_
;
init_score_
=
new
score_
t
[
num_init_score_
];
init_score_
=
new
floa
t
[
num_init_score_
];
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
init_score_
[
i
]
=
old_scores
[
used_data_indices
[
i
]];
}
...
...
@@ -211,10 +211,16 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
}
void
Metadata
::
SetInitScore
(
score_t
*
init_score
)
{
void
Metadata
::
SetInitScore
(
const
float
*
init_score
,
data_size_t
len
)
{
if
(
num_data_
!=
len
)
{
Log
::
Fatal
(
"len of initial score is not same with #data"
);
}
if
(
init_score_
!=
nullptr
)
{
delete
[]
init_score_
;
}
num_init_score_
=
num_data_
;
init_score_
=
init_score
;
init_score_
=
new
float
[
num_init_score_
];
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
init_score_
[
i
]
=
init_score
[
i
];
}
}
void
Metadata
::
LoadWeights
()
{
...
...
@@ -245,11 +251,11 @@ void Metadata::LoadInitialScore() {
Log
::
Info
(
"Start loading initial scores"
);
num_init_score_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
init_score_
=
new
score_
t
[
num_init_score_
];
init_score_
=
new
floa
t
[
num_init_score_
];
float
tmp
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
init_score_
[
i
]
=
static_cast
<
score_t
>
(
tmp
)
;
init_score_
[
i
]
=
tmp
;
}
}
...
...
src/io/tree.cpp
View file @
47313fb5
...
...
@@ -27,7 +27,7 @@ Tree::Tree(int max_leaves)
split_gain_
=
new
float
[
max_leaves_
-
1
];
leaf_parent_
=
new
int
[
max_leaves_
];
leaf_value_
=
new
score_
t
[
max_leaves_
];
leaf_value_
=
new
floa
t
[
max_leaves_
];
leaf_depth_
=
new
int
[
max_leaves_
];
// root is in the depth 1
leaf_depth_
[
0
]
=
1
;
...
...
@@ -48,7 +48,7 @@ Tree::~Tree() {
}
int
Tree
::
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold_bin
,
int
real_feature
,
float
threshold
,
score_
t
left_value
,
score_
t
right_value
,
float
gain
)
{
float
threshold
,
floa
t
left_value
,
floa
t
right_value
,
float
gain
)
{
int
new_node_idx
=
num_leaves_
-
1
;
// update parent info
int
parent
=
leaf_parent_
[
leaf
];
...
...
@@ -124,7 +124,7 @@ std::string Tree::ToString() {
ss
<<
"leaf_parent="
<<
Common
::
ArrayToString
<
int
>
(
leaf_parent_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
"leaf_value="
<<
Common
::
ArrayToString
<
score_
t
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
floa
t
>
(
leaf_value_
,
num_leaves_
,
' '
)
<<
std
::
endl
;
ss
<<
std
::
endl
;
return
ss
.
str
();
}
...
...
@@ -157,7 +157,7 @@ Tree::Tree(const std::string& str) {
threshold_
=
new
float
[
num_leaves_
-
1
];
split_gain_
=
new
float
[
num_leaves_
-
1
];
leaf_parent_
=
new
int
[
num_leaves_
];
leaf_value_
=
new
score_
t
[
num_leaves_
];
leaf_value_
=
new
floa
t
[
num_leaves_
];
split_feature_
=
nullptr
;
threshold_in_bin_
=
nullptr
;
...
...
src/metric/binary_metric.hpp
View file @
47313fb5
...
...
@@ -58,7 +58,7 @@ public:
return
false
;
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
...
...
@@ -78,7 +78,7 @@ public:
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
score_t
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
private:
...
...
@@ -181,7 +181,7 @@ public:
}
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
// get indices sorted by score, descent order
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -189,13 +189,13 @@ public:
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
// temp sum of postive label
floa
t
cur_pos
=
0.0
f
;
score_
t
cur_pos
=
0.0
f
;
// total sum of postive label
floa
t
sum_pos
=
0.0
f
;
score_
t
sum_pos
=
0.0
f
;
// accumlate of auc
floa
t
accum
=
0.0
f
;
score_
t
accum
=
0.0
f
;
// temp sum of negative label
floa
t
cur_neg
=
0.0
f
;
score_
t
cur_neg
=
0.0
f
;
score_t
threshold
=
score
[
sorted_idx
[
0
]];
if
(
weights_
==
nullptr
)
{
// no weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -233,11 +233,11 @@ public:
}
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
floa
t
auc
=
1.0
f
;
score_
t
auc
=
1.0
f
;
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
}
return
std
::
vector
<
score_t
>
(
1
,
auc
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
auc
)
)
;
}
private:
...
...
src/metric/metric.cpp
View file @
47313fb5
...
...
@@ -7,21 +7,21 @@
namespace
LightGBM
{
Metric
*
Metric
::
CreateMetric
(
const
std
::
string
&
type
,
const
MetricConfig
&
config
)
{
if
(
type
==
"l2"
)
{
if
(
type
==
std
::
string
(
"l2"
)
)
{
return
new
L2Metric
(
config
);
}
else
if
(
type
==
"l1"
)
{
}
else
if
(
type
==
std
::
string
(
"l1"
)
)
{
return
new
L1Metric
(
config
);
}
else
if
(
type
==
"binary_logloss"
)
{
}
else
if
(
type
==
std
::
string
(
"binary_logloss"
)
)
{
return
new
BinaryLoglossMetric
(
config
);
}
else
if
(
type
==
"binary_error"
)
{
}
else
if
(
type
==
std
::
string
(
"binary_error"
)
)
{
return
new
BinaryErrorMetric
(
config
);
}
else
if
(
type
==
"auc"
)
{
}
else
if
(
type
==
std
::
string
(
"auc"
)
)
{
return
new
AUCMetric
(
config
);
}
else
if
(
type
==
"ndcg"
)
{
}
else
if
(
type
==
std
::
string
(
"ndcg"
)
)
{
return
new
NDCGMetric
(
config
);
}
else
if
(
type
==
"multi_logloss"
){
}
else
if
(
type
==
std
::
string
(
"multi_logloss"
)
)
{
return
new
MultiLoglossMetric
(
config
);
}
else
if
(
type
==
"multi_error"
){
}
else
if
(
type
==
std
::
string
(
"multi_error"
)
)
{
return
new
MultiErrorMetric
(
config
);
}
return
nullptr
;
...
...
src/metric/multiclass_metric.hpp
View file @
47313fb5
...
...
@@ -50,14 +50,14 @@ public:
return
false
;
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
;
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
);
...
...
@@ -65,16 +65,16 @@ public:
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
)
*
weights_
[
i
];
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
score_t
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
private:
...
...
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
public:
explicit
MultiErrorMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiErrorMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
score_
t
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
floa
t
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
for
(
size_t
i
=
0
;
i
<
score
.
size
();
++
i
){
if
(
i
!=
k
&&
score
[
i
]
>
score
[
k
])
{
...
...
@@ -119,7 +119,7 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
public:
explicit
MultiLoglossMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiLoglossMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
score_
t
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
floa
t
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
Common
::
Softmax
(
&
score
);
if
(
score
[
k
]
>
kEpsilon
)
{
...
...
src/metric/rank_metric.hpp
View file @
47313fb5
...
...
@@ -84,7 +84,7 @@ public:
return
true
;
}
std
::
vector
<
score_
t
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
floa
t
>
Eval
(
const
score_t
*
score
)
const
override
{
// some buffers for multi-threading sum up
std
::
vector
<
std
::
vector
<
float
>>
result_buffer_
;
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
...
...
src/metric/regression_metric.hpp
View file @
47313fb5
...
...
@@ -67,7 +67,7 @@ public:
}
}
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
return
std
::
vector
<
float
>
(
1
,
loss
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
}
...
...
src/objective/binary_objective.hpp
View file @
47313fb5
...
...
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
public:
explicit
BinaryLogloss
(
const
ObjectiveConfig
&
config
)
{
is_unbalance_
=
config
.
is_unbalance
;
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
if
(
sigmoid_
<=
0.0
)
{
Log
::
Fatal
(
"Sigmoid parameter %f :should greater than zero"
,
sigmoid_
);
}
...
...
@@ -92,11 +92,11 @@ private:
/*! \brief True if using unbalance training */
bool
is_unbalance_
;
/*! \brief Sigmoid parameter */
score_
t
sigmoid_
;
floa
t
sigmoid_
;
/*! \brief Values for positive and negative labels */
int
label_val_
[
2
];
/*! \brief Weights for positive and negative labels */
score_
t
label_weights_
[
2
];
floa
t
label_weights_
[
2
];
/*! \brief Weights for data */
const
float
*
weights_
;
};
...
...
src/objective/multiclass_objective.hpp
View file @
47313fb5
...
...
@@ -38,13 +38,13 @@ public:
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
score_t
p
=
rec
[
k
];
score_t
p
=
static_cast
<
score_t
>
(
rec
[
k
]
)
;
if
(
label_int_
[
i
]
==
k
)
{
gradients
[
k
*
num_data_
+
i
]
=
p
-
1.0
f
;
}
else
{
...
...
@@ -56,13 +56,13 @@ public:
}
else
{
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
score_
t
>
rec
(
num_class_
);
std
::
vector
<
floa
t
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
score
[
k
*
num_data_
+
i
];
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]
)
;
}
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
float
p
=
rec
[
k
];
score_t
p
=
static_cast
<
score_t
>
(
rec
[
k
]
)
;
if
(
label_int_
[
i
]
==
k
)
{
gradients
[
k
*
num_data_
+
i
]
=
(
p
-
1.0
f
)
*
weights_
[
i
];
}
else
{
...
...
src/objective/objective_function.cpp
View file @
47313fb5
...
...
@@ -7,13 +7,13 @@
namespace
LightGBM
{
ObjectiveFunction
*
ObjectiveFunction
::
CreateObjectiveFunction
(
const
std
::
string
&
type
,
const
ObjectiveConfig
&
config
)
{
if
(
type
==
"regression"
)
{
if
(
type
==
std
::
string
(
"regression"
)
)
{
return
new
RegressionL2loss
(
config
);
}
else
if
(
type
==
"binary"
)
{
}
else
if
(
type
==
std
::
string
(
"binary"
)
)
{
return
new
BinaryLogloss
(
config
);
}
else
if
(
type
==
"lambdarank"
)
{
}
else
if
(
type
==
std
::
string
(
"lambdarank"
)
)
{
return
new
LambdarankNDCG
(
config
);
}
else
if
(
type
==
"multiclass"
)
{
}
else
if
(
type
==
std
::
string
(
"multiclass"
)
)
{
return
new
MulticlassLogloss
(
config
);
}
return
nullptr
;
...
...
src/objective/rank_objective.hpp
View file @
47313fb5
...
...
@@ -19,7 +19,7 @@ namespace LightGBM {
class
LambdarankNDCG
:
public
ObjectiveFunction
{
public:
explicit
LambdarankNDCG
(
const
ObjectiveConfig
&
config
)
{
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
// initialize DCG calculator
DCGCalculator
::
Init
(
config
.
label_gain
);
// copy lable gain to local
...
...
@@ -207,7 +207,7 @@ private:
/*! \brief Cache inverse max DCG, speed up calculation */
score_t
*
inverse_max_dcgs_
;
/*! \brief Simgoid param */
score_
t
sigmoid_
;
floa
t
sigmoid_
;
/*! \brief Optimized NDCG@ */
int
optimize_pos_at_
;
/*! \brief Number of queries */
...
...
src/treelearner/data_parallel_tree_learner.cpp
View file @
47313fb5
...
...
@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
}
// sync global data sumup info
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
data
(
smaller_leaf_splits_
->
num_data_in_leaf
(),
std
::
tuple
<
data_size_t
,
double
,
double
>
data
(
smaller_leaf_splits_
->
num_data_in_leaf
(),
smaller_leaf_splits_
->
sum_gradients
(),
smaller_leaf_splits_
->
sum_hessians
());
int
size
=
sizeof
(
data
);
std
::
memcpy
(
input_buffer_
,
&
data
,
size
);
// global sumup reduce
Network
::
Allreduce
(
input_buffer_
,
size
,
size
,
output_buffer_
,
[](
const
char
*
src
,
char
*
dst
,
int
len
)
{
int
used_size
=
0
;
int
type_size
=
sizeof
(
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
);
const
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*
p1
;
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*
p2
;
int
type_size
=
sizeof
(
std
::
tuple
<
data_size_t
,
double
,
double
>
);
const
std
::
tuple
<
data_size_t
,
double
,
double
>
*
p1
;
std
::
tuple
<
data_size_t
,
double
,
double
>
*
p2
;
while
(
used_size
<
len
)
{
p1
=
reinterpret_cast
<
const
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*>
(
src
);
p2
=
reinterpret_cast
<
std
::
tuple
<
data_size_t
,
score_t
,
score_t
>
*>
(
dst
);
p1
=
reinterpret_cast
<
const
std
::
tuple
<
data_size_t
,
double
,
double
>
*>
(
src
);
p2
=
reinterpret_cast
<
std
::
tuple
<
data_size_t
,
double
,
double
>
*>
(
dst
);
std
::
get
<
0
>
(
*
p2
)
=
std
::
get
<
0
>
(
*
p2
)
+
std
::
get
<
0
>
(
*
p1
);
std
::
get
<
1
>
(
*
p2
)
=
std
::
get
<
1
>
(
*
p2
)
+
std
::
get
<
1
>
(
*
p1
);
std
::
get
<
2
>
(
*
p2
)
=
std
::
get
<
2
>
(
*
p2
)
+
std
::
get
<
2
>
(
*
p1
);
...
...
@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
void
DataParallelTreeLearner
::
FindBestSplitsForLeaves
()
{
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
std
::
vector
<
float
>
gains
;
std
::
vector
<
double
>
gains
;
// find local best split for smaller leaf
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// find local best split for larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
...
@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
...
...
src/treelearner/feature_histogram.hpp
View file @
47313fb5
...
...
@@ -26,7 +26,7 @@ public:
* \param min_num_data_one_leaf minimal number of data in one leaf
*/
void
Init
(
const
Feature
*
feature
,
int
feature_idx
,
data_size_t
min_num_data_one_leaf
,
score_t
min_sum_hessian_one_leaf
)
{
double
min_sum_hessian_one_leaf
)
{
feature_idx_
=
feature_idx
;
min_num_data_one_leaf_
=
min_num_data_one_leaf
;
min_sum_hessian_one_leaf_
=
min_sum_hessian_one_leaf
;
...
...
@@ -45,8 +45,8 @@ public:
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
*/
void
Construct
(
data_size_t
*
data_indices
,
data_size_t
num_data
,
score_t
sum_gradients
,
score_t
sum_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
)
{
void
Construct
(
data_size_t
*
data_indices
,
data_size_t
num_data
,
double
sum_gradients
,
double
sum_hessians
,
const
score_t
*
ordered_gradients
,
const
score_t
*
ordered_hessians
)
{
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
...
...
@@ -63,8 +63,8 @@ public:
* \param gradients
* \param hessian
*/
void
Construct
(
const
OrderedBin
*
ordered_bin
,
int
leaf
,
data_size_t
num_data
,
score_t
sum_gradients
,
score_t
sum_hessians
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
void
Construct
(
const
OrderedBin
*
ordered_bin
,
int
leaf
,
data_size_t
num_data
,
double
sum_gradients
,
double
sum_hessians
,
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
std
::
memset
(
data_
,
0
,
sizeof
(
HistogramBinEntry
)
*
num_bins_
);
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
...
...
@@ -78,7 +78,7 @@ public:
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hessians of current leaf
*/
void
SetSumup
(
data_size_t
num_data
,
score_t
sum_gradients
,
score_t
sum_hessians
)
{
void
SetSumup
(
data_size_t
num_data
,
double
sum_gradients
,
double
sum_hessians
)
{
num_data_
=
num_data
;
sum_gradients_
=
sum_gradients
;
sum_hessians_
=
sum_hessians
+
2
*
kEpsilon
;
...
...
@@ -104,15 +104,15 @@ public:
* \param output The best split result
*/
void
FindBestThreshold
(
SplitInfo
*
output
)
{
score_t
best_sum_left_gradient
=
NAN
;
score_t
best_sum_left_hessian
=
NAN
;
score_t
best_gain
=
kMinScore
;
double
best_sum_left_gradient
=
NAN
;
double
best_sum_left_hessian
=
NAN
;
double
best_gain
=
kMinScore
;
data_size_t
best_left_count
=
0
;
unsigned
int
best_threshold
=
static_cast
<
unsigned
int
>
(
num_bins_
);
score_t
sum_right_gradient
=
0.0
f
;
score_t
sum_right_hessian
=
kEpsilon
;
double
sum_right_gradient
=
0.0
f
;
double
sum_right_hessian
=
kEpsilon
;
data_size_t
right_count
=
0
;
score_t
gain_shift
=
GetLeafSplitGain
(
sum_gradients_
,
sum_hessians_
);
double
gain_shift
=
GetLeafSplitGain
(
sum_gradients_
,
sum_hessians_
);
is_splittable_
=
false
;
// from right to left, and we don't need data in bin0
for
(
unsigned
int
t
=
num_bins_
-
1
;
t
>
0
;
--
t
)
{
...
...
@@ -125,14 +125,14 @@ public:
// if data not enough
if
(
left_count
<
min_num_data_one_leaf_
)
break
;
score_t
sum_left_hessian
=
sum_hessians_
-
sum_right_hessian
;
double
sum_left_hessian
=
sum_hessians_
-
sum_right_hessian
;
// if sum hessian too small
if
(
sum_left_hessian
<
min_sum_hessian_one_leaf_
)
{
break
;
}
score_t
sum_left_gradient
=
sum_gradients_
-
sum_right_gradient
;
double
sum_left_gradient
=
sum_gradients_
-
sum_right_gradient
;
// current split gain
score_t
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
);
double
current_gain
=
GetLeafSplitGain
(
sum_left_gradient
,
sum_left_hessian
)
+
GetLeafSplitGain
(
sum_right_gradient
,
sum_right_hessian
);
// gain is worst than no perform split
if
(
current_gain
<
gain_shift
)
{
continue
;
...
...
@@ -195,7 +195,7 @@ public:
/*!
* \brief Set min sum hessian in one leaf
*/
void
SetMinSumHessianOneLeaf
(
score_t
new_val
)
{
void
SetMinSumHessianOneLeaf
(
double
new_val
)
{
min_sum_hessian_one_leaf_
=
new_val
;
}
...
...
@@ -216,7 +216,7 @@ private:
* \param sum_hessians
* \return split gain
*/
score_t
GetLeafSplitGain
(
score_t
sum_gradients
,
score_t
sum_hessians
)
const
{
double
GetLeafSplitGain
(
double
sum_gradients
,
double
sum_hessians
)
const
{
return
(
sum_gradients
*
sum_gradients
)
/
(
sum_hessians
);
}
...
...
@@ -226,7 +226,7 @@ private:
* \param sum_hessians
* \return leaf output
*/
score_t
CalculateSplittedLeafOutput
(
score_t
sum_gradients
,
score_t
sum_hessians
)
const
{
double
CalculateSplittedLeafOutput
(
double
sum_gradients
,
double
sum_hessians
)
const
{
return
-
(
sum_gradients
)
/
(
sum_hessians
);
}
...
...
@@ -234,7 +234,7 @@ private:
/*! \brief minimal number of data in one leaf */
data_size_t
min_num_data_one_leaf_
;
/*! \brief minimal sum hessian of data in one leaf */
score_t
min_sum_hessian_one_leaf_
;
double
min_sum_hessian_one_leaf_
;
/*! \brief the bin data of current feature */
const
Bin
*
bin_data_
;
/*! \brief number of bin of histogram */
...
...
@@ -244,9 +244,9 @@ private:
/*! \brief number of all data */
data_size_t
num_data_
;
/*! \brief sum of gradient of current leaf */
score_t
sum_gradients_
;
double
sum_gradients_
;
/*! \brief sum of hessians of current leaf */
score_t
sum_hessians_
;
double
sum_hessians_
;
/*! \brief False if this histogram cannot split */
bool
is_splittable_
=
true
;
};
...
...
src/treelearner/feature_parallel_tree_learner.cpp
View file @
47313fb5
...
...
@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
// get best split at smaller leaf
std
::
vector
<
float
>
gains
;
std
::
vector
<
double
>
gains
;
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// get best split at larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
...
@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
// sync global best info
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment