Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
5d022898
"tests/vscode:/vscode.git/clone" did not exist on "317b29de0f16428610e2e4d6a6953bee5a2d0ec2"
Commit
5d022898
authored
Oct 31, 2016
by
Guolin Ke
Committed by
xuehui
Oct 31, 2016
Browse files
refine some interface for better expose api. (#52)
parent
66804b93
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
268 additions
and
266 deletions
+268
-266
src/io/metadata.cpp
src/io/metadata.cpp
+3
-3
src/io/parser.hpp
src/io/parser.hpp
+6
-6
src/io/sparse_bin.hpp
src/io/sparse_bin.hpp
+5
-5
src/io/tree.cpp
src/io/tree.cpp
+14
-14
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+101
-100
src/metric/dcg_calculator.cpp
src/metric/dcg_calculator.cpp
+12
-12
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+76
-77
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+33
-31
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+1
-1
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+2
-2
src/objective/regression_objective.hpp
src/objective/regression_objective.hpp
+2
-2
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/data_parallel_tree_learner.cpp
+3
-3
src/treelearner/feature_parallel_tree_learner.cpp
src/treelearner/feature_parallel_tree_learner.cpp
+3
-3
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+1
-1
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+6
-6
No files found.
src/io/metadata.cpp
View file @
5d022898
...
...
@@ -231,9 +231,9 @@ void Metadata::LoadWeights() {
num_weights_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
weights_
=
new
float
[
num_weights_
];
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
double
tmp_weight
=
0.0
f
;
float
tmp_weight
=
0.0
f
;
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp_weight
);
weights_
[
i
]
=
static_cast
<
float
>
(
tmp_weight
)
;
weights_
[
i
]
=
tmp_weight
;
}
}
...
...
@@ -246,7 +246,7 @@ void Metadata::LoadInitialScore() {
Log
::
Info
(
"Start loading initial scores"
);
num_init_score_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
init_score_
=
new
score_t
[
num_init_score_
];
double
tmp
=
0.0
f
;
float
tmp
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
init_score_
[
i
]
=
static_cast
<
score_t
>
(
tmp
);
...
...
src/io/parser.hpp
View file @
5d022898
...
...
@@ -18,9 +18,9 @@ public:
:
label_idx_
(
label_idx
)
{
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
int
idx
=
0
;
double
val
=
0.0
;
float
val
=
0.0
f
;
int
bias
=
0
;
*
out_label
=
0.0
f
;
while
(
*
str
!=
'\0'
)
{
...
...
@@ -50,9 +50,9 @@ public:
:
label_idx_
(
label_idx
)
{
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
int
idx
=
0
;
double
val
=
0.0
;
float
val
=
0.0
f
;
int
bias
=
0
;
while
(
*
str
!=
'\0'
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
...
...
@@ -83,9 +83,9 @@ public:
}
}
inline
void
ParseOneLine
(
const
char
*
str
,
std
::
vector
<
std
::
pair
<
int
,
double
>>*
out_features
,
double
*
out_label
)
const
override
{
std
::
vector
<
std
::
pair
<
int
,
float
>>*
out_features
,
float
*
out_label
)
const
override
{
int
idx
=
0
;
double
val
=
0.0
;
float
val
=
0.0
f
;
if
(
label_idx_
==
0
)
{
str
=
Common
::
Atof
(
str
,
&
val
);
*
out_label
=
val
;
...
...
src/io/sparse_bin.hpp
View file @
5d022898
...
...
@@ -64,7 +64,7 @@ public:
data_size_t
cur_pos
=
fast_pair
.
second
;
data_size_t
lte_count
=
0
;
data_size_t
gt_count
=
0
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
i
++
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
const
data_size_t
idx
=
data_indices
[
i
];
while
(
cur_pos
<
idx
&&
j
<
num_vals_
)
{
++
j
;
...
...
@@ -92,12 +92,12 @@ public:
void
FinishLoad
()
override
{
// get total non zero size
size_t
non_zero_size
=
0
;
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
++
i
)
{
non_zero_size
+=
push_buffers_
[
i
].
size
();
}
// merge
non_zero_pair_
.
reserve
(
non_zero_size
);
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
push_buffers_
.
size
();
++
i
)
{
non_zero_pair_
.
insert
(
non_zero_pair_
.
end
(),
push_buffers_
[
i
].
begin
(),
push_buffers_
[
i
].
end
());
push_buffers_
[
i
].
clear
();
push_buffers_
[
i
].
shrink_to_fit
();
...
...
@@ -122,7 +122,7 @@ public:
// transform to delta array
const
uint8_t
kMaxDelta
=
255
;
data_size_t
last_idx
=
0
;
for
(
size_t
i
=
0
;
i
<
non_zero_pair
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
non_zero_pair
.
size
();
++
i
)
{
const
data_size_t
cur_idx
=
non_zero_pair
[
i
].
first
;
const
VAL_T
bin
=
non_zero_pair
[
i
].
second
;
data_size_t
cur_delta
=
cur_idx
-
last_idx
;
...
...
@@ -198,7 +198,7 @@ public:
delta_
.
clear
();
vals_
.
clear
();
num_vals_
=
tmp_num_vals
;
for
(
data_size_t
i
=
0
;
i
<
num_vals_
;
i
++
)
{
for
(
data_size_t
i
=
0
;
i
<
num_vals_
;
++
i
)
{
delta_
.
push_back
(
tmp_delta
[
i
]);
vals_
.
push_back
(
tmp_vals
[
i
]);
}
...
...
src/io/tree.cpp
View file @
5d022898
...
...
@@ -23,8 +23,8 @@ Tree::Tree(int max_leaves)
split_feature_
=
new
int
[
max_leaves_
-
1
];
split_feature_real_
=
new
int
[
max_leaves_
-
1
];
threshold_in_bin_
=
new
unsigned
int
[
max_leaves_
-
1
];
threshold_
=
new
double
[
max_leaves_
-
1
];
split_gain_
=
new
double
[
max_leaves_
-
1
];
threshold_
=
new
float
[
max_leaves_
-
1
];
split_gain_
=
new
float
[
max_leaves_
-
1
];
leaf_parent_
=
new
int
[
max_leaves_
];
leaf_value_
=
new
score_t
[
max_leaves_
];
...
...
@@ -48,7 +48,7 @@ Tree::~Tree() {
}
int
Tree
::
Split
(
int
leaf
,
int
feature
,
unsigned
int
threshold_bin
,
int
real_feature
,
double
threshold
,
score_t
left_value
,
score_t
right_value
,
double
gain
)
{
float
threshold
,
score_t
left_value
,
score_t
right_value
,
float
gain
)
{
int
new_node_idx
=
num_leaves_
-
1
;
// update parent info
int
parent
=
leaf_parent_
[
leaf
];
...
...
@@ -85,10 +85,10 @@ int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feat
void
Tree
::
AddPredictionToScore
(
const
Dataset
*
data
,
data_size_t
num_data
,
score_t
*
score
)
const
{
Threading
::
For
<
data_size_t
>
(
0
,
num_data
,
[
this
,
data
,
score
](
int
,
data_size_t
start
,
data_size_t
end
)
{
std
::
vector
<
BinIterator
*>
iterators
;
for
(
int
i
=
0
;
i
<
data
->
num_features
();
i
++
)
{
for
(
int
i
=
0
;
i
<
data
->
num_features
();
++
i
)
{
iterators
.
push_back
(
data
->
FeatureAt
(
i
)
->
bin_data
()
->
GetIterator
(
start
));
}
for
(
data_size_t
i
=
start
;
i
<
end
;
i
++
)
{
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
score
[
i
]
+=
leaf_value_
[
GetLeaf
(
iterators
,
i
)];
}
});
...
...
@@ -99,10 +99,10 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat
Threading
::
For
<
data_size_t
>
(
0
,
num_data
,
[
this
,
data
,
used_data_indices
,
score
](
int
,
data_size_t
start
,
data_size_t
end
)
{
std
::
vector
<
BinIterator
*>
iterators
;
for
(
int
i
=
0
;
i
<
data
->
num_features
();
i
++
)
{
for
(
int
i
=
0
;
i
<
data
->
num_features
();
++
i
)
{
iterators
.
push_back
(
data
->
FeatureAt
(
i
)
->
bin_data
()
->
GetIterator
(
used_data_indices
[
start
]));
}
for
(
data_size_t
i
=
start
;
i
<
end
;
i
++
)
{
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
score
[
used_data_indices
[
i
]]
+=
leaf_value_
[
GetLeaf
(
iterators
,
used_data_indices
[
i
])];
}
});
...
...
@@ -114,9 +114,9 @@ std::string Tree::ToString() {
ss
<<
"split_feature="
<<
Common
::
ArrayToString
<
int
>
(
split_feature_real_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"split_gain="
<<
Common
::
ArrayToString
<
double
>
(
split_gain_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
float
>
(
split_gain_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"threshold="
<<
Common
::
ArrayToString
<
double
>
(
threshold_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
<<
Common
::
ArrayToString
<
float
>
(
threshold_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"left_child="
<<
Common
::
ArrayToString
<
int
>
(
left_child_
,
num_leaves_
-
1
,
' '
)
<<
std
::
endl
;
ss
<<
"right_child="
...
...
@@ -154,8 +154,8 @@ Tree::Tree(const std::string& str) {
left_child_
=
new
int
[
num_leaves_
-
1
];
right_child_
=
new
int
[
num_leaves_
-
1
];
split_feature_real_
=
new
int
[
num_leaves_
-
1
];
threshold_
=
new
double
[
num_leaves_
-
1
];
split_gain_
=
new
double
[
num_leaves_
-
1
];
threshold_
=
new
float
[
num_leaves_
-
1
];
split_gain_
=
new
float
[
num_leaves_
-
1
];
leaf_parent_
=
new
int
[
num_leaves_
];
leaf_value_
=
new
score_t
[
num_leaves_
];
...
...
@@ -165,9 +165,9 @@ Tree::Tree(const std::string& str) {
Common
::
StringToIntArray
(
key_vals
[
"split_feature"
],
' '
,
num_leaves_
-
1
,
split_feature_real_
);
Common
::
StringTo
Double
Array
(
key_vals
[
"split_gain"
],
' '
,
Common
::
StringTo
Float
Array
(
key_vals
[
"split_gain"
],
' '
,
num_leaves_
-
1
,
split_gain_
);
Common
::
StringTo
Double
Array
(
key_vals
[
"threshold"
],
' '
,
Common
::
StringTo
Float
Array
(
key_vals
[
"threshold"
],
' '
,
num_leaves_
-
1
,
threshold_
);
Common
::
StringToIntArray
(
key_vals
[
"left_child"
],
' '
,
num_leaves_
-
1
,
left_child_
);
...
...
@@ -175,7 +175,7 @@ Tree::Tree(const std::string& str) {
num_leaves_
-
1
,
right_child_
);
Common
::
StringToIntArray
(
key_vals
[
"leaf_parent"
],
' '
,
num_leaves_
,
leaf_parent_
);
Common
::
StringTo
Double
Array
(
key_vals
[
"leaf_value"
],
' '
,
Common
::
StringTo
Float
Array
(
key_vals
[
"leaf_value"
],
' '
,
num_leaves_
,
leaf_value_
);
}
...
...
src/metric/binary_metric.hpp
View file @
5d022898
...
...
@@ -7,6 +7,7 @@
#include <algorithm>
#include <vector>
#include <sstream>
namespace
LightGBM
{
...
...
@@ -18,9 +19,6 @@ template<typename PointWiseLossCalculator>
class
BinaryMetric
:
public
Metric
{
public:
explicit
BinaryMetric
(
const
MetricConfig
&
config
)
{
early_stopping_round_
=
config
.
early_stopping_round
;
output_freq_
=
config
.
output_freq
;
the_bigger_the_better
=
false
;
sigmoid_
=
static_cast
<
score_t
>
(
config
.
sigmoid
);
if
(
sigmoid_
<=
0.0
f
)
{
Log
::
Fatal
(
"Sigmoid param %f should greater than zero"
,
sigmoid_
);
...
...
@@ -32,7 +30,9 @@ public:
}
void
Init
(
const
char
*
test_name
,
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
name
=
test_name
;
std
::
stringstream
str_buf
;
str_buf
<<
test_name
<<
"'s "
<<
PointWiseLossCalculator
::
Name
();
name_
=
str_buf
.
str
();
num_data_
=
num_data
;
// get label
label_
=
metadata
.
label
();
...
...
@@ -41,7 +41,7 @@ public:
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
}
else
{
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -50,38 +50,38 @@ public:
}
}
score_t
PrintAndGetLoss
(
int
iter
,
const
score_t
*
score
)
const
override
{
const
char
*
GetName
()
const
override
{
return
name_
.
c_str
();
}
bool
is_bigger_better
()
const
override
{
return
false
;
}
std
::
vector
<
score_t
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
if
(
early_stopping_round_
>
0
||
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
))
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// sigmoid transform
score_t
prob
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
score
[
i
]));
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
);
}
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// sigmoid transform
score_t
prob
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
score
[
i
]));
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
)
*
weights_
[
i
];
}
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// sigmoid transform
score_t
prob
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
score
[
i
]));
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
);
}
score_t
loss
=
sum_loss
/
sum_weights_
;
if
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
){
Log
::
Info
(
"Iteration:%d, %s's %s: %f"
,
iter
,
name
,
PointWiseLossCalculator
::
Name
(),
loss
);
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// sigmoid transform
score_t
prob
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
2.0
f
*
sigmoid_
*
score
[
i
]));
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
)
*
weights_
[
i
];
}
return
loss
;
}
return
0.0
f
;
score_t
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
score_t
>
(
1
,
loss
);
}
private:
/*! \brief Output frequently */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
/*! \brief Pointer of label */
...
...
@@ -89,9 +89,9 @@ private:
/*! \brief Pointer of weighs */
const
float
*
weights_
;
/*! \brief Sum weights */
double
sum_weights_
;
float
sum_weights_
;
/*! \brief Name of test set */
const
char
*
name
;
std
::
string
name
_
;
/*! \brief Sigmoid parameter */
score_t
sigmoid_
;
};
...
...
@@ -145,17 +145,26 @@ public:
*/
class
AUCMetric
:
public
Metric
{
public:
explicit
AUCMetric
(
const
MetricConfig
&
config
)
{
early_stopping_round_
=
config
.
early_stopping_round
;
output_freq_
=
config
.
output_freq
;
the_bigger_the_better
=
true
;
explicit
AUCMetric
(
const
MetricConfig
&
)
{
}
virtual
~
AUCMetric
()
{
}
const
char
*
GetName
()
const
override
{
return
name_
.
c_str
();
}
bool
is_bigger_better
()
const
override
{
return
true
;
}
void
Init
(
const
char
*
test_name
,
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
name
=
test_name
;
std
::
stringstream
str_buf
;
str_buf
<<
test_name
<<
"'s AUC"
;
name_
=
str_buf
.
str
();
num_data_
=
num_data
;
// get label
label_
=
metadata
.
label
();
...
...
@@ -163,7 +172,7 @@ public:
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
}
else
{
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -172,74 +181,66 @@ public:
}
}
score_t
PrintAndGetLoss
(
int
iter
,
const
score_t
*
score
)
const
override
{
if
(
early_stopping_round_
>
0
||
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
))
{
// get indices sorted by score, descent order
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
score_t
>
Eval
(
const
score_t
*
score
)
const
override
{
// get indices sorted by score, descent order
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sorted_idx
.
emplace_back
(
i
);
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
// temp sum of postive label
float
cur_pos
=
0.0
f
;
// total sum of postive label
float
sum_pos
=
0.0
f
;
// accumlate of auc
float
accum
=
0.0
f
;
// temp sum of negative label
float
cur_neg
=
0.0
f
;
score_t
threshold
=
score
[
sorted_idx
[
0
]];
if
(
weights_
==
nullptr
)
{
// no weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sorted_idx
.
emplace_back
(
i
);
}
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
// temp sum of postive label
double
cur_pos
=
0.0
;
// total sum of postive label
double
sum_pos
=
0.0
;
// accumlate of auc
double
accum
=
0.0
;
// temp sum of negative label
double
cur_neg
=
0.0
;
score_t
threshold
=
score
[
sorted_idx
[
0
]];
if
(
weights_
==
nullptr
)
{
// not weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
float
cur_label
=
label_
[
sorted_idx
[
i
]];
const
score_t
cur_score
=
score
[
sorted_idx
[
i
]];
// new threshold
if
(
cur_score
!=
threshold
)
{
threshold
=
cur_score
;
// accmulate
accum
+=
cur_neg
*
(
cur_pos
*
0.5
+
sum_pos
);
sum_pos
+=
cur_pos
;
// reset
cur_neg
=
cur_pos
=
0.0
;
}
cur_neg
+=
1.0
-
cur_label
;
cur_pos
+=
cur_label
;
const
float
cur_label
=
label_
[
sorted_idx
[
i
]];
const
score_t
cur_score
=
score
[
sorted_idx
[
i
]];
// new threshold
if
(
cur_score
!=
threshold
)
{
threshold
=
cur_score
;
// accmulate
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
// reset
cur_neg
=
cur_pos
=
0.0
f
;
}
}
else
{
// has weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
float
cur_label
=
label_
[
sorted_idx
[
i
]];
const
score_t
cur_score
=
score
[
sorted_idx
[
i
]];
const
float
cur_weight
=
weights_
[
sorted_idx
[
i
]];
// new threshold
if
(
cur_score
!=
threshold
)
{
threshold
=
cur_score
;
// accmulate
accum
+=
cur_neg
*
(
cur_pos
*
0.5
+
sum_pos
);
sum_pos
+=
cur_pos
;
// reset
cur_neg
=
cur_pos
=
0.0
;
}
cur_neg
+=
(
1.0
-
cur_label
)
*
cur_weight
;
cur_pos
+=
cur_label
*
cur_weight
;
}
}
accum
+=
cur_neg
*
(
cur_pos
*
0.5
+
sum_pos
);
sum_pos
+=
cur_pos
;
double
auc
=
1.0
;
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
cur_neg
+=
1.0
f
-
cur_label
;
cur_pos
+=
cur_label
;
}
if
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
){
Log
::
Info
(
"Iteration:%d, %s's %s: %f"
,
iter
,
name
,
"auc"
,
auc
);
}
else
{
// has weights
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
float
cur_label
=
label_
[
sorted_idx
[
i
]];
const
score_t
cur_score
=
score
[
sorted_idx
[
i
]];
const
float
cur_weight
=
weights_
[
sorted_idx
[
i
]];
// new threshold
if
(
cur_score
!=
threshold
)
{
threshold
=
cur_score
;
// accmulate
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
// reset
cur_neg
=
cur_pos
=
0.0
f
;
}
cur_neg
+=
(
1.0
f
-
cur_label
)
*
cur_weight
;
cur_pos
+=
cur_label
*
cur_weight
;
}
return
auc
;
}
return
0.0
f
;
accum
+=
cur_neg
*
(
cur_pos
*
0.5
f
+
sum_pos
);
sum_pos
+=
cur_pos
;
float
auc
=
1.0
f
;
if
(
sum_pos
>
0.0
f
&&
sum_pos
!=
sum_weights_
)
{
auc
=
accum
/
(
sum_pos
*
(
sum_weights_
-
sum_pos
));
}
return
std
::
vector
<
score_t
>
(
1
,
auc
);
}
private:
/*! \brief Output frequency */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
/*! \brief Pointer of label */
...
...
@@ -247,9 +248,9 @@ private:
/*! \brief Pointer of weighs */
const
float
*
weights_
;
/*! \brief Sum weights */
double
sum_weights_
;
float
sum_weights_
;
/*! \brief Name of test set */
const
char
*
name
;
std
::
string
name
_
;
};
}
// namespace LightGBM
...
...
src/metric/dcg_calculator.cpp
View file @
5d022898
...
...
@@ -11,23 +11,23 @@ namespace LightGBM {
/*! \brief Declaration for some static members */
bool
DCGCalculator
::
is_inited_
=
false
;
std
::
vector
<
double
>
DCGCalculator
::
label_gain_
;
std
::
vector
<
double
>
DCGCalculator
::
discount_
;
std
::
vector
<
float
>
DCGCalculator
::
label_gain_
;
std
::
vector
<
float
>
DCGCalculator
::
discount_
;
const
data_size_t
DCGCalculator
::
kMaxPosition
=
10000
;
void
DCGCalculator
::
Init
(
std
::
vector
<
double
>
input_label_gain
)
{
void
DCGCalculator
::
Init
(
std
::
vector
<
float
>
input_label_gain
)
{
// only inited one time
if
(
is_inited_
)
{
return
;
}
label_gain_
=
input_label_gain
;
discount_
.
clear
();
for
(
data_size_t
i
=
0
;
i
<
kMaxPosition
;
++
i
)
{
discount_
.
emplace_back
(
1.0
/
std
::
log2
(
2.0
+
i
));
discount_
.
emplace_back
(
1.0
f
/
std
::
log2
(
2.0
f
+
i
));
}
is_inited_
=
true
;
}
double
DCGCalculator
::
CalMaxDCGAtK
(
data_size_t
k
,
const
float
*
label
,
data_size_t
num_data
)
{
double
ret
=
0.0
;
float
DCGCalculator
::
CalMaxDCGAtK
(
data_size_t
k
,
const
float
*
label
,
data_size_t
num_data
)
{
float
ret
=
0.0
f
;
// counts for all labels
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -53,14 +53,14 @@ double DCGCalculator::CalMaxDCGAtK(data_size_t k, const float* label, data_size_
void
DCGCalculator
::
CalMaxDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
data_size_t
num_data
,
std
::
vector
<
double
>*
out
)
{
std
::
vector
<
float
>*
out
)
{
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
// counts for all labels
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
if
(
static_cast
<
size_t
>
(
label
[
i
])
>=
label_cnt
.
size
())
{
Log
::
Fatal
(
"label excel %d"
,
label
[
i
]);
}
++
label_cnt
[
static_cast
<
int
>
(
label
[
i
])];
}
double
cur_result
=
0.0
;
float
cur_result
=
0.0
f
;
data_size_t
cur_left
=
0
;
size_t
top_label
=
label_gain_
.
size
()
-
1
;
// calculate k Max DCG by one pass
...
...
@@ -83,7 +83,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
}
double
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
float
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
)
{
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
...
...
@@ -94,7 +94,7 @@ double DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
if
(
k
>
num_data
)
{
k
=
num_data
;
}
double
dcg
=
0.0
;
float
dcg
=
0.0
f
;
// calculate dcg
for
(
data_size_t
i
=
0
;
i
<
k
;
++
i
)
{
data_size_t
idx
=
sorted_idx
[
i
];
...
...
@@ -104,7 +104,7 @@ double DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
}
void
DCGCalculator
::
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
,
std
::
vector
<
double
>*
out
)
{
const
score_t
*
score
,
data_size_t
num_data
,
std
::
vector
<
float
>*
out
)
{
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
...
@@ -113,7 +113,7 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const float* labe
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
double
cur_result
=
0.0
;
float
cur_result
=
0.0
f
;
data_size_t
cur_left
=
0
;
// calculate multi dcg by one pass
for
(
size_t
i
=
0
;
i
<
ks
.
size
();
++
i
)
{
...
...
src/metric/rank_metric.hpp
View file @
5d022898
...
...
@@ -16,9 +16,6 @@ namespace LightGBM {
class
NDCGMetric
:
public
Metric
{
public:
explicit
NDCGMetric
(
const
MetricConfig
&
config
)
{
early_stopping_round_
=
config
.
early_stopping_round
;
output_freq_
=
config
.
output_freq
;
the_bigger_the_better
=
true
;
// get eval position
for
(
auto
k
:
config
.
eval_at
)
{
eval_at_
.
push_back
(
static_cast
<
data_size_t
>
(
k
));
...
...
@@ -26,8 +23,8 @@ public:
// initialize DCG calculator
DCGCalculator
::
Init
(
config
.
label_gain
);
// get number of threads
#pragma omp parallel
#pragma omp master
#pragma omp parallel
#pragma omp master
{
num_threads_
=
omp_get_num_threads
();
}
...
...
@@ -36,7 +33,12 @@ public:
~
NDCGMetric
()
{
}
void
Init
(
const
char
*
test_name
,
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
name
=
test_name
;
std
::
stringstream
str_buf
;
str_buf
<<
test_name
<<
"'s "
;
for
(
auto
k
:
eval_at_
)
{
str_buf
<<
"NDCG@"
+
std
::
to_string
(
k
)
+
" "
;
}
name_
=
str_buf
.
str
();
num_data_
=
num_data
;
// get label
label_
=
metadata
.
label
();
...
...
@@ -49,7 +51,7 @@ public:
// get query weights
query_weights_
=
metadata
.
query_weights
();
if
(
query_weights_
==
nullptr
)
{
sum_query_weights_
=
static_cast
<
double
>
(
num_queries_
);
sum_query_weights_
=
static_cast
<
float
>
(
num_queries_
);
}
else
{
sum_query_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
...
...
@@ -58,99 +60,96 @@ public:
}
// cache the inverse max DCG for all querys, used to calculate NDCG
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
inverse_max_dcgs_
.
emplace_back
(
eval_at_
.
size
(),
0.0
);
inverse_max_dcgs_
.
emplace_back
(
eval_at_
.
size
(),
0.0
f
);
DCGCalculator
::
CalMaxDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
inverse_max_dcgs_
[
i
]);
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
inverse_max_dcgs_
[
i
]);
for
(
size_t
j
=
0
;
j
<
inverse_max_dcgs_
[
i
].
size
();
++
j
)
{
if
(
inverse_max_dcgs_
[
i
][
j
]
>
0.0
)
{
inverse_max_dcgs_
[
i
][
j
]
=
1.0
/
inverse_max_dcgs_
[
i
][
j
];
}
else
{
if
(
inverse_max_dcgs_
[
i
][
j
]
>
0.0
f
)
{
inverse_max_dcgs_
[
i
][
j
]
=
1.0
f
/
inverse_max_dcgs_
[
i
][
j
];
}
else
{
// marking negative for all negative querys.
// if one meet this query, it's ndcg will be set as -1.
inverse_max_dcgs_
[
i
][
j
]
=
-
1.0
;
inverse_max_dcgs_
[
i
][
j
]
=
-
1.0
f
;
}
}
}
}
score_t
PrintAndGetLoss
(
int
iter
,
const
score_t
*
score
)
const
override
{
if
(
early_stopping_round_
>
0
||
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
))
{
// some buffers for multi-threading sum up
std
::
vector
<
std
::
vector
<
double
>>
result_buffer_
;
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result_buffer_
.
emplace_back
(
eval_at_
.
size
(),
0.0
);
}
std
::
vector
<
double
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
);
if
(
query_weights_
==
nullptr
)
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
if
(
inverse_max_dcgs_
[
i
][
0
]
<=
0.0
)
{
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
1.0
;
}
}
else
{
// calculate DCG
DCGCalculator
::
CalDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
score
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
tmp_dcg
);
// calculate NDCG
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
tmp_dcg
[
j
]
*
inverse_max_dcgs_
[
i
][
j
];
}
const
char
*
GetName
()
const
override
{
return
name_
.
c_str
();
}
bool
is_bigger_better
()
const
override
{
return
true
;
}
std
::
vector
<
score_t
>
Eval
(
const
score_t
*
score
)
const
override
{
// some buffers for multi-threading sum up
std
::
vector
<
std
::
vector
<
float
>>
result_buffer_
;
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result_buffer_
.
emplace_back
(
eval_at_
.
size
(),
0.0
f
);
}
std
::
vector
<
float
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
f
);
if
(
query_weights_
==
nullptr
)
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
if
(
inverse_max_dcgs_
[
i
][
0
]
<=
0.0
f
)
{
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
1.0
f
;
}
}
}
else
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
if
(
inverse_max_dcgs_
[
i
][
0
]
<=
0.0
)
{
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
1.0
;
}
}
else
{
// calculate DCG
DCGCalculator
::
CalDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
score
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
tmp_dcg
);
// calculate NDCG
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
tmp_dcg
[
j
]
*
inverse_max_dcgs_
[
i
][
j
]
*
query_weights_
[
i
];
}
}
else
{
// calculate DCG
DCGCalculator
::
CalDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
score
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
tmp_dcg
);
// calculate NDCG
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
tmp_dcg
[
j
]
*
inverse_max_dcgs_
[
i
][
j
];
}
}
}
// Get final average NDCG
std
::
vector
<
double
>
result
(
eval_at_
.
size
(),
0.0
);
std
::
stringstream
result_ss
;
for
(
size_t
j
=
0
;
j
<
result
.
size
();
++
j
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result
[
j
]
+=
result_buffer_
[
i
][
j
];
}
else
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
if
(
inverse_max_dcgs_
[
i
][
0
]
<=
0.0
f
)
{
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
1.0
f
;
}
}
else
{
// calculate DCG
DCGCalculator
::
CalDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
score
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
],
&
tmp_dcg
);
// calculate NDCG
for
(
size_t
j
=
0
;
j
<
eval_at_
.
size
();
++
j
)
{
result_buffer_
[
tid
][
j
]
+=
tmp_dcg
[
j
]
*
inverse_max_dcgs_
[
i
][
j
]
*
query_weights_
[
i
];
}
}
result
[
j
]
/=
sum_query_weights_
;
result_ss
<<
"NDCG@"
<<
eval_at_
[
j
]
<<
":"
<<
result
[
j
]
<<
"
\t
"
;
}
if
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
){
Log
::
Info
(
"Iteration:%d, Test:%s, %s "
,
iter
,
name
,
result_ss
.
str
().
c_str
());
}
// Get final average NDCG
std
::
vector
<
float
>
result
(
eval_at_
.
size
(),
0.0
f
);
for
(
size_t
j
=
0
;
j
<
result
.
size
();
++
j
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result
[
j
]
+=
result_buffer_
[
i
][
j
];
}
re
turn
result
[
0
]
;
re
sult
[
j
]
/=
sum_query_weights_
;
}
return
0.0
f
;
return
result
;
}
private:
/*! \brief Output frequently */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
/*! \brief Pointer of label */
const
float
*
label_
;
/*! \brief Name of test set */
const
char
*
name
;
std
::
string
name
_
;
/*! \brief Query boundaries information */
const
data_size_t
*
query_boundaries_
;
/*! \brief Number of queries */
...
...
@@ -158,11 +157,11 @@ private:
/*! \brief Weights of queries */
const
float
*
query_weights_
;
/*! \brief Sum weights of queries */
double
sum_query_weights_
;
float
sum_query_weights_
;
/*! \brief Evaluate position of NDCG */
std
::
vector
<
data_size_t
>
eval_at_
;
/*! \brief Cache the inverse max dcg for all queries */
std
::
vector
<
std
::
vector
<
double
>>
inverse_max_dcgs_
;
std
::
vector
<
std
::
vector
<
float
>>
inverse_max_dcgs_
;
/*! \brief Number of threads */
int
num_threads_
;
};
...
...
src/metric/regression_metric.hpp
View file @
5d022898
...
...
@@ -15,25 +15,34 @@ namespace LightGBM {
template
<
typename
PointWiseLossCalculator
>
class
RegressionMetric
:
public
Metric
{
public:
explicit
RegressionMetric
(
const
MetricConfig
&
config
)
{
early_stopping_round_
=
config
.
early_stopping_round
;
output_freq_
=
config
.
output_freq
;
the_bigger_the_better
=
false
;
explicit
RegressionMetric
(
const
MetricConfig
&
)
{
}
virtual
~
RegressionMetric
()
{
}
const
char
*
GetName
()
const
override
{
return
name_
.
c_str
();
}
bool
is_bigger_better
()
const
override
{
return
false
;
}
void
Init
(
const
char
*
test_name
,
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
name
=
test_name
;
std
::
stringstream
str_buf
;
str_buf
<<
test_name
<<
"'s "
<<
PointWiseLossCalculator
::
Name
();
name_
=
str_buf
.
str
();
num_data_
=
num_data
;
// get label
label_
=
metadata
.
label
();
// get weights
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
}
else
{
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
...
@@ -41,30 +50,25 @@ public:
}
}
}
score_t
PrintAndGetLoss
(
int
iter
,
const
score_t
*
score
)
const
override
{
if
(
early_stopping_round_
>
0
||
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
))
{
score_t
sum_loss
=
0.0
;
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
]);
}
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
]);
}
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
if
(
output_freq_
>
0
&&
iter
%
output_freq_
==
0
){
Log
::
Info
(
"Iteration:%d, %s's %s : %f"
,
iter
,
name
,
PointWiseLossCalculator
::
Name
(),
loss
);
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
}
return
loss
;
}
return
0.0
f
;
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
return
std
::
vector
<
float
>
(
1
,
loss
);
}
inline
static
score_t
AverageLoss
(
score_t
sum_loss
,
score_t
sum_weights
)
{
...
...
@@ -72,8 +76,6 @@ public:
}
private:
/*! \brief Output frequency */
int
output_freq_
;
/*! \brief Number of data */
data_size_t
num_data_
;
/*! \brief Pointer of label */
...
...
@@ -81,9 +83,9 @@ private:
/*! \brief Pointer of weights */
const
float
*
weights_
;
/*! \brief Sum weights */
double
sum_weights_
;
float
sum_weights_
;
/*! \brief Name of this test set */
const
char
*
name
;
std
::
string
name
_
;
};
/*! \brief L2 loss for regression task */
...
...
src/objective/binary_objective.hpp
View file @
5d022898
...
...
@@ -80,7 +80,7 @@ public:
}
}
double
GetSigmoid
()
const
override
{
float
GetSigmoid
()
const
override
{
return
sigmoid_
;
}
...
...
src/objective/rank_objective.hpp
View file @
5d022898
...
...
@@ -23,7 +23,7 @@ public:
// initialize DCG calculator
DCGCalculator
::
Init
(
config
.
label_gain
);
// copy label gain to local
std
::
vector
<
double
>
label_gain
=
config
.
label_gain
;
std
::
vector
<
float
>
label_gain
=
config
.
label_gain
;
for
(
auto
gain
:
label_gain
)
{
label_gain_
.
push_back
(
static_cast
<
score_t
>
(
gain
));
}
...
...
@@ -194,7 +194,7 @@ public:
}
}
double
GetSigmoid
()
const
override
{
float
GetSigmoid
()
const
override
{
// Although we use a sigmoid transform in the objective,
// for prediction we don't actually need to transform by sigmoid,
// since we only need the ranking score.
...
...
src/objective/regression_objective.hpp
View file @
5d022898
...
...
@@ -38,9 +38,9 @@ public:
}
}
double
GetSigmoid
()
const
override
{
float
GetSigmoid
()
const
override
{
// no sigmoid transform, return -1
return
-
1.0
;
return
-
1.0
f
;
}
private:
...
...
src/treelearner/data_parallel_tree_learner.cpp
View file @
5d022898
...
...
@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
void
DataParallelTreeLearner
::
FindBestSplitsForLeaves
()
{
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
std
::
vector
<
double
>
gains
;
std
::
vector
<
float
>
gains
;
// find local best split for smaller leaf
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// find local best split for larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
...
@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
...
...
src/treelearner/feature_parallel_tree_learner.cpp
View file @
5d022898
...
...
@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
int
smaller_best_feature
=
-
1
,
larger_best_feature
=
-
1
;
SplitInfo
smaller_best
,
larger_best
;
// get best split at smaller leaf
std
::
vector
<
double
>
gains
;
std
::
vector
<
float
>
gains
;
for
(
size_t
i
=
0
;
i
<
smaller_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
smaller_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
smaller_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
smaller_best
=
smaller_leaf_splits_
->
BestSplitPerFeature
()[
smaller_best_feature
];
// get best split at larger leaf
if
(
larger_leaf_splits_
->
LeafIndex
()
>=
0
)
{
...
...
@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
for
(
size_t
i
=
0
;
i
<
larger_leaf_splits_
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
larger_leaf_splits_
->
BestSplitPerFeature
()[
i
].
gain
);
}
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
larger_best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
larger_best
=
larger_leaf_splits_
->
BestSplitPerFeature
()[
larger_best_feature
];
}
// sync global best info
...
...
src/treelearner/serial_tree_learner.cpp
View file @
5d022898
...
...
@@ -15,7 +15,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
// initialize with nullptr
num_leaves_
=
tree_config
.
num_leaves
;
min_num_data_one_leaf_
=
static_cast
<
data_size_t
>
(
tree_config
.
min_data_in_leaf
);
min_sum_hessian_one_leaf_
=
static_cast
<
floa
t
>
(
tree_config
.
min_sum_hessian_in_leaf
);
min_sum_hessian_one_leaf_
=
static_cast
<
score_
t
>
(
tree_config
.
min_sum_hessian_in_leaf
);
feature_fraction_
=
tree_config
.
feature_fraction
;
random_
=
Random
(
tree_config
.
feature_fraction_seed
);
histogram_pool_size_
=
tree_config
.
histogram_pool_size
;
...
...
src/treelearner/serial_tree_learner.h
View file @
5d022898
...
...
@@ -41,11 +41,11 @@ public:
void
AddPredictionToScore
(
score_t
*
out_score
)
const
override
{
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
double
output
=
last_trained_tree_
->
LeafOutput
(
i
);
score_t
output
=
last_trained_tree_
->
LeafOutput
(
i
);
data_size_t
*
tmp_idx
=
nullptr
;
data_size_t
cnt_leaf_data
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
tmp_idx
);
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
out_score
[
tmp_idx
[
j
]]
+=
static_cast
<
score_t
>
(
output
)
;
out_score
[
tmp_idx
[
j
]]
+=
output
;
}
}
}
...
...
@@ -116,7 +116,7 @@ protected:
/*! \brief minimal sum hessian on one leaf */
score_t
min_sum_hessian_one_leaf_
;
/*! \brief sub-feature fraction rate */
double
feature_fraction_
;
float
feature_fraction_
;
/*! \brief training data partition on leaves */
DataPartition
*
data_partition_
;
/*! \brief used for generating used features */
...
...
@@ -160,7 +160,7 @@ protected:
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
char
*
is_data_in_leaf_
;
/*! \brief max cache size (unit: GB) for historical histogram. < 0 means no limit */
double
histogram_pool_size_
;
float
histogram_pool_size_
;
/*! \brief used to cache historical histogram to speed up*/
LRUPool
<
FeatureHistogram
*>
histogram_pool_
;
/*! \brief max depth of tree model */
...
...
@@ -186,11 +186,11 @@ inline void SerialTreeLearner::FindBestSplitForLeaf(LeafSplits* leaf_splits) {
if
(
leaf_splits
==
nullptr
||
leaf_splits
->
LeafIndex
()
<
0
)
{
return
;
}
std
::
vector
<
double
>
gains
;
std
::
vector
<
float
>
gains
;
for
(
size_t
i
=
0
;
i
<
leaf_splits
->
BestSplitPerFeature
().
size
();
++
i
)
{
gains
.
push_back
(
leaf_splits
->
BestSplitPerFeature
()[
i
].
gain
);
}
int
best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
double
>::
ArgMax
(
gains
));
int
best_feature
=
static_cast
<
int
>
(
ArrayArgs
<
float
>::
ArgMax
(
gains
));
int
leaf
=
leaf_splits
->
LeafIndex
();
best_split_per_leaf_
[
leaf
]
=
leaf_splits
->
BestSplitPerFeature
()[
best_feature
];
best_split_per_leaf_
[
leaf
].
feature
=
best_feature
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment