Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
3b50aeac
Commit
3b50aeac
authored
Nov 02, 2016
by
Guolin Ke
Browse files
merge from master
parents
504d400c
c96ae6af
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
70 additions
and
70 deletions
+70
-70
src/metric/dcg_calculator.cpp
src/metric/dcg_calculator.cpp
+15
-12
src/metric/multiclass_metric.hpp
src/metric/multiclass_metric.hpp
+13
-13
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+7
-7
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+8
-8
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+5
-5
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+5
-5
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+9
-12
src/objective/regression_objective.hpp
src/objective/regression_objective.hpp
+1
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+3
-3
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+4
-4
No files found.
src/metric/dcg_calculator.cpp
View file @
3b50aeac
...
@@ -11,14 +11,17 @@ namespace LightGBM {
...
@@ -11,14 +11,17 @@ namespace LightGBM {
/*! \brief Declaration for some static members */
/*! \brief Declaration for some static members */
bool
DCGCalculator
::
is_inited_
=
false
;
bool
DCGCalculator
::
is_inited_
=
false
;
std
::
vector
<
floa
t
>
DCGCalculator
::
label_gain_
;
std
::
vector
<
score_
t
>
DCGCalculator
::
label_gain_
;
std
::
vector
<
floa
t
>
DCGCalculator
::
discount_
;
std
::
vector
<
score_
t
>
DCGCalculator
::
discount_
;
const
data_size_t
DCGCalculator
::
kMaxPosition
=
10000
;
const
data_size_t
DCGCalculator
::
kMaxPosition
=
10000
;
void
DCGCalculator
::
Init
(
std
::
vector
<
float
>
input_label_gain
)
{
void
DCGCalculator
::
Init
(
std
::
vector
<
double
>
input_label_gain
)
{
// only inited one time
// only inited one time
if
(
is_inited_
)
{
return
;
}
if
(
is_inited_
)
{
return
;
}
label_gain_
=
input_label_gain
;
label_gain_
.
clear
();
for
(
size_t
i
=
0
;
i
<
input_label_gain
.
size
();
++
i
){
label_gain_
.
push_back
(
static_cast
<
score_t
>
(
input_label_gain
[
i
]));
}
discount_
.
clear
();
discount_
.
clear
();
for
(
data_size_t
i
=
0
;
i
<
kMaxPosition
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
kMaxPosition
;
++
i
)
{
discount_
.
emplace_back
(
1.0
f
/
std
::
log2
(
2.0
f
+
i
));
discount_
.
emplace_back
(
1.0
f
/
std
::
log2
(
2.0
f
+
i
));
...
@@ -26,8 +29,8 @@ void DCGCalculator::Init(std::vector<float> input_label_gain) {
...
@@ -26,8 +29,8 @@ void DCGCalculator::Init(std::vector<float> input_label_gain) {
is_inited_
=
true
;
is_inited_
=
true
;
}
}
floa
t
DCGCalculator
::
CalMaxDCGAtK
(
data_size_t
k
,
const
float
*
label
,
data_size_t
num_data
)
{
score_
t
DCGCalculator
::
CalMaxDCGAtK
(
data_size_t
k
,
const
float
*
label
,
data_size_t
num_data
)
{
floa
t
ret
=
0.0
f
;
score_
t
ret
=
0.0
f
;
// counts for all labels
// counts for all labels
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
@@ -53,14 +56,14 @@ float DCGCalculator::CalMaxDCGAtK(data_size_t k, const float* label, data_size_t
...
@@ -53,14 +56,14 @@ float DCGCalculator::CalMaxDCGAtK(data_size_t k, const float* label, data_size_t
void
DCGCalculator
::
CalMaxDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
void
DCGCalculator
::
CalMaxDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
const
float
*
label
,
data_size_t
num_data
,
data_size_t
num_data
,
std
::
vector
<
floa
t
>*
out
)
{
std
::
vector
<
score_
t
>*
out
)
{
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
std
::
vector
<
data_size_t
>
label_cnt
(
label_gain_
.
size
(),
0
);
// counts for all labels
// counts for all labels
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
if
(
static_cast
<
size_t
>
(
label
[
i
])
>=
label_cnt
.
size
())
{
Log
::
Fatal
(
"label excel %d"
,
label
[
i
]);
}
if
(
static_cast
<
size_t
>
(
label
[
i
])
>=
label_cnt
.
size
())
{
Log
::
Fatal
(
"label excel %d"
,
label
[
i
]);
}
++
label_cnt
[
static_cast
<
int
>
(
label
[
i
])];
++
label_cnt
[
static_cast
<
int
>
(
label
[
i
])];
}
}
floa
t
cur_result
=
0.0
f
;
score_
t
cur_result
=
0.0
f
;
data_size_t
cur_left
=
0
;
data_size_t
cur_left
=
0
;
size_t
top_label
=
label_gain_
.
size
()
-
1
;
size_t
top_label
=
label_gain_
.
size
()
-
1
;
// calculate k Max DCG by one pass
// calculate k Max DCG by one pass
...
@@ -83,7 +86,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
...
@@ -83,7 +86,7 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
}
}
floa
t
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
score_
t
DCGCalculator
::
CalDCGAtK
(
data_size_t
k
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
)
{
const
score_t
*
score
,
data_size_t
num_data
)
{
// get sorted indices by score
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
data_size_t
>
sorted_idx
;
...
@@ -94,7 +97,7 @@ float DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
...
@@ -94,7 +97,7 @@ float DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
if
(
k
>
num_data
)
{
k
=
num_data
;
}
if
(
k
>
num_data
)
{
k
=
num_data
;
}
floa
t
dcg
=
0.0
f
;
score_
t
dcg
=
0.0
f
;
// calculate dcg
// calculate dcg
for
(
data_size_t
i
=
0
;
i
<
k
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
k
;
++
i
)
{
data_size_t
idx
=
sorted_idx
[
i
];
data_size_t
idx
=
sorted_idx
[
i
];
...
@@ -104,7 +107,7 @@ float DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
...
@@ -104,7 +107,7 @@ float DCGCalculator::CalDCGAtK(data_size_t k, const float* label,
}
}
void
DCGCalculator
::
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
void
DCGCalculator
::
CalDCG
(
const
std
::
vector
<
data_size_t
>&
ks
,
const
float
*
label
,
const
score_t
*
score
,
data_size_t
num_data
,
std
::
vector
<
floa
t
>*
out
)
{
const
score_t
*
score
,
data_size_t
num_data
,
std
::
vector
<
score_
t
>*
out
)
{
// get sorted indices by score
// get sorted indices by score
std
::
vector
<
data_size_t
>
sorted_idx
;
std
::
vector
<
data_size_t
>
sorted_idx
;
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
...
@@ -113,7 +116,7 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const float* labe
...
@@ -113,7 +116,7 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const float* labe
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
std
::
sort
(
sorted_idx
.
begin
(),
sorted_idx
.
end
(),
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
[
score
](
data_size_t
a
,
data_size_t
b
)
{
return
score
[
a
]
>
score
[
b
];
});
floa
t
cur_result
=
0.0
f
;
score_
t
cur_result
=
0.0
f
;
data_size_t
cur_left
=
0
;
data_size_t
cur_left
=
0
;
// calculate multi dcg by one pass
// calculate multi dcg by one pass
for
(
size_t
i
=
0
;
i
<
ks
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ks
.
size
();
++
i
)
{
...
...
src/metric/multiclass_metric.hpp
View file @
3b50aeac
...
@@ -33,7 +33,7 @@ public:
...
@@ -33,7 +33,7 @@ public:
// get weights
// get weights
weights_
=
metadata
.
weights
();
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
}
else
{
}
else
{
sum_weights_
=
0.0
f
;
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
@@ -50,14 +50,14 @@ public:
...
@@ -50,14 +50,14 @@ public:
return
false
;
return
false
;
}
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
;
double
sum_loss
=
0.0
;
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
float
>
rec
(
num_class_
);
std
::
vector
<
double
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]);
rec
[
k
]
=
static_cast
<
double
>
(
score
[
k
*
num_data_
+
i
]);
}
}
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
);
...
@@ -65,16 +65,16 @@ public:
...
@@ -65,16 +65,16 @@ public:
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
float
>
rec
(
num_class_
);
std
::
vector
<
double
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]);
rec
[
k
]
=
static_cast
<
double
>
(
score
[
k
*
num_data_
+
i
]);
}
}
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
)
*
weights_
[
i
];
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
rec
)
*
weights_
[
i
];
}
}
}
}
score_t
loss
=
sum_loss
/
sum_weights_
;
double
loss
=
sum_loss
/
sum_weights_
;
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
return
std
::
vector
<
double
>
(
1
,
loss
);
}
}
private:
private:
...
@@ -89,7 +89,7 @@ private:
...
@@ -89,7 +89,7 @@ private:
/*! \brief Pointer of weighs */
/*! \brief Pointer of weighs */
const
float
*
weights_
;
const
float
*
weights_
;
/*! \brief Sum weights */
/*! \brief Sum weights */
float
sum_weights_
;
double
sum_weights_
;
/*! \brief Name of this test set */
/*! \brief Name of this test set */
std
::
string
name_
;
std
::
string
name_
;
};
};
...
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
...
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
public:
public:
explicit
MultiErrorMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiErrorMetric
>
(
config
)
{}
explicit
MultiErrorMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiErrorMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
float
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
double
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
size_t
k
=
static_cast
<
size_t
>
(
label
);
for
(
size_t
i
=
0
;
i
<
score
.
size
();
++
i
){
for
(
size_t
i
=
0
;
i
<
score
.
size
();
++
i
){
if
(
i
!=
k
&&
score
[
i
]
>
score
[
k
])
{
if
(
i
!=
k
&&
score
[
i
]
>
score
[
k
])
{
...
@@ -119,11 +119,11 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
...
@@ -119,11 +119,11 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
public:
public:
explicit
MultiLoglossMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiLoglossMetric
>
(
config
)
{}
explicit
MultiLoglossMetric
(
const
MetricConfig
&
config
)
:
MulticlassMetric
<
MultiLoglossMetric
>
(
config
)
{}
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
float
>
score
)
{
inline
static
score_t
LossOnPoint
(
float
label
,
std
::
vector
<
double
>
score
)
{
size_t
k
=
static_cast
<
size_t
>
(
label
);
size_t
k
=
static_cast
<
size_t
>
(
label
);
Common
::
Softmax
(
&
score
);
Common
::
Softmax
(
&
score
);
if
(
score
[
k
]
>
kEpsilon
)
{
if
(
score
[
k
]
>
kEpsilon
)
{
return
-
std
::
log
(
score
[
k
]);
return
static_cast
<
score_t
>
(
-
std
::
log
(
score
[
k
])
)
;
}
else
{
}
else
{
return
-
std
::
log
(
kEpsilon
);
return
-
std
::
log
(
kEpsilon
);
}
}
...
...
src/metric/rank_metric.hpp
View file @
3b50aeac
...
@@ -51,7 +51,7 @@ public:
...
@@ -51,7 +51,7 @@ public:
// get query weights
// get query weights
query_weights_
=
metadata
.
query_weights
();
query_weights_
=
metadata
.
query_weights
();
if
(
query_weights_
==
nullptr
)
{
if
(
query_weights_
==
nullptr
)
{
sum_query_weights_
=
static_cast
<
float
>
(
num_queries_
);
sum_query_weights_
=
static_cast
<
double
>
(
num_queries_
);
}
else
{
}
else
{
sum_query_weights_
=
0.0
f
;
sum_query_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
...
@@ -84,13 +84,13 @@ public:
...
@@ -84,13 +84,13 @@ public:
return
true
;
return
true
;
}
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
override
{
// some buffers for multi-threading sum up
// some buffers for multi-threading sum up
std
::
vector
<
std
::
vector
<
float
>>
result_buffer_
;
std
::
vector
<
std
::
vector
<
double
>>
result_buffer_
;
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result_buffer_
.
emplace_back
(
eval_at_
.
size
(),
0.0
f
);
result_buffer_
.
emplace_back
(
eval_at_
.
size
(),
0.0
f
);
}
}
std
::
vector
<
floa
t
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
f
);
std
::
vector
<
score_
t
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
f
);
if
(
query_weights_
==
nullptr
)
{
if
(
query_weights_
==
nullptr
)
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
#pragma omp parallel for schedule(guided) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
...
@@ -133,7 +133,7 @@ public:
...
@@ -133,7 +133,7 @@ public:
}
}
}
}
// Get final average NDCG
// Get final average NDCG
std
::
vector
<
float
>
result
(
eval_at_
.
size
(),
0.0
f
);
std
::
vector
<
double
>
result
(
eval_at_
.
size
(),
0.0
f
);
for
(
size_t
j
=
0
;
j
<
result
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
result
.
size
();
++
j
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
result
[
j
]
+=
result_buffer_
[
i
][
j
];
result
[
j
]
+=
result_buffer_
[
i
][
j
];
...
@@ -157,11 +157,11 @@ private:
...
@@ -157,11 +157,11 @@ private:
/*! \brief Weights of queries */
/*! \brief Weights of queries */
const
float
*
query_weights_
;
const
float
*
query_weights_
;
/*! \brief Sum weights of queries */
/*! \brief Sum weights of queries */
float
sum_query_weights_
;
double
sum_query_weights_
;
/*! \brief Evaluate position of NDCG */
/*! \brief Evaluate position of NDCG */
std
::
vector
<
data_size_t
>
eval_at_
;
std
::
vector
<
data_size_t
>
eval_at_
;
/*! \brief Cache the inverse max dcg for all queries */
/*! \brief Cache the inverse max dcg for all queries */
std
::
vector
<
std
::
vector
<
floa
t
>>
inverse_max_dcgs_
;
std
::
vector
<
std
::
vector
<
score_
t
>>
inverse_max_dcgs_
;
/*! \brief Number of threads */
/*! \brief Number of threads */
int
num_threads_
;
int
num_threads_
;
};
};
...
...
src/metric/regression_metric.hpp
View file @
3b50aeac
...
@@ -42,7 +42,7 @@ public:
...
@@ -42,7 +42,7 @@ public:
// get weights
// get weights
weights_
=
metadata
.
weights
();
weights_
=
metadata
.
weights
();
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
sum_weights_
=
static_cast
<
float
>
(
num_data_
);
sum_weights_
=
static_cast
<
double
>
(
num_data_
);
}
else
{
}
else
{
sum_weights_
=
0.0
f
;
sum_weights_
=
0.0
f
;
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
@@ -51,8 +51,8 @@ public:
...
@@ -51,8 +51,8 @@ public:
}
}
}
}
std
::
vector
<
float
>
Eval
(
const
score_t
*
score
)
const
override
{
std
::
vector
<
double
>
Eval
(
const
score_t
*
score
)
const
override
{
score_t
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
...
@@ -66,12 +66,12 @@ public:
...
@@ -66,12 +66,12 @@ public:
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
}
}
}
}
score_t
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
double
loss
=
PointWiseLossCalculator
::
AverageLoss
(
sum_loss
,
sum_weights_
);
return
std
::
vector
<
float
>
(
1
,
static_cast
<
float
>
(
loss
)
)
;
return
std
::
vector
<
double
>
(
1
,
loss
);
}
}
inline
static
score_t
AverageLoss
(
score_t
sum_loss
,
score_t
sum_weights
)
{
inline
static
double
AverageLoss
(
double
sum_loss
,
double
sum_weights
)
{
return
sum_loss
/
sum_weights
;
return
sum_loss
/
sum_weights
;
}
}
...
@@ -83,7 +83,7 @@ private:
...
@@ -83,7 +83,7 @@ private:
/*! \brief Pointer of weighs */
/*! \brief Pointer of weighs */
const
float
*
weights_
;
const
float
*
weights_
;
/*! \brief Sum weights */
/*! \brief Sum weights */
float
sum_weights_
;
double
sum_weights_
;
/*! \brief Name of this test set */
/*! \brief Name of this test set */
std
::
string
name_
;
std
::
string
name_
;
};
};
...
@@ -97,7 +97,7 @@ public:
...
@@ -97,7 +97,7 @@ public:
return
(
score
-
label
)
*
(
score
-
label
);
return
(
score
-
label
)
*
(
score
-
label
);
}
}
inline
static
score_t
AverageLoss
(
score_t
sum_loss
,
score_t
sum_weights
)
{
inline
static
double
AverageLoss
(
double
sum_loss
,
double
sum_weights
)
{
// need sqrt the result for L2 loss
// need sqrt the result for L2 loss
return
std
::
sqrt
(
sum_loss
/
sum_weights
);
return
std
::
sqrt
(
sum_loss
/
sum_weights
);
}
}
...
...
src/objective/binary_objective.hpp
View file @
3b50aeac
...
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
public:
public:
explicit
BinaryLogloss
(
const
ObjectiveConfig
&
config
)
{
explicit
BinaryLogloss
(
const
ObjectiveConfig
&
config
)
{
is_unbalance_
=
config
.
is_unbalance
;
is_unbalance_
=
config
.
is_unbalance
;
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
if
(
sigmoid_
<=
0.0
)
{
if
(
sigmoid_
<=
0.0
)
{
Log
::
Fatal
(
"Sigmoid parameter %f :should greater than zero"
,
sigmoid_
);
Log
::
Fatal
(
"Sigmoid parameter %f :should greater than zero"
,
sigmoid_
);
}
}
...
@@ -48,7 +48,7 @@ public:
...
@@ -48,7 +48,7 @@ public:
// if using unbalance, change the labels weight
// if using unbalance, change the labels weight
if
(
is_unbalance_
)
{
if
(
is_unbalance_
)
{
label_weights_
[
1
]
=
1.0
f
;
label_weights_
[
1
]
=
1.0
f
;
label_weights_
[
0
]
=
static_cast
<
floa
t
>
(
cnt_positive
)
/
cnt_negative
;
label_weights_
[
0
]
=
static_cast
<
score_
t
>
(
cnt_positive
)
/
cnt_negative
;
}
}
}
}
...
@@ -80,7 +80,7 @@ public:
...
@@ -80,7 +80,7 @@ public:
}
}
}
}
floa
t
GetSigmoid
()
const
override
{
score_
t
GetSigmoid
()
const
override
{
return
sigmoid_
;
return
sigmoid_
;
}
}
...
@@ -92,11 +92,11 @@ private:
...
@@ -92,11 +92,11 @@ private:
/*! \brief True if using unbalance training */
/*! \brief True if using unbalance training */
bool
is_unbalance_
;
bool
is_unbalance_
;
/*! \brief Sigmoid parameter */
/*! \brief Sigmoid parameter */
floa
t
sigmoid_
;
score_
t
sigmoid_
;
/*! \brief Values for positive and negative labels */
/*! \brief Values for positive and negative labels */
int
label_val_
[
2
];
int
label_val_
[
2
];
/*! \brief Weights for positive and negative labels */
/*! \brief Weights for positive and negative labels */
floa
t
label_weights_
[
2
];
score_
t
label_weights_
[
2
];
/*! \brief Weights for data */
/*! \brief Weights for data */
const
float
*
weights_
;
const
float
*
weights_
;
};
};
...
...
src/objective/multiclass_objective.hpp
View file @
3b50aeac
...
@@ -38,9 +38,9 @@ public:
...
@@ -38,9 +38,9 @@ public:
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
float
>
rec
(
num_class_
);
std
::
vector
<
double
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]);
rec
[
k
]
=
static_cast
<
double
>
(
score
[
k
*
num_data_
+
i
]);
}
}
Common
::
Softmax
(
&
rec
);
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
...
@@ -56,9 +56,9 @@ public:
...
@@ -56,9 +56,9 @@ public:
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
float
>
rec
(
num_class_
);
std
::
vector
<
double
>
rec
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
){
rec
[
k
]
=
static_cast
<
float
>
(
score
[
k
*
num_data_
+
i
]);
rec
[
k
]
=
static_cast
<
double
>
(
score
[
k
*
num_data_
+
i
]);
}
}
Common
::
Softmax
(
&
rec
);
Common
::
Softmax
(
&
rec
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
...
@@ -74,7 +74,7 @@ public:
...
@@ -74,7 +74,7 @@ public:
}
}
}
}
floa
t
GetSigmoid
()
const
override
{
score_
t
GetSigmoid
()
const
override
{
return
-
1.0
f
;
return
-
1.0
f
;
}
}
...
...
src/objective/rank_objective.hpp
View file @
3b50aeac
...
@@ -19,11 +19,11 @@ namespace LightGBM {
...
@@ -19,11 +19,11 @@ namespace LightGBM {
class
LambdarankNDCG
:
public
ObjectiveFunction
{
class
LambdarankNDCG
:
public
ObjectiveFunction
{
public:
public:
explicit
LambdarankNDCG
(
const
ObjectiveConfig
&
config
)
{
explicit
LambdarankNDCG
(
const
ObjectiveConfig
&
config
)
{
sigmoid_
=
static_cast
<
floa
t
>
(
config
.
sigmoid
);
sigmoid_
=
static_cast
<
score_
t
>
(
config
.
sigmoid
);
// initialize DCG calculator
// initialize DCG calculator
DCGCalculator
::
Init
(
config
.
label_gain
);
DCGCalculator
::
Init
(
config
.
label_gain
);
// copy lable gain to local
// copy lable gain to local
std
::
vector
<
float
>
label_gain
=
config
.
label_gain
;
std
::
vector
<
double
>
label_gain
=
config
.
label_gain
;
for
(
auto
gain
:
label_gain
)
{
for
(
auto
gain
:
label_gain
)
{
label_gain_
.
push_back
(
static_cast
<
score_t
>
(
gain
));
label_gain_
.
push_back
(
static_cast
<
score_t
>
(
gain
));
}
}
...
@@ -53,10 +53,9 @@ public:
...
@@ -53,10 +53,9 @@ public:
// cache inverse max DCG, avoid computation many times
// cache inverse max DCG, avoid computation many times
inverse_max_dcgs_
=
new
score_t
[
num_queries_
];
inverse_max_dcgs_
=
new
score_t
[
num_queries_
];
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
inverse_max_dcgs_
[
i
]
=
static_cast
<
score_t
>
(
inverse_max_dcgs_
[
i
]
=
DCGCalculator
::
CalMaxDCGAtK
(
optimize_pos_at_
,
DCGCalculator
::
CalMaxDCGAtK
(
optimize_pos_at_
,
label_
+
query_boundaries_
[
i
],
label_
+
query_boundaries_
[
i
],
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
])
)
;
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
]);
if
(
inverse_max_dcgs_
[
i
]
>
0.0
)
{
if
(
inverse_max_dcgs_
[
i
]
>
0.0
)
{
inverse_max_dcgs_
[
i
]
=
1.0
f
/
inverse_max_dcgs_
[
i
];
inverse_max_dcgs_
[
i
]
=
1.0
f
/
inverse_max_dcgs_
[
i
];
...
@@ -113,8 +112,7 @@ public:
...
@@ -113,8 +112,7 @@ public:
const
score_t
high_score
=
score
[
high
];
const
score_t
high_score
=
score
[
high
];
if
(
high_score
==
kMinScore
)
{
continue
;
}
if
(
high_score
==
kMinScore
)
{
continue
;
}
const
score_t
high_label_gain
=
label_gain_
[
high_label
];
const
score_t
high_label_gain
=
label_gain_
[
high_label
];
const
score_t
high_discount
=
const
score_t
high_discount
=
DCGCalculator
::
GetDiscount
(
i
);
static_cast
<
score_t
>
(
DCGCalculator
::
GetDiscount
(
i
));
score_t
high_sum_lambda
=
0.0
;
score_t
high_sum_lambda
=
0.0
;
score_t
high_sum_hessian
=
0.0
;
score_t
high_sum_hessian
=
0.0
;
for
(
data_size_t
j
=
0
;
j
<
cnt
;
++
j
)
{
for
(
data_size_t
j
=
0
;
j
<
cnt
;
++
j
)
{
...
@@ -130,8 +128,7 @@ public:
...
@@ -130,8 +128,7 @@ public:
const
score_t
delta_score
=
high_score
-
low_score
;
const
score_t
delta_score
=
high_score
-
low_score
;
const
score_t
low_label_gain
=
label_gain_
[
low_label
];
const
score_t
low_label_gain
=
label_gain_
[
low_label
];
const
score_t
low_discount
=
const
score_t
low_discount
=
DCGCalculator
::
GetDiscount
(
j
);
static_cast
<
score_t
>
(
DCGCalculator
::
GetDiscount
(
j
));
// get dcg gap
// get dcg gap
const
score_t
dcg_gap
=
high_label_gain
-
low_label_gain
;
const
score_t
dcg_gap
=
high_label_gain
-
low_label_gain
;
// get discount of this pair
// get discount of this pair
...
@@ -194,11 +191,11 @@ public:
...
@@ -194,11 +191,11 @@ public:
}
}
}
}
floa
t
GetSigmoid
()
const
override
{
score_
t
GetSigmoid
()
const
override
{
// though we use sigmoid transform on objective
// though we use sigmoid transform on objective
// for the prediction, we actually don't need to transform by sigmoid.
// for the prediction, we actually don't need to transform by sigmoid.
// since we only need the ranking score.
// since we only need the ranking score.
return
-
1.0
;
return
-
1.0
f
;
}
}
private:
private:
...
@@ -207,7 +204,7 @@ private:
...
@@ -207,7 +204,7 @@ private:
/*! \brief Cache inverse max DCG, speed up calculation */
/*! \brief Cache inverse max DCG, speed up calculation */
score_t
*
inverse_max_dcgs_
;
score_t
*
inverse_max_dcgs_
;
/*! \brief Simgoid param */
/*! \brief Simgoid param */
floa
t
sigmoid_
;
score_
t
sigmoid_
;
/*! \brief Optimized NDCG@ */
/*! \brief Optimized NDCG@ */
int
optimize_pos_at_
;
int
optimize_pos_at_
;
/*! \brief Number of queries */
/*! \brief Number of queries */
...
...
src/objective/regression_objective.hpp
View file @
3b50aeac
...
@@ -38,7 +38,7 @@ public:
...
@@ -38,7 +38,7 @@ public:
}
}
}
}
floa
t
GetSigmoid
()
const
override
{
score_
t
GetSigmoid
()
const
override
{
// not sigmoid transform, return -1
// not sigmoid transform, return -1
return
-
1.0
f
;
return
-
1.0
f
;
}
}
...
...
src/treelearner/serial_tree_learner.cpp
View file @
3b50aeac
...
@@ -415,9 +415,9 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
...
@@ -415,9 +415,9 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
*
right_leaf
=
tree
->
Split
(
best_Leaf
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
*
right_leaf
=
tree
->
Split
(
best_Leaf
,
best_split_info
.
feature
,
best_split_info
.
threshold
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
feature_index
(),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
feature_index
(),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
BinToValue
(
best_split_info
.
threshold
),
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
BinToValue
(
best_split_info
.
threshold
),
static_cast
<
float
>
(
best_split_info
.
left_output
),
static_cast
<
double
>
(
best_split_info
.
left_output
),
static_cast
<
float
>
(
best_split_info
.
right_output
),
static_cast
<
double
>
(
best_split_info
.
right_output
),
static_cast
<
float
>
(
best_split_info
.
gain
));
static_cast
<
double
>
(
best_split_info
.
gain
));
// split data partition
// split data partition
data_partition_
->
Split
(
best_Leaf
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
bin_data
(),
data_partition_
->
Split
(
best_Leaf
,
train_data_
->
FeatureAt
(
best_split_info
.
feature
)
->
bin_data
(),
...
...
src/treelearner/serial_tree_learner.h
View file @
3b50aeac
...
@@ -38,10 +38,10 @@ public:
...
@@ -38,10 +38,10 @@ public:
data_partition_
->
SetUsedDataIndices
(
used_indices
,
num_data
);
data_partition_
->
SetUsedDataIndices
(
used_indices
,
num_data
);
}
}
void
AddPredictionToScore
(
score_t
*
out_score
)
const
override
{
void
AddPredictionToScore
(
score_t
*
out_score
)
const
override
{
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(guided)
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
data_partition_
->
num_leaves
();
++
i
)
{
floa
t
output
=
last_trained_tree_
->
LeafOutput
(
i
);
score_
t
output
=
static_cast
<
score_t
>
(
last_trained_tree_
->
LeafOutput
(
i
)
)
;
data_size_t
*
tmp_idx
=
nullptr
;
data_size_t
*
tmp_idx
=
nullptr
;
data_size_t
cnt_leaf_data
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
tmp_idx
);
data_size_t
cnt_leaf_data
=
data_partition_
->
GetIndexOnLeaf
(
i
,
&
tmp_idx
);
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
for
(
data_size_t
j
=
0
;
j
<
cnt_leaf_data
;
++
j
)
{
...
@@ -116,7 +116,7 @@ protected:
...
@@ -116,7 +116,7 @@ protected:
/*! \brief mininal sum hessian on one leaf */
/*! \brief mininal sum hessian on one leaf */
double
min_sum_hessian_one_leaf_
;
double
min_sum_hessian_one_leaf_
;
/*! \brief sub-feature fraction rate */
/*! \brief sub-feature fraction rate */
float
feature_fraction_
;
double
feature_fraction_
;
/*! \brief training data partition on leaves */
/*! \brief training data partition on leaves */
DataPartition
*
data_partition_
;
DataPartition
*
data_partition_
;
/*! \brief used for generate used features */
/*! \brief used for generate used features */
...
@@ -160,7 +160,7 @@ protected:
...
@@ -160,7 +160,7 @@ protected:
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
char
*
is_data_in_leaf_
;
char
*
is_data_in_leaf_
;
/*! \brief max cache size(unit:GB) for historical histogram. < 0 means not limit */
/*! \brief max cache size(unit:GB) for historical histogram. < 0 means not limit */
float
histogram_pool_size_
;
double
histogram_pool_size_
;
/*! \brief used to cache historical histogram to speed up*/
/*! \brief used to cache historical histogram to speed up*/
LRUPool
<
FeatureHistogram
*>
histogram_pool_
;
LRUPool
<
FeatureHistogram
*>
histogram_pool_
;
/*! \brief max depth of tree model */
/*! \brief max depth of tree model */
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment