Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
8ed371ce
Unverified
Commit
8ed371ce
authored
Oct 09, 2023
by
James Lamb
Committed by
GitHub
Oct 09, 2023
Browse files
set explicit number of threads in every OpenMP `parallel` region (#6135)
parent
992f5056
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
107 additions
and
107 deletions
+107
-107
src/io/tree.cpp
src/io/tree.cpp
+2
-2
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+4
-4
src/metric/map_metric.hpp
src/metric/map_metric.hpp
+2
-2
src/metric/multiclass_metric.hpp
src/metric/multiclass_metric.hpp
+4
-4
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+3
-3
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+4
-4
src/metric/xentropy_metric.hpp
src/metric/xentropy_metric.hpp
+12
-12
src/objective/binary_objective.hpp
src/objective/binary_objective.hpp
+5
-5
src/objective/multiclass_objective.hpp
src/objective/multiclass_objective.hpp
+2
-2
src/objective/rank_objective.hpp
src/objective/rank_objective.hpp
+2
-2
src/objective/regression_objective.hpp
src/objective/regression_objective.hpp
+23
-23
src/objective/xentropy_objective.hpp
src/objective/xentropy_objective.hpp
+8
-8
src/treelearner/col_sampler.hpp
src/treelearner/col_sampler.hpp
+3
-3
src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+1
-1
src/treelearner/data_parallel_tree_learner.cpp
src/treelearner/data_parallel_tree_learner.cpp
+4
-4
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+1
-1
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+4
-4
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+17
-17
src/treelearner/gradient_discretizer.cpp
src/treelearner/gradient_discretizer.cpp
+2
-2
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+4
-4
No files found.
src/io/tree.cpp
View file @
8ed371ce
...
@@ -153,7 +153,7 @@ int Tree::SplitCategorical(int leaf, int feature, int real_feature, const uint32
...
@@ -153,7 +153,7 @@ int Tree::SplitCategorical(int leaf, int feature, int real_feature, const uint32
void
Tree
::
AddPredictionToScore
(
const
Dataset
*
data
,
data_size_t
num_data
,
double
*
score
)
const
{
void
Tree
::
AddPredictionToScore
(
const
Dataset
*
data
,
data_size_t
num_data
,
double
*
score
)
const
{
if
(
!
is_linear_
&&
num_leaves_
<=
1
)
{
if
(
!
is_linear_
&&
num_leaves_
<=
1
)
{
if
(
leaf_value_
[
0
]
!=
0.0
f
)
{
if
(
leaf_value_
[
0
]
!=
0.0
f
)
{
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (num_data >= 1024)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
score
[
i
]
+=
leaf_value_
[
0
];
score
[
i
]
+=
leaf_value_
[
0
];
}
}
...
@@ -234,7 +234,7 @@ void Tree::AddPredictionToScore(const Dataset* data,
...
@@ -234,7 +234,7 @@ void Tree::AddPredictionToScore(const Dataset* data,
data_size_t
num_data
,
double
*
score
)
const
{
data_size_t
num_data
,
double
*
score
)
const
{
if
(
!
is_linear_
&&
num_leaves_
<=
1
)
{
if
(
!
is_linear_
&&
num_leaves_
<=
1
)
{
if
(
leaf_value_
[
0
]
!=
0.0
f
)
{
if
(
leaf_value_
[
0
]
!=
0.0
f
)
{
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (num_data >= 1024)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
score
[
used_data_indices
[
i
]]
+=
leaf_value_
[
0
];
score
[
used_data_indices
[
i
]]
+=
leaf_value_
[
0
];
}
}
...
...
src/metric/binary_metric.hpp
View file @
8ed371ce
...
@@ -61,13 +61,13 @@ class BinaryMetric: public Metric {
...
@@ -61,13 +61,13 @@ class BinaryMetric: public Metric {
double
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
objective
==
nullptr
)
{
if
(
objective
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
]);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
]);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
...
@@ -75,7 +75,7 @@ class BinaryMetric: public Metric {
...
@@ -75,7 +75,7 @@ class BinaryMetric: public Metric {
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
prob
=
0
;
double
prob
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
prob
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
prob
);
...
@@ -83,7 +83,7 @@ class BinaryMetric: public Metric {
...
@@ -83,7 +83,7 @@ class BinaryMetric: public Metric {
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
prob
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
prob
=
0
;
double
prob
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
prob
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
prob
);
...
...
src/metric/map_metric.hpp
View file @
8ed371ce
...
@@ -111,7 +111,7 @@ class MapMetric:public Metric {
...
@@ -111,7 +111,7 @@ class MapMetric:public Metric {
}
}
std
::
vector
<
double
>
tmp_map
(
eval_at_
.
size
(),
0.0
f
);
std
::
vector
<
double
>
tmp_map
(
eval_at_
.
size
(),
0.0
f
);
if
(
query_weights_
==
nullptr
)
{
if
(
query_weights_
==
nullptr
)
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_map)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(guided) firstprivate(tmp_map)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
const
int
tid
=
omp_get_thread_num
();
CalMapAtK
(
eval_at_
,
npos_per_query_
[
i
],
label_
+
query_boundaries_
[
i
],
CalMapAtK
(
eval_at_
,
npos_per_query_
[
i
],
label_
+
query_boundaries_
[
i
],
...
@@ -121,7 +121,7 @@ class MapMetric:public Metric {
...
@@ -121,7 +121,7 @@ class MapMetric:public Metric {
}
}
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(guided) firstprivate(tmp_map)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(guided) firstprivate(tmp_map)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
const
int
tid
=
omp_get_thread_num
();
CalMapAtK
(
eval_at_
,
npos_per_query_
[
i
],
label_
+
query_boundaries_
[
i
],
CalMapAtK
(
eval_at_
,
npos_per_query_
[
i
],
label_
+
query_boundaries_
[
i
],
...
...
src/metric/multiclass_metric.hpp
View file @
8ed371ce
...
@@ -63,7 +63,7 @@ class MulticlassMetric: public Metric {
...
@@ -63,7 +63,7 @@ class MulticlassMetric: public Metric {
}
}
if
(
objective
!=
nullptr
)
{
if
(
objective
!=
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
double
>
raw_score
(
num_tree_per_iteration
);
std
::
vector
<
double
>
raw_score
(
num_tree_per_iteration
);
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
...
@@ -76,7 +76,7 @@ class MulticlassMetric: public Metric {
...
@@ -76,7 +76,7 @@ class MulticlassMetric: public Metric {
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
&
rec
,
config_
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
&
rec
,
config_
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
double
>
raw_score
(
num_tree_per_iteration
);
std
::
vector
<
double
>
raw_score
(
num_tree_per_iteration
);
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
...
@@ -91,7 +91,7 @@ class MulticlassMetric: public Metric {
...
@@ -91,7 +91,7 @@ class MulticlassMetric: public Metric {
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
double
>
rec
(
num_tree_per_iteration
);
std
::
vector
<
double
>
rec
(
num_tree_per_iteration
);
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
...
@@ -102,7 +102,7 @@ class MulticlassMetric: public Metric {
...
@@ -102,7 +102,7 @@ class MulticlassMetric: public Metric {
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
&
rec
,
config_
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
&
rec
,
config_
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
std
::
vector
<
double
>
rec
(
num_tree_per_iteration
);
std
::
vector
<
double
>
rec
(
num_tree_per_iteration
);
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_tree_per_iteration
;
++
k
)
{
...
...
src/metric/rank_metric.hpp
View file @
8ed371ce
...
@@ -57,7 +57,7 @@ class NDCGMetric:public Metric {
...
@@ -57,7 +57,7 @@ class NDCGMetric:public Metric {
}
}
inverse_max_dcgs_
.
resize
(
num_queries_
);
inverse_max_dcgs_
.
resize
(
num_queries_
);
// cache the inverse max DCG for all queries, used to calculate NDCG
// cache the inverse max DCG for all queries, used to calculate NDCG
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
inverse_max_dcgs_
[
i
].
resize
(
eval_at_
.
size
(),
0.0
f
);
inverse_max_dcgs_
[
i
].
resize
(
eval_at_
.
size
(),
0.0
f
);
DCGCalculator
::
CalMaxDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
DCGCalculator
::
CalMaxDCG
(
eval_at_
,
label_
+
query_boundaries_
[
i
],
...
@@ -92,7 +92,7 @@ class NDCGMetric:public Metric {
...
@@ -92,7 +92,7 @@ class NDCGMetric:public Metric {
}
}
std
::
vector
<
double
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
f
);
std
::
vector
<
double
>
tmp_dcg
(
eval_at_
.
size
(),
0.0
f
);
if
(
query_weights_
==
nullptr
)
{
if
(
query_weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) firstprivate(tmp_dcg)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
// if all doc in this query are all negative, let its NDCG=1
...
@@ -112,7 +112,7 @@ class NDCGMetric:public Metric {
...
@@ -112,7 +112,7 @@ class NDCGMetric:public Metric {
}
}
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) firstprivate(tmp_dcg)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) firstprivate(tmp_dcg)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
int
tid
=
omp_get_thread_num
();
const
int
tid
=
omp_get_thread_num
();
// if all doc in this query are all negative, let its NDCG=1
// if all doc in this query are all negative, let its NDCG=1
...
...
src/metric/regression_metric.hpp
View file @
8ed371ce
...
@@ -59,13 +59,13 @@ class RegressionMetric: public Metric {
...
@@ -59,13 +59,13 @@ class RegressionMetric: public Metric {
double
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
objective
==
nullptr
)
{
if
(
objective
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
],
config_
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
],
config_
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
],
config_
)
*
weights_
[
i
];
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
score
[
i
],
config_
)
*
weights_
[
i
];
...
@@ -73,7 +73,7 @@ class RegressionMetric: public Metric {
...
@@ -73,7 +73,7 @@ class RegressionMetric: public Metric {
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
double
t
=
0
;
double
t
=
0
;
...
@@ -81,7 +81,7 @@ class RegressionMetric: public Metric {
...
@@ -81,7 +81,7 @@ class RegressionMetric: public Metric {
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
t
,
config_
);
sum_loss
+=
PointWiseLossCalculator
::
LossOnPoint
(
label_
[
i
],
t
,
config_
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// add loss
// add loss
double
t
=
0
;
double
t
=
0
;
...
...
src/metric/xentropy_metric.hpp
View file @
8ed371ce
...
@@ -107,26 +107,26 @@ class CrossEntropyMetric : public Metric {
...
@@ -107,26 +107,26 @@ class CrossEntropyMetric : public Metric {
double
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
objective
==
nullptr
)
{
if
(
objective
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
]);
// NOTE: does not work unless score is a probability
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
]);
// NOTE: does not work unless score is a probability
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
// NOTE: does not work unless score is a probability
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
// NOTE: does not work unless score is a probability
}
}
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
p
=
0
;
double
p
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
sum_loss
+=
XentLoss
(
label_
[
i
],
p
);
sum_loss
+=
XentLoss
(
label_
[
i
],
p
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
p
=
0
;
double
p
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
...
@@ -192,13 +192,13 @@ class CrossEntropyLambdaMetric : public Metric {
...
@@ -192,13 +192,13 @@ class CrossEntropyLambdaMetric : public Metric {
double
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
objective
==
nullptr
)
{
if
(
objective
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
hhat
=
std
::
log1p
(
std
::
exp
(
score
[
i
]));
// auto-convert
double
hhat
=
std
::
log1p
(
std
::
exp
(
score
[
i
]));
// auto-convert
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
1.0
f
,
hhat
);
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
1.0
f
,
hhat
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
hhat
=
std
::
log1p
(
std
::
exp
(
score
[
i
]));
// auto-convert
double
hhat
=
std
::
log1p
(
std
::
exp
(
score
[
i
]));
// auto-convert
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
weights_
[
i
],
hhat
);
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
weights_
[
i
],
hhat
);
...
@@ -206,14 +206,14 @@ class CrossEntropyLambdaMetric : public Metric {
...
@@ -206,14 +206,14 @@ class CrossEntropyLambdaMetric : public Metric {
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
hhat
=
0
;
double
hhat
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
hhat
);
// NOTE: this only works if objective = "xentlambda"
objective
->
ConvertOutput
(
&
score
[
i
],
&
hhat
);
// NOTE: this only works if objective = "xentlambda"
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
1.0
f
,
hhat
);
sum_loss
+=
XentLambdaLoss
(
label_
[
i
],
1.0
f
,
hhat
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
hhat
=
0
;
double
hhat
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
hhat
);
// NOTE: this only works if objective = "xentlambda"
objective
->
ConvertOutput
(
&
score
[
i
],
&
hhat
);
// NOTE: this only works if objective = "xentlambda"
...
@@ -299,26 +299,26 @@ class KullbackLeiblerDivergence : public Metric {
...
@@ -299,26 +299,26 @@ class KullbackLeiblerDivergence : public Metric {
double
sum_loss
=
0.0
f
;
double
sum_loss
=
0.0
f
;
if
(
objective
==
nullptr
)
{
if
(
objective
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
]);
// NOTE: does not work unless score is a probability
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
]);
// NOTE: does not work unless score is a probability
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
// NOTE: does not work unless score is a probability
sum_loss
+=
XentLoss
(
label_
[
i
],
score
[
i
])
*
weights_
[
i
];
// NOTE: does not work unless score is a probability
}
}
}
}
}
else
{
}
else
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
p
=
0
;
double
p
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
sum_loss
+=
XentLoss
(
label_
[
i
],
p
);
sum_loss
+=
XentLoss
(
label_
[
i
],
p
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_loss)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
p
=
0
;
double
p
=
0
;
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
objective
->
ConvertOutput
(
&
score
[
i
],
&
p
);
...
...
src/objective/binary_objective.hpp
View file @
8ed371ce
...
@@ -63,7 +63,7 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -63,7 +63,7 @@ class BinaryLogloss: public ObjectiveFunction {
data_size_t
cnt_positive
=
0
;
data_size_t
cnt_positive
=
0
;
data_size_t
cnt_negative
=
0
;
data_size_t
cnt_negative
=
0
;
// count for positive and negative samples
// count for positive and negative samples
#pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:cnt_positive, cnt_negative)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
if
(
is_pos_
(
label_
[
i
]))
{
if
(
is_pos_
(
label_
[
i
]))
{
++
cnt_positive
;
++
cnt_positive
;
...
@@ -107,7 +107,7 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -107,7 +107,7 @@ class BinaryLogloss: public ObjectiveFunction {
return
;
return
;
}
}
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// get label and label weights
// get label and label weights
const
int
is_pos
=
is_pos_
(
label_
[
i
]);
const
int
is_pos
=
is_pos_
(
label_
[
i
]);
...
@@ -120,7 +120,7 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -120,7 +120,7 @@ class BinaryLogloss: public ObjectiveFunction {
hessians
[
i
]
=
static_cast
<
score_t
>
(
abs_response
*
(
sigmoid_
-
abs_response
)
*
label_weight
);
hessians
[
i
]
=
static_cast
<
score_t
>
(
abs_response
*
(
sigmoid_
-
abs_response
)
*
label_weight
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
// get label and label weights
// get label and label weights
const
int
is_pos
=
is_pos_
(
label_
[
i
]);
const
int
is_pos
=
is_pos_
(
label_
[
i
]);
...
@@ -140,14 +140,14 @@ class BinaryLogloss: public ObjectiveFunction {
...
@@ -140,14 +140,14 @@ class BinaryLogloss: public ObjectiveFunction {
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
is_pos_
(
label_
[
i
])
*
weights_
[
i
];
suml
+=
is_pos_
(
label_
[
i
])
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
}
}
}
else
{
}
else
{
sumw
=
static_cast
<
double
>
(
num_data_
);
sumw
=
static_cast
<
double
>
(
num_data_
);
#pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
is_pos_
(
label_
[
i
]);
suml
+=
is_pos_
(
label_
[
i
]);
}
}
...
...
src/objective/multiclass_objective.hpp
View file @
8ed371ce
...
@@ -86,7 +86,7 @@ class MulticlassSoftmax: public ObjectiveFunction {
...
@@ -86,7 +86,7 @@ class MulticlassSoftmax: public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
std
::
vector
<
double
>
rec
;
std
::
vector
<
double
>
rec
;
#pragma omp parallel for schedule(static) private(rec)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) private(rec)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
rec
.
resize
(
num_class_
);
rec
.
resize
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
...
@@ -107,7 +107,7 @@ class MulticlassSoftmax: public ObjectiveFunction {
...
@@ -107,7 +107,7 @@ class MulticlassSoftmax: public ObjectiveFunction {
}
}
}
else
{
}
else
{
std
::
vector
<
double
>
rec
;
std
::
vector
<
double
>
rec
;
#pragma omp parallel for schedule(static) private(rec)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) private(rec)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
rec
.
resize
(
num_class_
);
rec
.
resize
(
num_class_
);
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class_
;
++
k
)
{
...
...
src/objective/rank_objective.hpp
View file @
8ed371ce
...
@@ -58,7 +58,7 @@ class RankingObjective : public ObjectiveFunction {
...
@@ -58,7 +58,7 @@ class RankingObjective : public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
#pragma omp parallel for schedule(guided)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(guided)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
const
data_size_t
start
=
query_boundaries_
[
i
];
const
data_size_t
start
=
query_boundaries_
[
i
];
const
data_size_t
cnt
=
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
];
const
data_size_t
cnt
=
query_boundaries_
[
i
+
1
]
-
query_boundaries_
[
i
];
...
@@ -157,7 +157,7 @@ class LambdarankNDCG : public RankingObjective {
...
@@ -157,7 +157,7 @@ class LambdarankNDCG : public RankingObjective {
DCGCalculator
::
CheckMetadata
(
metadata
,
num_queries_
);
DCGCalculator
::
CheckMetadata
(
metadata
,
num_queries_
);
DCGCalculator
::
CheckLabel
(
label_
,
num_data_
);
DCGCalculator
::
CheckLabel
(
label_
,
num_data_
);
inverse_max_dcgs_
.
resize
(
num_queries_
);
inverse_max_dcgs_
.
resize
(
num_queries_
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
inverse_max_dcgs_
[
i
]
=
DCGCalculator
::
CalMaxDCGAtK
(
inverse_max_dcgs_
[
i
]
=
DCGCalculator
::
CalMaxDCGAtK
(
truncation_level_
,
label_
+
query_boundaries_
[
i
],
truncation_level_
,
label_
+
query_boundaries_
[
i
],
...
...
src/objective/regression_objective.hpp
View file @
8ed371ce
...
@@ -115,7 +115,7 @@ class RegressionL2loss: public ObjectiveFunction {
...
@@ -115,7 +115,7 @@ class RegressionL2loss: public ObjectiveFunction {
label_
=
metadata
.
label
();
label_
=
metadata
.
label
();
if
(
sqrt_
)
{
if
(
sqrt_
)
{
trans_label_
.
resize
(
num_data_
);
trans_label_
.
resize
(
num_data_
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
trans_label_
[
i
]
=
Common
::
Sign
(
label_
[
i
])
*
std
::
sqrt
(
std
::
fabs
(
label_
[
i
]));
trans_label_
[
i
]
=
Common
::
Sign
(
label_
[
i
])
*
std
::
sqrt
(
std
::
fabs
(
label_
[
i
]));
}
}
...
@@ -127,13 +127,13 @@ class RegressionL2loss: public ObjectiveFunction {
...
@@ -127,13 +127,13 @@ class RegressionL2loss: public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
gradients
[
i
]
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
hessians
[
i
]
=
1.0
f
;
hessians
[
i
]
=
1.0
f
;
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
gradients
[
i
]
=
static_cast
<
score_t
>
(
static_cast
<
score_t
>
((
score
[
i
]
-
label_
[
i
]))
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
static_cast
<
score_t
>
((
score
[
i
]
-
label_
[
i
]))
*
weights_
[
i
]);
hessians
[
i
]
=
static_cast
<
score_t
>
(
weights_
[
i
]);
hessians
[
i
]
=
static_cast
<
score_t
>
(
weights_
[
i
]);
...
@@ -174,14 +174,14 @@ class RegressionL2loss: public ObjectiveFunction {
...
@@ -174,14 +174,14 @@ class RegressionL2loss: public ObjectiveFunction {
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
sumw
+=
weights_
[
i
];
sumw
+=
weights_
[
i
];
}
}
}
else
{
}
else
{
sumw
=
static_cast
<
double
>
(
num_data_
);
sumw
=
static_cast
<
double
>
(
num_data_
);
#pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
];
suml
+=
label_
[
i
];
}
}
...
@@ -217,14 +217,14 @@ class RegressionL1loss: public RegressionL2loss {
...
@@ -217,14 +217,14 @@ class RegressionL1loss: public RegressionL2loss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
));
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
));
hessians
[
i
]
=
1.0
f
;
hessians
[
i
]
=
1.0
f
;
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
weights_
[
i
]);
...
@@ -313,7 +313,7 @@ class RegressionHuberLoss: public RegressionL2loss {
...
@@ -313,7 +313,7 @@ class RegressionHuberLoss: public RegressionL2loss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
if
(
std
::
abs
(
diff
)
<=
alpha_
)
{
if
(
std
::
abs
(
diff
)
<=
alpha_
)
{
...
@@ -324,7 +324,7 @@ class RegressionHuberLoss: public RegressionL2loss {
...
@@ -324,7 +324,7 @@ class RegressionHuberLoss: public RegressionL2loss {
hessians
[
i
]
=
1.0
f
;
hessians
[
i
]
=
1.0
f
;
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
if
(
std
::
abs
(
diff
)
<=
alpha_
)
{
if
(
std
::
abs
(
diff
)
<=
alpha_
)
{
...
@@ -362,14 +362,14 @@ class RegressionFairLoss: public RegressionL2loss {
...
@@ -362,14 +362,14 @@ class RegressionFairLoss: public RegressionL2loss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
x
=
score
[
i
]
-
label_
[
i
];
const
double
x
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
c_
*
x
/
(
std
::
fabs
(
x
)
+
c_
));
gradients
[
i
]
=
static_cast
<
score_t
>
(
c_
*
x
/
(
std
::
fabs
(
x
)
+
c_
));
hessians
[
i
]
=
static_cast
<
score_t
>
(
c_
*
c_
/
((
std
::
fabs
(
x
)
+
c_
)
*
(
std
::
fabs
(
x
)
+
c_
)));
hessians
[
i
]
=
static_cast
<
score_t
>
(
c_
*
c_
/
((
std
::
fabs
(
x
)
+
c_
)
*
(
std
::
fabs
(
x
)
+
c_
)));
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
x
=
score
[
i
]
-
label_
[
i
];
const
double
x
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
c_
*
x
/
(
std
::
fabs
(
x
)
+
c_
)
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
c_
*
x
/
(
std
::
fabs
(
x
)
+
c_
)
*
weights_
[
i
]);
...
@@ -441,14 +441,14 @@ class RegressionPoissonLoss: public RegressionL2loss {
...
@@ -441,14 +441,14 @@ class RegressionPoissonLoss: public RegressionL2loss {
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
double
exp_max_delta_step_
=
std
::
exp
(
max_delta_step_
);
double
exp_max_delta_step_
=
std
::
exp
(
max_delta_step_
);
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_score
=
std
::
exp
(
score
[
i
]);
double
exp_score
=
std
::
exp
(
score
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
exp_score
-
label_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
exp_score
-
label_
[
i
]);
hessians
[
i
]
=
static_cast
<
score_t
>
(
exp_score
*
exp_max_delta_step_
);
hessians
[
i
]
=
static_cast
<
score_t
>
(
exp_score
*
exp_max_delta_step_
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_score
=
std
::
exp
(
score
[
i
]);
double
exp_score
=
std
::
exp
(
score
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
exp_score
-
label_
[
i
])
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
exp_score
-
label_
[
i
])
*
weights_
[
i
]);
...
@@ -493,7 +493,7 @@ class RegressionQuantileloss : public RegressionL2loss {
...
@@ -493,7 +493,7 @@ class RegressionQuantileloss : public RegressionL2loss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
score_t
delta
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
score_t
delta
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
if
(
delta
>=
0
)
{
if
(
delta
>=
0
)
{
...
@@ -504,7 +504,7 @@ class RegressionQuantileloss : public RegressionL2loss {
...
@@ -504,7 +504,7 @@ class RegressionQuantileloss : public RegressionL2loss {
hessians
[
i
]
=
1.0
f
;
hessians
[
i
]
=
1.0
f
;
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
score_t
delta
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
score_t
delta
=
static_cast
<
score_t
>
(
score
[
i
]
-
label_
[
i
]);
if
(
delta
>=
0
)
{
if
(
delta
>=
0
)
{
...
@@ -598,12 +598,12 @@ class RegressionMAPELOSS : public RegressionL1loss {
...
@@ -598,12 +598,12 @@ class RegressionMAPELOSS : public RegressionL1loss {
}
}
label_weight_
.
resize
(
num_data
);
label_weight_
.
resize
(
num_data
);
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
label_weight_
[
i
]
=
1.0
f
/
std
::
max
(
1.0
f
,
std
::
fabs
(
label_
[
i
]));
label_weight_
[
i
]
=
1.0
f
/
std
::
max
(
1.0
f
,
std
::
fabs
(
label_
[
i
]));
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
label_weight_
[
i
]
=
1.0
f
/
std
::
max
(
1.0
f
,
std
::
fabs
(
label_
[
i
]))
*
weights_
[
i
];
label_weight_
[
i
]
=
1.0
f
/
std
::
max
(
1.0
f
,
std
::
fabs
(
label_
[
i
]))
*
weights_
[
i
];
}
}
...
@@ -613,14 +613,14 @@ class RegressionMAPELOSS : public RegressionL1loss {
...
@@ -613,14 +613,14 @@ class RegressionMAPELOSS : public RegressionL1loss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
label_weight_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
label_weight_
[
i
]);
hessians
[
i
]
=
1.0
f
;
hessians
[
i
]
=
1.0
f
;
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
diff
=
score
[
i
]
-
label_
[
i
];
const
double
diff
=
score
[
i
]
-
label_
[
i
];
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
label_weight_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
Common
::
Sign
(
diff
)
*
label_weight_
[
i
]);
...
@@ -690,14 +690,14 @@ class RegressionGammaLoss : public RegressionPoissonLoss {
...
@@ -690,14 +690,14 @@ class RegressionGammaLoss : public RegressionPoissonLoss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_score
=
std
::
exp
(
-
score
[
i
]);
double
exp_score
=
std
::
exp
(
-
score
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
1.0
-
label_
[
i
]
*
exp_score
);
gradients
[
i
]
=
static_cast
<
score_t
>
(
1.0
-
label_
[
i
]
*
exp_score
);
hessians
[
i
]
=
static_cast
<
score_t
>
(
label_
[
i
]
*
exp_score
);
hessians
[
i
]
=
static_cast
<
score_t
>
(
label_
[
i
]
*
exp_score
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_score
=
std
::
exp
(
-
score
[
i
]);
double
exp_score
=
std
::
exp
(
-
score
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
1.0
-
label_
[
i
]
*
exp_score
)
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
1.0
-
label_
[
i
]
*
exp_score
)
*
weights_
[
i
]);
...
@@ -728,7 +728,7 @@ class RegressionTweedieLoss: public RegressionPoissonLoss {
...
@@ -728,7 +728,7 @@ class RegressionTweedieLoss: public RegressionPoissonLoss {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_1_score
=
std
::
exp
((
1
-
rho_
)
*
score
[
i
]);
double
exp_1_score
=
std
::
exp
((
1
-
rho_
)
*
score
[
i
]);
double
exp_2_score
=
std
::
exp
((
2
-
rho_
)
*
score
[
i
]);
double
exp_2_score
=
std
::
exp
((
2
-
rho_
)
*
score
[
i
]);
...
@@ -737,7 +737,7 @@ class RegressionTweedieLoss: public RegressionPoissonLoss {
...
@@ -737,7 +737,7 @@ class RegressionTweedieLoss: public RegressionPoissonLoss {
(
2
-
rho_
)
*
exp_2_score
);
(
2
-
rho_
)
*
exp_2_score
);
}
}
}
else
{
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
double
exp_1_score
=
std
::
exp
((
1
-
rho_
)
*
score
[
i
]);
double
exp_1_score
=
std
::
exp
((
1
-
rho_
)
*
score
[
i
]);
double
exp_2_score
=
std
::
exp
((
2
-
rho_
)
*
score
[
i
]);
double
exp_2_score
=
std
::
exp
((
2
-
rho_
)
*
score
[
i
]);
...
...
src/objective/xentropy_objective.hpp
View file @
8ed371ce
...
@@ -77,7 +77,7 @@ class CrossEntropy: public ObjectiveFunction {
...
@@ -77,7 +77,7 @@ class CrossEntropy: public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
// compute pointwise gradients and Hessians with implied unit weights
// compute pointwise gradients and Hessians with implied unit weights
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
gradients
[
i
]
=
static_cast
<
score_t
>
(
z
-
label_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
z
-
label_
[
i
]);
...
@@ -85,7 +85,7 @@ class CrossEntropy: public ObjectiveFunction {
...
@@ -85,7 +85,7 @@ class CrossEntropy: public ObjectiveFunction {
}
}
}
else
{
}
else
{
// compute pointwise gradients and Hessians with given weights
// compute pointwise gradients and Hessians with given weights
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
gradients
[
i
]
=
static_cast
<
score_t
>
((
z
-
label_
[
i
])
*
weights_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
((
z
-
label_
[
i
])
*
weights_
[
i
]);
...
@@ -114,7 +114,7 @@ class CrossEntropy: public ObjectiveFunction {
...
@@ -114,7 +114,7 @@ class CrossEntropy: public ObjectiveFunction {
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
...
@@ -122,7 +122,7 @@ class CrossEntropy: public ObjectiveFunction {
...
@@ -122,7 +122,7 @@ class CrossEntropy: public ObjectiveFunction {
}
}
}
else
{
}
else
{
sumw
=
static_cast
<
double
>
(
num_data_
);
sumw
=
static_cast
<
double
>
(
num_data_
);
#pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
];
suml
+=
label_
[
i
];
...
@@ -190,7 +190,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
...
@@ -190,7 +190,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
void
GetGradients
(
const
double
*
score
,
score_t
*
gradients
,
score_t
*
hessians
)
const
override
{
if
(
weights_
==
nullptr
)
{
if
(
weights_
==
nullptr
)
{
// compute pointwise gradients and Hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
// compute pointwise gradients and Hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
const
double
z
=
1.0
f
/
(
1.0
f
+
std
::
exp
(
-
score
[
i
]));
gradients
[
i
]
=
static_cast
<
score_t
>
(
z
-
label_
[
i
]);
gradients
[
i
]
=
static_cast
<
score_t
>
(
z
-
label_
[
i
]);
...
@@ -198,7 +198,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
...
@@ -198,7 +198,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
}
}
}
else
{
}
else
{
// compute pointwise gradients and Hessians with given weights
// compute pointwise gradients and Hessians with given weights
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
const
double
w
=
weights_
[
i
];
const
double
w
=
weights_
[
i
];
const
double
y
=
label_
[
i
];
const
double
y
=
label_
[
i
];
...
@@ -244,7 +244,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
...
@@ -244,7 +244,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
double
suml
=
0.0
f
;
double
suml
=
0.0
f
;
double
sumw
=
0.0
f
;
double
sumw
=
0.0
f
;
if
(
weights_
!=
nullptr
)
{
if
(
weights_
!=
nullptr
)
{
#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml, sumw) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
suml
+=
static_cast
<
double
>
(
label_
[
i
])
*
weights_
[
i
];
...
@@ -252,7 +252,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
...
@@ -252,7 +252,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
}
}
}
else
{
}
else
{
sumw
=
static_cast
<
double
>
(
num_data_
);
sumw
=
static_cast
<
double
>
(
num_data_
);
#pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:suml) if (!deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
suml
+=
label_
[
i
];
suml
+=
label_
[
i
];
...
...
src/treelearner/col_sampler.hpp
View file @
8ed371ce
...
@@ -79,7 +79,7 @@ class ColSampler {
...
@@ -79,7 +79,7 @@ class ColSampler {
static_cast
<
int
>
(
valid_feature_indices_
.
size
()),
used_cnt_bytree_
);
static_cast
<
int
>
(
valid_feature_indices_
.
size
()),
used_cnt_bytree_
);
int
omp_loop_size
=
static_cast
<
int
>
(
used_feature_indices_
.
size
());
int
omp_loop_size
=
static_cast
<
int
>
(
used_feature_indices_
.
size
());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (omp_loop_size >= 1024)
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
i
]];
int
used_feature
=
valid_feature_indices_
[
used_feature_indices_
[
i
]];
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
...
@@ -142,7 +142,7 @@ class ColSampler {
...
@@ -142,7 +142,7 @@ class ColSampler {
auto
sampled_indices
=
random_
.
Sample
(
auto
sampled_indices
=
random_
.
Sample
(
static_cast
<
int
>
((
*
allowed_used_feature_indices
).
size
()),
used_feature_cnt
);
static_cast
<
int
>
((
*
allowed_used_feature_indices
).
size
()),
used_feature_cnt
);
int
omp_loop_size
=
static_cast
<
int
>
(
sampled_indices
.
size
());
int
omp_loop_size
=
static_cast
<
int
>
(
sampled_indices
.
size
());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (omp_loop_size >= 1024)
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
int
used_feature
=
valid_feature_indices_
[(
*
allowed_used_feature_indices
)[
sampled_indices
[
i
]]];
valid_feature_indices_
[(
*
allowed_used_feature_indices
)[
sampled_indices
[
i
]]];
...
@@ -168,7 +168,7 @@ class ColSampler {
...
@@ -168,7 +168,7 @@ class ColSampler {
auto
sampled_indices
=
random_
.
Sample
(
auto
sampled_indices
=
random_
.
Sample
(
static_cast
<
int
>
((
*
allowed_valid_feature_indices
).
size
()),
used_feature_cnt
);
static_cast
<
int
>
((
*
allowed_valid_feature_indices
).
size
()),
used_feature_cnt
);
int
omp_loop_size
=
static_cast
<
int
>
(
sampled_indices
.
size
());
int
omp_loop_size
=
static_cast
<
int
>
(
sampled_indices
.
size
());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (omp_loop_size >= 1024)
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
used_feature
=
(
*
allowed_valid_feature_indices
)[
sampled_indices
[
i
]];
int
used_feature
=
(
*
allowed_valid_feature_indices
)[
sampled_indices
[
i
]];
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature
);
...
...
src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
View file @
8ed371ce
...
@@ -405,7 +405,7 @@ void CUDASingleGPUTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFuncti
...
@@ -405,7 +405,7 @@ void CUDASingleGPUTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFuncti
}
}
std
::
vector
<
int
>
n_nozeroworker_perleaf
(
cuda_tree
->
num_leaves
(),
1
);
std
::
vector
<
int
>
n_nozeroworker_perleaf
(
cuda_tree
->
num_leaves
(),
1
);
int
num_machines
=
Network
::
num_machines
();
int
num_machines
=
Network
::
num_machines
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
cuda_tree
->
num_leaves
();
++
i
)
{
for
(
int
i
=
0
;
i
<
cuda_tree
->
num_leaves
();
++
i
)
{
const
double
output
=
static_cast
<
double
>
(
cuda_tree
->
LeafOutput
(
i
));
const
double
output
=
static_cast
<
double
>
(
cuda_tree
->
LeafOutput
(
i
));
data_size_t
cnt_leaf_data
=
leaf_num_data_
[
i
];
data_size_t
cnt_leaf_data
=
leaf_num_data_
[
i
];
...
...
src/treelearner/data_parallel_tree_learner.cpp
View file @
8ed371ce
...
@@ -228,7 +228,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
...
@@ -228,7 +228,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
if
(
local_data_on_smaller_leaf
<=
0
)
{
if
(
local_data_on_smaller_leaf
<=
0
)
{
// clear histogram buffer before synchronizing
// clear histogram buffer before synchronizing
// otherwise histogram contents from the previous iteration will be sent
// otherwise histogram contents from the previous iteration will be sent
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
if
(
this
->
col_sampler_
.
is_feature_used_bytree
()[
feature_index
]
==
false
)
if
(
this
->
col_sampler_
.
is_feature_used_bytree
()[
feature_index
]
==
false
)
continue
;
continue
;
...
@@ -249,7 +249,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
...
@@ -249,7 +249,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
// construct local histograms
// construct local histograms
global_timer
.
Start
(
"DataParallelTreeLearner::ReduceHistogram"
);
global_timer
.
Start
(
"DataParallelTreeLearner::ReduceHistogram"
);
global_timer
.
Start
(
"DataParallelTreeLearner::ReduceHistogram::Copy"
);
global_timer
.
Start
(
"DataParallelTreeLearner::ReduceHistogram::Copy"
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
if
(
this
->
col_sampler_
.
is_feature_used_bytree
()[
feature_index
]
==
false
)
if
(
this
->
col_sampler_
.
is_feature_used_bytree
()[
feature_index
]
==
false
)
continue
;
continue
;
...
@@ -318,7 +318,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
...
@@ -318,7 +318,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
if
(
parent_num_bits
>
16
&&
larger_leaf_num_bits
<=
16
)
{
if
(
parent_num_bits
>
16
&&
larger_leaf_num_bits
<=
16
)
{
CHECK_LE
(
smaller_leaf_num_bits
,
16
);
CHECK_LE
(
smaller_leaf_num_bits
,
16
);
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
if
(
!
is_feature_aggregated_
[
feature_index
])
continue
;
if
(
!
is_feature_aggregated_
[
feature_index
])
continue
;
...
@@ -330,7 +330,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
...
@@ -330,7 +330,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
}
}
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
if
(
!
is_feature_aggregated_
[
feature_index
])
continue
;
if
(
!
is_feature_aggregated_
[
feature_index
])
continue
;
...
...
src/treelearner/data_partition.hpp
View file @
8ed371ce
...
@@ -52,7 +52,7 @@ class DataPartition {
...
@@ -52,7 +52,7 @@ class DataPartition {
if
(
used_data_indices_
==
nullptr
)
{
if
(
used_data_indices_
==
nullptr
)
{
// if using all data
// if using all data
leaf_count_
[
0
]
=
num_data_
;
leaf_count_
[
0
]
=
num_data_
;
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (num_data_ >= 1024)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
indices_
[
i
]
=
i
;
indices_
[
i
]
=
i
;
}
}
...
...
src/treelearner/feature_histogram.hpp
View file @
8ed371ce
...
@@ -1692,7 +1692,7 @@ class HistogramPool {
...
@@ -1692,7 +1692,7 @@ class HistogramPool {
auto
&
ref_feature_meta
=
*
feature_meta
;
auto
&
ref_feature_meta
=
*
feature_meta
;
const
int
num_feature
=
train_data
->
num_features
();
const
int
num_feature
=
train_data
->
num_features
();
ref_feature_meta
.
resize
(
num_feature
);
ref_feature_meta
.
resize
(
num_feature
);
#pragma omp parallel for schedule(static, 512) if (num_feature >= 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) if (num_feature >= 1024)
for
(
int
i
=
0
;
i
<
num_feature
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_feature
;
++
i
)
{
if
(
USE_DATA
)
{
if
(
USE_DATA
)
{
ref_feature_meta
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
ref_feature_meta
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
...
@@ -1749,7 +1749,7 @@ class HistogramPool {
...
@@ -1749,7 +1749,7 @@ class HistogramPool {
if
(
config
->
use_quantized_grad
)
{
if
(
config
->
use_quantized_grad
)
{
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
old_cache_size
;
i
<
cache_size
;
++
i
)
{
for
(
int
i
=
old_cache_size
;
i
<
cache_size
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
pool_
[
i
].
reset
(
new
FeatureHistogram
[
train_data
->
num_features
()]);
pool_
[
i
].
reset
(
new
FeatureHistogram
[
train_data
->
num_features
()]);
...
@@ -1763,7 +1763,7 @@ class HistogramPool {
...
@@ -1763,7 +1763,7 @@ class HistogramPool {
OMP_THROW_EX
();
OMP_THROW_EX
();
}
else
{
}
else
{
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
old_cache_size
;
i
<
cache_size
;
++
i
)
{
for
(
int
i
=
old_cache_size
;
i
<
cache_size
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
pool_
[
i
].
reset
(
new
FeatureHistogram
[
train_data
->
num_features
()]);
pool_
[
i
].
reset
(
new
FeatureHistogram
[
train_data
->
num_features
()]);
...
@@ -1787,7 +1787,7 @@ class HistogramPool {
...
@@ -1787,7 +1787,7 @@ class HistogramPool {
old_config
->
extra_trees
!=
config
->
extra_trees
||
old_config
->
extra_trees
!=
config
->
extra_trees
||
old_config
->
max_delta_step
!=
config
->
max_delta_step
||
old_config
->
max_delta_step
!=
config
->
max_delta_step
||
old_config
->
path_smooth
!=
config
->
path_smooth
)
{
old_config
->
path_smooth
!=
config
->
path_smooth
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
cache_size_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
cache_size_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
train_data
->
num_features
();
++
j
)
{
for
(
int
j
=
0
;
j
<
train_data
->
num_features
();
++
j
)
{
pool_
[
i
][
j
].
ResetFunc
();
pool_
[
i
][
j
].
ResetFunc
();
...
...
src/treelearner/gpu_tree_learner.cpp
View file @
8ed371ce
...
@@ -191,7 +191,7 @@ void GPUTreeLearner::WaitAndGetHistograms(hist_t* histograms) {
...
@@ -191,7 +191,7 @@ void GPUTreeLearner::WaitAndGetHistograms(hist_t* histograms) {
HistType
*
hist_outputs
=
reinterpret_cast
<
HistType
*>
(
host_histogram_outputs_
);
HistType
*
hist_outputs
=
reinterpret_cast
<
HistType
*>
(
host_histogram_outputs_
);
// when the output is ready, the computation is done
// when the output is ready, the computation is done
histograms_wait_obj_
.
wait
();
histograms_wait_obj_
.
wait
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
if
(
!
feature_masks_
[
i
])
{
if
(
!
feature_masks_
[
i
])
{
continue
;
continue
;
...
@@ -359,7 +359,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -359,7 +359,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
0
,
num_data_
*
sizeof
(
Feature4
)));
0
,
num_data_
*
sizeof
(
Feature4
)));
}
}
// building Feature4 bundles; each thread handles dword_features_ features
// building Feature4 bundles; each thread handles dword_features_ features
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
dense_feature_group_map_
.
size
()
/
dword_features_
);
++
i
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
dense_feature_group_map_
.
size
()
/
dword_features_
);
++
i
)
{
int
tid
=
omp_get_thread_num
();
int
tid
=
omp_get_thread_num
();
Feature4
*
host4
=
host4_ptrs
[
tid
];
Feature4
*
host4
=
host4_ptrs
[
tid
];
...
@@ -451,7 +451,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -451,7 +451,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
>>
1
]
|=
(
uint8_t
)((
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)))
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)))
...
@@ -464,14 +464,14 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -464,14 +464,14 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
BinIterator
*
bin_iter
=
train_data_
->
FeatureGroupIterator
(
dense_dword_ind
[
i
]);
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
false
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
false
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
}
}
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
)
!=
0
)
{
}
else
if
(
dynamic_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
)
!=
0
)
{
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
);
DenseBinIterator
<
uint8_t
,
true
>
iter
=
*
static_cast
<
DenseBinIterator
<
uint8_t
,
true
>*>
(
bin_iter
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
host4
[
j
].
s
[
i
]
=
(
uint8_t
)(
iter
.
RawGet
(
j
)
*
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
+
((
j
+
i
)
&
(
device_bin_mults_
[
copied_feature4
*
dword_features_
+
i
]
-
1
)));
...
@@ -485,7 +485,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -485,7 +485,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
// fill the leftover features
// fill the leftover features
if
(
dword_features_
==
8
)
{
if
(
dword_features_
==
8
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
i
=
k
;
i
<
dword_features_
;
++
i
)
{
for
(
int
i
=
k
;
i
<
dword_features_
;
++
i
)
{
// fill this empty feature with some "random" value
// fill this empty feature with some "random" value
...
@@ -493,7 +493,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
...
@@ -493,7 +493,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
}
}
}
}
else
if
(
dword_features_
==
4
)
{
}
else
if
(
dword_features_
==
4
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_data_
;
++
j
)
{
for
(
int
i
=
k
;
i
<
dword_features_
;
++
i
)
{
for
(
int
i
=
k
;
i
<
dword_features_
;
++
i
)
{
// fill this empty feature with some "random" value
// fill this empty feature with some "random" value
...
@@ -572,7 +572,7 @@ void GPUTreeLearner::BuildGPUKernels() {
...
@@ -572,7 +572,7 @@ void GPUTreeLearner::BuildGPUKernels() {
// currently we don't use constant memory
// currently we don't use constant memory
int
use_constants
=
0
;
int
use_constants
=
0
;
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(guided)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(guided)
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
for
(
int
i
=
0
;
i
<=
kMaxLogWorkgroupsPerFeature
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
OMP_LOOP_EX_BEGIN
();
boost
::
compute
::
program
program
;
boost
::
compute
::
program
program
;
...
@@ -811,7 +811,7 @@ void GPUTreeLearner::BeforeTrain() {
...
@@ -811,7 +811,7 @@ void GPUTreeLearner::BeforeTrain() {
// transfer the indices to GPU
// transfer the indices to GPU
indices_future_
=
boost
::
compute
::
copy_async
(
indices
,
indices
+
cnt
,
device_data_indices_
->
begin
(),
queue_
);
indices_future_
=
boost
::
compute
::
copy_async
(
indices
,
indices
+
cnt
,
device_data_indices_
->
begin
(),
queue_
);
if
(
!
share_state_
->
is_constant_hessian
)
{
if
(
!
share_state_
->
is_constant_hessian
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
ordered_hessians_
[
i
]
=
hessians_
[
indices
[
i
]];
ordered_hessians_
[
i
]
=
hessians_
[
indices
[
i
]];
}
}
...
@@ -827,7 +827,7 @@ void GPUTreeLearner::BeforeTrain() {
...
@@ -827,7 +827,7 @@ void GPUTreeLearner::BeforeTrain() {
histogram_fulldata_kernels_
[
i
].
set_arg
(
6
,
const_hessian
);
histogram_fulldata_kernels_
[
i
].
set_arg
(
6
,
const_hessian
);
}
}
}
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
cnt
;
++
i
)
{
ordered_gradients_
[
i
]
=
gradients_
[
indices
[
i
]];
ordered_gradients_
[
i
]
=
gradients_
[
indices
[
i
]];
}
}
...
@@ -865,7 +865,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
...
@@ -865,7 +865,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
indices_future_
=
boost
::
compute
::
copy_async
(
indices
+
begin
,
indices
+
end
,
device_data_indices_
->
begin
(),
queue_
);
indices_future_
=
boost
::
compute
::
copy_async
(
indices
+
begin
,
indices
+
end
,
device_data_indices_
->
begin
(),
queue_
);
if
(
!
share_state_
->
is_constant_hessian
)
{
if
(
!
share_state_
->
is_constant_hessian
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
ordered_hessians_
[
i
-
begin
]
=
hessians_
[
indices
[
i
]];
ordered_hessians_
[
i
-
begin
]
=
hessians_
[
indices
[
i
]];
}
}
...
@@ -873,7 +873,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
...
@@ -873,7 +873,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
(
end
-
begin
)
*
sizeof
(
score_t
),
ptr_pinned_hessians_
);
hessians_future_
=
queue_
.
enqueue_write_buffer_async
(
device_hessians_
,
0
,
(
end
-
begin
)
*
sizeof
(
score_t
),
ptr_pinned_hessians_
);
}
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
ordered_gradients_
[
i
-
begin
]
=
gradients_
[
indices
[
i
]];
ordered_gradients_
[
i
-
begin
]
=
gradients_
[
indices
[
i
]];
}
}
...
@@ -907,7 +907,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -907,7 +907,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
// generate and copy ordered_gradients if gradients is not null
// generate and copy ordered_gradients if gradients is not null
if
(
gradients
!=
nullptr
)
{
if
(
gradients
!=
nullptr
)
{
if
(
num_data
!=
num_data_
)
{
if
(
num_data
!=
num_data_
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
ordered_gradients
[
i
]
=
gradients
[
data_indices
[
i
]];
ordered_gradients
[
i
]
=
gradients
[
data_indices
[
i
]];
}
}
...
@@ -919,7 +919,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -919,7 +919,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
// generate and copy ordered_hessians if Hessians is not null
// generate and copy ordered_hessians if Hessians is not null
if
(
hessians
!=
nullptr
&&
!
share_state_
->
is_constant_hessian
)
{
if
(
hessians
!=
nullptr
&&
!
share_state_
->
is_constant_hessian
)
{
if
(
num_data
!=
num_data_
)
{
if
(
num_data
!=
num_data_
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data
;
++
i
)
{
ordered_hessians
[
i
]
=
hessians
[
data_indices
[
i
]];
ordered_hessians
[
i
]
=
hessians
[
data_indices
[
i
]];
}
}
...
@@ -930,7 +930,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -930,7 +930,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
}
}
// converted indices in is_feature_used to feature-group indices
// converted indices in is_feature_used to feature-group indices
std
::
vector
<
int8_t
>
is_feature_group_used
(
num_feature_groups_
,
0
);
std
::
vector
<
int8_t
>
is_feature_group_used
(
num_feature_groups_
,
0
);
#pragma omp parallel for schedule(static, 1024) if (num_features_ >= 2048)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 1024) if (num_features_ >= 2048)
for
(
int
i
=
0
;
i
<
num_features_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_features_
;
++
i
)
{
if
(
is_feature_used
[
i
])
{
if
(
is_feature_used
[
i
])
{
is_feature_group_used
[
train_data_
->
Feature2Group
(
i
)]
=
1
;
is_feature_group_used
[
train_data_
->
Feature2Group
(
i
)]
=
1
;
...
@@ -938,7 +938,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -938,7 +938,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
}
}
// construct the feature masks for dense feature-groups
// construct the feature masks for dense feature-groups
int
used_dense_feature_groups
=
0
;
int
used_dense_feature_groups
=
0
;
#pragma omp parallel for schedule(static, 1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_dense_feature_groups_
;
++
i
)
{
if
(
is_feature_group_used
[
dense_feature_group_map_
[
i
]])
{
if
(
is_feature_group_used
[
dense_feature_group_map_
[
i
]])
{
feature_masks_
[
i
]
=
1
;
feature_masks_
[
i
]
=
1
;
...
@@ -973,7 +973,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
...
@@ -973,7 +973,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
void
GPUTreeLearner
::
ConstructHistograms
(
const
std
::
vector
<
int8_t
>&
is_feature_used
,
bool
use_subtract
)
{
void
GPUTreeLearner
::
ConstructHistograms
(
const
std
::
vector
<
int8_t
>&
is_feature_used
,
bool
use_subtract
)
{
std
::
vector
<
int8_t
>
is_sparse_feature_used
(
num_features_
,
0
);
std
::
vector
<
int8_t
>
is_sparse_feature_used
(
num_features_
,
0
);
std
::
vector
<
int8_t
>
is_dense_feature_used
(
num_features_
,
0
);
std
::
vector
<
int8_t
>
is_dense_feature_used
(
num_features_
,
0
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
if
(
!
col_sampler_
.
is_feature_used_bytree
()[
feature_index
])
continue
;
if
(
!
col_sampler_
.
is_feature_used_bytree
()[
feature_index
])
continue
;
if
(
!
is_feature_used
[
feature_index
])
continue
;
if
(
!
is_feature_used
[
feature_index
])
continue
;
...
...
src/treelearner/gradient_discretizer.cpp
View file @
8ed371ce
...
@@ -216,7 +216,7 @@ void GradientDiscretizer::RenewIntGradTreeOutput(
...
@@ -216,7 +216,7 @@ void GradientDiscretizer::RenewIntGradTreeOutput(
data_size_t
leaf_cnt
=
0
;
data_size_t
leaf_cnt
=
0
;
const
data_size_t
*
data_indices
=
data_partition
->
GetIndexOnLeaf
(
leaf_id
,
&
leaf_cnt
);
const
data_size_t
*
data_indices
=
data_partition
->
GetIndexOnLeaf
(
leaf_id
,
&
leaf_cnt
);
double
sum_gradient
=
0.0
f
,
sum_hessian
=
0.0
f
;
double
sum_gradient
=
0.0
f
,
sum_hessian
=
0.0
f
;
#pragma omp parallel for schedule(static) reduction(+:sum_gradient, sum_hessian)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_gradient, sum_hessian)
for
(
data_size_t
i
=
0
;
i
<
leaf_cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
leaf_cnt
;
++
i
)
{
const
data_size_t
index
=
data_indices
[
i
];
const
data_size_t
index
=
data_indices
[
i
];
const
score_t
grad
=
gradients
[
index
];
const
score_t
grad
=
gradients
[
index
];
...
@@ -242,7 +242,7 @@ void GradientDiscretizer::RenewIntGradTreeOutput(
...
@@ -242,7 +242,7 @@ void GradientDiscretizer::RenewIntGradTreeOutput(
data_size_t
leaf_cnt
=
0
;
data_size_t
leaf_cnt
=
0
;
const
data_size_t
*
data_indices
=
data_partition
->
GetIndexOnLeaf
(
leaf_id
,
&
leaf_cnt
);
const
data_size_t
*
data_indices
=
data_partition
->
GetIndexOnLeaf
(
leaf_id
,
&
leaf_cnt
);
double
sum_gradient
=
0.0
f
,
sum_hessian
=
0.0
f
;
double
sum_gradient
=
0.0
f
,
sum_hessian
=
0.0
f
;
#pragma omp parallel for schedule(static) reduction(+:sum_gradient, sum_hessian)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) reduction(+:sum_gradient, sum_hessian)
for
(
data_size_t
i
=
0
;
i
<
leaf_cnt
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
leaf_cnt
;
++
i
)
{
const
data_size_t
index
=
data_indices
[
i
];
const
data_size_t
index
=
data_indices
[
i
];
const
score_t
grad
=
gradients
[
index
];
const
score_t
grad
=
gradients
[
index
];
...
...
src/treelearner/leaf_splits.hpp
View file @
8ed371ce
...
@@ -95,7 +95,7 @@ class LeafSplits {
...
@@ -95,7 +95,7 @@ class LeafSplits {
data_indices_
=
nullptr
;
data_indices_
=
nullptr
;
double
tmp_sum_gradients
=
0.0
f
;
double
tmp_sum_gradients
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
tmp_sum_gradients
+=
gradients
[
i
];
tmp_sum_gradients
+=
gradients
[
i
];
tmp_sum_hessians
+=
hessians
[
i
];
tmp_sum_hessians
+=
hessians
[
i
];
...
@@ -120,7 +120,7 @@ class LeafSplits {
...
@@ -120,7 +120,7 @@ class LeafSplits {
double
tmp_sum_hessians
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
const
int16_t
*
packed_int_gradients_and_hessians
=
reinterpret_cast
<
const
int16_t
*>
(
int_gradients_and_hessians
);
const
int16_t
*
packed_int_gradients_and_hessians
=
reinterpret_cast
<
const
int16_t
*>
(
int_gradients_and_hessians
);
int64_t
tmp_sum_gradients_and_hessians
=
0
;
int64_t
tmp_sum_gradients_and_hessians
=
0
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians, tmp_sum_gradients_and_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians, tmp_sum_gradients_and_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
tmp_sum_gradients
+=
int_gradients_and_hessians
[
2
*
i
+
1
]
*
grad_scale
;
tmp_sum_gradients
+=
int_gradients_and_hessians
[
2
*
i
+
1
]
*
grad_scale
;
tmp_sum_hessians
+=
int_gradients_and_hessians
[
2
*
i
]
*
hess_scale
;
tmp_sum_hessians
+=
int_gradients_and_hessians
[
2
*
i
]
*
hess_scale
;
...
@@ -149,7 +149,7 @@ class LeafSplits {
...
@@ -149,7 +149,7 @@ class LeafSplits {
data_indices_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
num_data_in_leaf_
);
data_indices_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
num_data_in_leaf_
);
double
tmp_sum_gradients
=
0.0
f
;
double
tmp_sum_gradients
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
const
data_size_t
idx
=
data_indices_
[
i
];
const
data_size_t
idx
=
data_indices_
[
i
];
tmp_sum_gradients
+=
gradients
[
idx
];
tmp_sum_gradients
+=
gradients
[
idx
];
...
@@ -177,7 +177,7 @@ class LeafSplits {
...
@@ -177,7 +177,7 @@ class LeafSplits {
double
tmp_sum_hessians
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
const
int16_t
*
packed_int_gradients_and_hessians
=
reinterpret_cast
<
const
int16_t
*>
(
int_gradients_and_hessians
);
const
int16_t
*
packed_int_gradients_and_hessians
=
reinterpret_cast
<
const
int16_t
*>
(
int_gradients_and_hessians
);
int64_t
tmp_sum_gradients_and_hessians
=
0
;
int64_t
tmp_sum_gradients_and_hessians
=
0
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians, tmp_sum_gradients_and_hessians) if (num_data_in_leaf_ >= 1024 && deterministic_)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians, tmp_sum_gradients_and_hessians) if (num_data_in_leaf_ >= 1024 && deterministic_)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
const
data_size_t
idx
=
data_indices_
[
i
];
const
data_size_t
idx
=
data_indices_
[
i
];
tmp_sum_gradients
+=
int_gradients_and_hessians
[
2
*
idx
+
1
]
*
grad_scale
;
tmp_sum_gradients
+=
int_gradients_and_hessians
[
2
*
idx
+
1
]
*
grad_scale
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment