Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
8ed371ce
Unverified
Commit
8ed371ce
authored
Oct 09, 2023
by
James Lamb
Committed by
GitHub
Oct 09, 2023
Browse files
set explicit number of threads in every OpenMP `parallel` region (#6135)
parent
992f5056
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
16 additions
and
16 deletions
+16
-16
src/treelearner/linear_tree_learner.cpp
src/treelearner/linear_tree_learner.cpp
+7
-7
src/treelearner/linear_tree_learner.h
src/treelearner/linear_tree_learner.h
+1
-1
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+3
-3
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+1
-1
src/treelearner/voting_parallel_tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
+4
-4
No files found.
src/treelearner/linear_tree_learner.cpp
View file @
8ed371ce
...
...
@@ -19,7 +19,7 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav
leaf_map_
=
std
::
vector
<
int
>
(
train_data
->
num_data
(),
-
1
);
contains_nan_
=
std
::
vector
<
int8_t
>
(
train_data
->
num_features
(),
0
);
// identify features containing nans
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feat
=
0
;
feat
<
train_data
->
num_features
();
++
feat
)
{
auto
bin_mapper
=
train_data_
->
FeatureBinMapper
(
feat
);
if
(
bin_mapper
->
bin_type
()
==
BinType
::
NumericalBin
)
{
...
...
@@ -159,7 +159,7 @@ void LinearTreeLearner::GetLeafMap(Tree* tree) const {
std
::
fill
(
leaf_map_
.
begin
(),
leaf_map_
.
end
(),
-
1
);
// map data to leaf number
const
data_size_t
*
ind
=
data_partition_
->
indices
();
#pragma omp parallel for schedule(dynamic)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(dynamic)
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
data_size_t
idx
=
data_partition_
->
leaf_begin
(
i
);
for
(
int
j
=
0
;
j
<
data_partition_
->
leaf_count
(
i
);
++
j
)
{
...
...
@@ -224,7 +224,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
}
}
// clear the coefficient matrices
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
for
(
int
leaf_num
=
0
;
leaf_num
<
num_leaves
;
++
leaf_num
)
{
size_t
num_feat
=
leaf_features
[
leaf_num
].
size
();
...
...
@@ -232,7 +232,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
std
::
fill
(
XTg_by_thread_
[
i
][
leaf_num
].
begin
(),
XTg_by_thread_
[
i
][
leaf_num
].
begin
()
+
num_feat
+
1
,
0.0
f
);
}
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
leaf_num
=
0
;
leaf_num
<
num_leaves
;
++
leaf_num
)
{
size_t
num_feat
=
leaf_features
[
leaf_num
].
size
();
std
::
fill
(
XTHX_
[
leaf_num
].
begin
(),
XTHX_
[
leaf_num
].
begin
()
+
(
num_feat
+
1
)
*
(
num_feat
+
2
)
/
2
,
0.0
f
);
...
...
@@ -245,7 +245,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
}
}
OMP_INIT_EX
();
#pragma omp parallel if (num_data_ > 1024)
#pragma omp parallel
num_threads(OMP_NUM_THREADS())
if (num_data_ > 1024)
{
std
::
vector
<
float
>
curr_row
(
max_num_features
+
1
);
int
tid
=
omp_get_thread_num
();
...
...
@@ -296,7 +296,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
auto
total_nonzero
=
std
::
vector
<
int
>
(
tree
->
num_leaves
());
// aggregate results from different threads
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
leaf_num
=
0
;
leaf_num
<
num_leaves
;
++
leaf_num
)
{
size_t
num_feat
=
leaf_features
[
leaf_num
].
size
();
for
(
size_t
j
=
0
;
j
<
(
num_feat
+
1
)
*
(
num_feat
+
2
)
/
2
;
++
j
)
{
...
...
@@ -318,7 +318,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
double
shrinkage
=
tree
->
shrinkage
();
double
decay_rate
=
config_
->
refit_decay_rate
;
// copy into eigen matrices and solve
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
leaf_num
=
0
;
leaf_num
<
num_leaves
;
++
leaf_num
)
{
if
(
total_nonzero
[
leaf_num
]
<
static_cast
<
int
>
(
leaf_features
[
leaf_num
].
size
())
+
1
)
{
if
(
is_refit
)
{
...
...
src/treelearner/linear_tree_learner.h
View file @
8ed371ce
...
...
@@ -75,7 +75,7 @@ class LinearTreeLearner: public SerialTreeLearner {
leaf_num_features
[
leaf_num
]
=
static_cast
<
int
>
(
feat_ptr
[
leaf_num
].
size
());
}
OMP_INIT_EX
();
#pragma omp parallel for schedule(static) if (num_data_ > 1024)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static) if (num_data_ > 1024)
for
(
int
i
=
0
;
i
<
num_data_
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
int
leaf_num
=
leaf_map_
[
i
];
...
...
src/treelearner/serial_tree_learner.cpp
View file @
8ed371ce
...
...
@@ -242,7 +242,7 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
auto
tree
=
std
::
unique_ptr
<
Tree
>
(
new
Tree
(
*
old_tree
));
CHECK_GE
(
data_partition_
->
num_leaves
(),
tree
->
num_leaves
());
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
OMP_LOOP_EX_BEGIN
();
data_size_t
cnt_leaf_data
=
0
;
...
...
@@ -379,7 +379,7 @@ void SerialTreeLearner::FindBestSplits(const Tree* tree) {
void
SerialTreeLearner
::
FindBestSplits
(
const
Tree
*
tree
,
const
std
::
set
<
int
>*
force_features
)
{
std
::
vector
<
int8_t
>
is_feature_used
(
num_features_
,
0
);
#pragma omp parallel for schedule(static, 256) if (num_features_ >= 512)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 256) if (num_features_ >= 512)
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
if
(
!
col_sampler_
.
is_feature_used_bytree
()[
feature_index
]
&&
(
force_features
==
nullptr
||
force_features
->
find
(
feature_index
)
==
force_features
->
end
()))
continue
;
if
(
parent_leaf_histogram_array_
!=
nullptr
...
...
@@ -922,7 +922,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
}
std
::
vector
<
int
>
n_nozeroworker_perleaf
(
tree
->
num_leaves
(),
1
);
int
num_machines
=
Network
::
num_machines
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
const
double
output
=
static_cast
<
double
>
(
tree
->
LeafOutput
(
i
));
data_size_t
cnt_leaf_data
=
0
;
...
...
src/treelearner/serial_tree_learner.h
View file @
8ed371ce
...
...
@@ -103,7 +103,7 @@ class SerialTreeLearner: public TreeLearner {
if
(
tree
->
num_leaves
()
<=
1
)
{
return
;
}
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static, 1)
for
(
int
i
=
0
;
i
<
tree
->
num_leaves
();
++
i
)
{
double
output
=
static_cast
<
double
>
(
tree
->
LeafOutput
(
i
));
data_size_t
cnt_leaf_data
=
0
;
...
...
src/treelearner/voting_parallel_tree_learner.cpp
View file @
8ed371ce
...
...
@@ -243,7 +243,7 @@ template <typename TREELEARNER_T>
void
VotingParallelTreeLearner
<
TREELEARNER_T
>::
FindBestSplits
(
const
Tree
*
tree
)
{
// use local data to find local best splits
std
::
vector
<
int8_t
>
is_feature_used
(
this
->
num_features_
,
0
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
if
(
!
this
->
col_sampler_
.
is_feature_used_bytree
()[
feature_index
])
continue
;
if
(
this
->
parent_leaf_histogram_array_
!=
nullptr
...
...
@@ -265,7 +265,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
// clear histogram buffer before synchronizing
// otherwise histogram contents from the previous iteration will be sent
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
OMP_LOOP_EX_BEGIN
();
if
(
!
is_feature_used
[
feature_index
])
{
continue
;
}
...
...
@@ -285,7 +285,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
const
data_size_t
local_data_on_larger_leaf
=
this
->
data_partition_
->
leaf_count
(
larger_leaf_index
);
if
(
local_data_on_larger_leaf
<=
0
)
{
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
OMP_LOOP_EX_BEGIN
();
if
(
!
is_feature_used
[
feature_index
])
{
continue
;
}
...
...
@@ -307,7 +307,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree)
double
larger_leaf_parent_output
=
this
->
GetParentOutput
(
tree
,
this
->
larger_leaf_splits_
.
get
());
OMP_INIT_EX
();
// find splits
#pragma omp parallel for schedule(static)
#pragma omp parallel for
num_threads(OMP_NUM_THREADS())
schedule(static)
for
(
int
feature_index
=
0
;
feature_index
<
this
->
num_features_
;
++
feature_index
)
{
OMP_LOOP_EX_BEGIN
();
if
(
!
is_feature_used
[
feature_index
])
{
continue
;
}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment