LightGBM-DCU (tianlh), commit 8ed371ce (unverified signature)
Authored Oct 09, 2023 by James Lamb; committed via GitHub on Oct 09, 2023
Parent commit: 992f5056

Commit message:

    set explicit number of threads in every OpenMP `parallel` region (#6135)
This page shows 20 of the commit's 45 changed files, with 103 additions and 78 deletions:

    .ci/lint-cpp.sh                      +25   -0
    R-package/src/lightgbm_R.cpp          +7   -7
    include/LightGBM/feature_group.h      +1   -1
    include/LightGBM/tree.h               +3   -3
    include/LightGBM/utils/common.h       +2   -2
    include/LightGBM/utils/threading.h    +1   -1
    src/application/application.cpp       +1   -1
    src/application/predictor.hpp         +1   -1
    src/boosting/gbdt.cpp                 +5   -5
    src/boosting/gbdt.h                   +1   -1
    src/boosting/gbdt_model_text.cpp      +2   -2
    src/boosting/rf.hpp                   +1   -1
    src/boosting/score_updater.hpp        +3   -3
    src/c_api.cpp                        +13  -13
    src/io/dataset.cpp                    +8   -8
    src/io/dataset_loader.cpp             +7   -7
    src/io/metadata.cpp                  +15  -15
    src/io/multi_val_dense_bin.hpp        +1   -1
    src/io/multi_val_sparse_bin.hpp       +2   -2
    src/io/train_share_states.cpp         +4   -4
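Every hunk on this page applies the same mechanical change: each `#pragma omp parallel` directive gains an explicit `num_threads()` clause, in most files `num_threads(OMP_NUM_THREADS())`. The `OMP_NUM_THREADS()` helper is defined in include/LightGBM/utils/openmp_wrapper.h, which is not among the files shown on this page; as a rough sketch only (an assumption, not the actual definition), it can be read as a query for the team size OpenMP would otherwise pick:

    // Sketch only: a stand-in for LightGBM's OMP_NUM_THREADS() helper.
    // The real definition lives in include/LightGBM/utils/openmp_wrapper.h,
    // which this page of the diff does not show.
    #include <omp.h>

    inline int OMP_NUM_THREADS() {
      return omp_get_max_threads();  // default upper bound on the team size
    }

With the clause in place, the thread count of every region is decided at the directive itself rather than by ambient OpenMP state, which is also what the new lint check in .ci/lint-cpp.sh below enforces.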
.ci/lint-cpp.sh

@@ -18,3 +18,28 @@ cmakelint \
      ${cmake_files} \
  || exit -1
  echo "done running cmakelint"
+
+echo "checking that all OpenMP pragmas specify num_threads()"
+get_omp_pragmas_without_num_threads() {
+    grep \
+        -n \
+        -R \
+        --include='*.c' \
+        --include='*.cc' \
+        --include='*.cpp' \
+        --include='*.h' \
+        --include='*.hpp' \
+        'pragma omp parallel' \
+    | grep -v ' num_threads' \
+    | grep -v 'openmp_wrapper.h'
+}
+PROBLEMATIC_LINES=$(
+    get_omp_pragmas_without_num_threads
+)
+if test "${PROBLEMATIC_LINES}" != ""; then
+    get_omp_pragmas_without_num_threads
+    echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those."
+    echo "For details, see https://www.openmp.org/spec-html/5.0/openmpse14.html#x54-800002.6"
+    exit -1
+fi
+echo "done checking OpenMP pragmas"
R-package/src/lightgbm_R.cpp

@@ -226,7 +226,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
   int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
   std::vector<int32_t> idxvec(len);
   // convert from one-based to zero-based index
-  #pragma omp parallel for schedule(static, 512) if (len >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
   for (int32_t i = 0; i < len; ++i) {
     idxvec[i] = static_cast<int32_t>(INTEGER(used_row_indices)[i] - 1);
   }

@@ -339,7 +339,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
   const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
   if (!strcmp("group", name) || !strcmp("query", name)) {
     std::vector<int32_t> vec(len);
-    #pragma omp parallel for schedule(static, 512) if (len >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
     for (int i = 0; i < len; ++i) {
       vec[i] = static_cast<int32_t>(INTEGER(field_data)[i]);
     }

@@ -348,7 +348,7 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, REAL(field_data), len, C_API_DTYPE_FLOAT64));
   } else {
     std::vector<float> vec(len);
-    #pragma omp parallel for schedule(static, 512) if (len >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
     for (int i = 0; i < len; ++i) {
       vec[i] = static_cast<float>(REAL(field_data)[i]);
     }

@@ -372,19 +372,19 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
   if (!strcmp("group", name) || !strcmp("query", name)) {
     auto p_data = reinterpret_cast<const int32_t*>(res);
     // convert from boundaries to size
-    #pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len - 1; ++i) {
       INTEGER(field_data)[i] = p_data[i + 1] - p_data[i];
     }
   } else if (!strcmp("init_score", name)) {
     auto p_data = reinterpret_cast<const double*>(res);
-    #pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len; ++i) {
       REAL(field_data)[i] = p_data[i];
     }
   } else {
     auto p_data = reinterpret_cast<const float*>(res);
-    #pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len; ++i) {
       REAL(field_data)[i] = p_data[i];
     }

@@ -611,7 +611,7 @@ SEXP LGBM_BoosterUpdateOneIterCustom_R(SEXP handle,
   int is_finished = 0;
   int int_len = Rf_asInteger(len);
   std::vector<float> tgrad(int_len), thess(int_len);
-  #pragma omp parallel for schedule(static, 512) if (int_len >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (int_len >= 1024)
   for (int j = 0; j < int_len; ++j) {
     tgrad[j] = static_cast<float>(REAL(grad)[j]);
     thess[j] = static_cast<float>(REAL(hess)[j]);
include/LightGBM/feature_group.h

@@ -361,7 +361,7 @@ class FeatureGroup {
   inline void FinishLoad() {
     if (is_multi_val_) {
       OMP_INIT_EX();
-      #pragma omp parallel for schedule(guided)
+      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(guided)
       for (int i = 0; i < num_feature_; ++i) {
         OMP_LOOP_EX_BEGIN();
         multi_bin_data_[i]->FinishLoad();
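The `OMP_INIT_EX()` / `OMP_LOOP_EX_BEGIN()` macros bracketing this loop (their closing counterparts fall outside the truncated hunk) address a C++/OpenMP constraint: an exception must not propagate out of a parallel region. A minimal sketch of the idea, with assumed names since the real macros live in openmp_wrapper.h outside this diff:

    #include <exception>
    #include <mutex>

    // Assumed illustration of the exception-capture pattern, not LightGBM's macros.
    void ParallelFinishLoad(int num_feature, void (*finish_one)(int)) {
      std::exception_ptr captured;          // roughly what OMP_INIT_EX() sets up
      std::mutex capture_mutex;
      #pragma omp parallel for num_threads(4) schedule(guided)
      for (int i = 0; i < num_feature; ++i) {
        try {                               // roughly OMP_LOOP_EX_BEGIN()
          finish_one(i);
        } catch (...) {                     // the exception may not cross the region
          std::lock_guard<std::mutex> lock(capture_mutex);
          if (!captured) captured = std::current_exception();
        }
      }
      if (captured) std::rethrow_exception(captured);  // rethrown on the caller's thread
    }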
include/LightGBM/tree.h

@@ -185,7 +185,7 @@ class Tree {
   * \param rate The factor of shrinkage
   */
  virtual inline void Shrinkage(double rate) {
-   #pragma omp parallel for schedule(static, 1024) if (num_leaves_ >= 2048)
+   #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1024) if (num_leaves_ >= 2048)
    for (int i = 0; i < num_leaves_ - 1; ++i) {
      leaf_value_[i] = MaybeRoundToZero(leaf_value_[i] * rate);
      internal_value_[i] = MaybeRoundToZero(internal_value_[i] * rate);

@@ -210,7 +210,7 @@ class Tree {
  inline double shrinkage() const { return shrinkage_; }

  virtual inline void AddBias(double val) {
-   #pragma omp parallel for schedule(static, 1024) if (num_leaves_ >= 2048)
+   #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1024) if (num_leaves_ >= 2048)
    for (int i = 0; i < num_leaves_ - 1; ++i) {
      leaf_value_[i] = MaybeRoundToZero(leaf_value_[i] + val);
      internal_value_[i] = MaybeRoundToZero(internal_value_[i] + val);

@@ -218,7 +218,7 @@ class Tree {
    leaf_value_[num_leaves_ - 1] = MaybeRoundToZero(leaf_value_[num_leaves_ - 1] + val);
    if (is_linear_) {
-     #pragma omp parallel for schedule(static, 1024) if (num_leaves_ >= 2048)
+     #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1024) if (num_leaves_ >= 2048)
      for (int i = 0; i < num_leaves_ - 1; ++i) {
        leaf_const_[i] = MaybeRoundToZero(leaf_const_[i] + val);
      }
include/LightGBM/utils/common.h

@@ -691,7 +691,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
   size_t inner_size = (len + num_threads - 1) / num_threads;
   inner_size = std::max(inner_size, kMinInnerLen);
   num_threads = static_cast<int>((len + inner_size - 1) / inner_size);
-  #pragma omp parallel for schedule(static, 1)
+  #pragma omp parallel for num_threads(num_threads) schedule(static, 1)
   for (int i = 0; i < num_threads; ++i) {
     size_t left = inner_size * i;
     size_t right = left + inner_size;

@@ -707,7 +707,7 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
   // Recursive merge
   while (s < len) {
     int loop_size = static_cast<int>((len + s * 2 - 1) / (s * 2));
-    #pragma omp parallel for schedule(static, 1)
+    #pragma omp parallel for num_threads(num_threads) schedule(static, 1)
     for (int i = 0; i < loop_size; ++i) {
       size_t left = i * 2 * s;
       size_t mid = left + s;
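Note that `ParallelSort` does not use `OMP_NUM_THREADS()`: it first recomputes `num_threads` so that there is exactly one block of at least `kMinInnerLen` elements per thread, then pins the region to that same count, so `schedule(static, 1)` hands each thread exactly one block. A worked example of the block-sizing arithmetic (assuming `kMinInnerLen` is 1024 for illustration; the real constant is defined elsewhere in common.h):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const size_t kMinInnerLen = 1024;  // assumed value for this example
      size_t len = 3000;                 // elements to sort
      int num_threads = 8;               // threads available
      size_t inner_size = (len + num_threads - 1) / num_threads;  // ceil(3000/8) = 375
      inner_size = std::max(inner_size, kMinInnerLen);            // clamped up to 1024
      num_threads = static_cast<int>((len + inner_size - 1) / inner_size);  // ceil(3000/1024) = 3
      std::printf("%d blocks of up to %zu elements\n", num_threads, inner_size);
      return 0;
    }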
include/LightGBM/utils/threading.h

@@ -73,7 +73,7 @@ class Threading {
   INDEX_T num_inner = end - start;
   BlockInfo<INDEX_T>(num_inner, min_block_size, &n_block, &num_inner);
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static, 1)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
   for (int i = 0; i < n_block; ++i) {
     OMP_LOOP_EX_BEGIN();
     INDEX_T inner_start = start + num_inner * i;
src/application/application.cpp

@@ -227,7 +227,7 @@ void Application::Predict() {
   TextReader<int> result_reader(config_.output_result.c_str(), false);
   result_reader.ReadAllLines();
   std::vector<std::vector<int>> pred_leaf(result_reader.Lines().size());
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < static_cast<int>(result_reader.Lines().size()); ++i) {
     pred_leaf[i] = Common::StringToArray<int>(result_reader.Lines()[i], '\t');
     // Free memory
src/application/predictor.hpp

@@ -233,7 +233,7 @@ class Predictor {
   std::vector<std::pair<int, double>> oneline_features;
   std::vector<std::string> result_to_write(lines.size());
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static) firstprivate(oneline_features)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) firstprivate(oneline_features)
   for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
     OMP_LOOP_EX_BEGIN();
     oneline_features.clear();
src/boosting/gbdt.cpp

@@ -255,7 +255,7 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
   std::vector<int> leaf_pred(num_data_);
   if (linear_tree_) {
     std::vector<int> max_leaves_by_thread = std::vector<int>(OMP_NUM_THREADS(), 0);
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < static_cast<int>(tree_leaf_prediction.size()); ++i) {
       int tid = omp_get_thread_num();
       for (size_t j = 0; j < tree_leaf_prediction[i].size(); ++j) {

@@ -270,7 +270,7 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
   Boosting();
   for (int tree_id = 0; tree_id < num_tree_per_iteration_; ++tree_id) {
     int model_index = iter * num_tree_per_iteration_ + tree_id;
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < num_data_; ++i) {
       leaf_pred[i] = tree_leaf_prediction[i][model_index];
       CHECK_LT(leaf_pred[i], models_[model_index]->num_leaves());

@@ -348,7 +348,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   if (data_sample_strategy_->IsHessianChange()) {
     // need to copy customized gradients when using GOSS
     int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int64_t i = 0; i < total_size; ++i) {
       gradients_[i] = gradients[i];
       hessians_[i] = hessians[i];

@@ -669,7 +669,7 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
   }
   #endif  // USE_CUDA
   if (objective_function_ != nullptr) {
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (data_size_t i = 0; i < num_data; ++i) {
       std::vector<double> tree_pred(num_tree_per_iteration_);
       for (int j = 0; j < num_tree_per_iteration_; ++j) {

@@ -682,7 +682,7 @@ void GBDT::GetPredictAt(int data_idx, double* out_result, int64_t* out_len) {
       }
     }
   } else {
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (data_size_t i = 0; i < num_data; ++i) {
       for (int j = 0; j < num_tree_per_iteration_; ++j) {
         out_result[j * num_data + i] = static_cast<double>(raw_scores[j * num_data + i]);
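The `@@ -255` hunk above shows why the explicit clause matters beyond style: `max_leaves_by_thread` is sized with `OMP_NUM_THREADS()` and indexed with `omp_get_thread_num()`, so the buffer stays in bounds only if the region actually runs with that many threads. A self-contained demo (not LightGBM code) of the same per-thread-accumulator pattern:

    #include <algorithm>
    #include <cstdio>
    #include <vector>
    #include <omp.h>

    int main() {
      const int n = 1000;
      const int num_threads = 4;                       // team size fixed up front
      std::vector<int> max_by_thread(num_threads, 0);  // one slot per thread
      #pragma omp parallel for num_threads(num_threads) schedule(static)
      for (int i = 0; i < n; ++i) {
        const int tid = omp_get_thread_num();          // < num_threads by construction
        max_by_thread[tid] = std::max(max_by_thread[tid], i % 37);
      }
      // serial reduction over the per-thread partial results
      std::printf("max = %d\n", *std::max_element(max_by_thread.begin(), max_by_thread.end()));
      return 0;
    }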
src/boosting/gbdt.h

@@ -434,7 +434,7 @@ class GBDT : public GBDTBase {
   }
   start_iteration_for_pred_ = start_iteration;
   if (is_pred_contrib) {
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
       models_[i]->RecomputeMaxDepth();
     }
src/boosting/gbdt_model_text.cpp

@@ -354,7 +354,7 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int
   std::vector<std::string> tree_strs(num_used_model - start_model);
   std::vector<size_t> tree_sizes(num_used_model - start_model);
   // output tree models
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = start_model; i < num_used_model; ++i) {
     const int idx = i - start_model;
     tree_strs[idx] = "Tree=" + std::to_string(idx) + '\n';

@@ -552,7 +552,7 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
     models_.emplace_back(nullptr);
   }
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < num_trees; ++i) {
     OMP_LOOP_EX_BEGIN();
     auto cur_p = p + tree_boundries[i];
src/boosting/rf.hpp

@@ -97,7 +97,7 @@ class RF : public GBDT {
   }
   size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
   std::vector<double> tmp_scores(total_size, 0.0f);
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int j = 0; j < num_tree_per_iteration_; ++j) {
     size_t offset = static_cast<size_t>(j) * num_data_;
     for (data_size_t i = 0; i < num_data_; ++i) {
src/boosting/score_updater.hpp

@@ -39,7 +39,7 @@ class ScoreUpdater {
     Log::Fatal("Number of class for initial score error");
   }
   has_init_score_ = true;
-  #pragma omp parallel for schedule(static, 512) if (total_size >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (total_size >= 1024)
   for (int64_t i = 0; i < total_size; ++i) {
     score_[i] = init_score[i];
   }

@@ -54,7 +54,7 @@ class ScoreUpdater {
   virtual inline void AddScore(double val, int cur_tree_id) {
     Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
     const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
-    #pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data_ >= 1024)
     for (int i = 0; i < num_data_; ++i) {
       score_[offset + i] += val;
     }

@@ -62,7 +62,7 @@ class ScoreUpdater {
   virtual inline void MultiplyScore(double val, int cur_tree_id) {
     const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
-    #pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data_ >= 1024)
     for (int i = 0; i < num_data_; ++i) {
       score_[offset + i] *= val;
     }
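These hunks combine three clauses: `num_threads()` fixes the team size, `schedule(static, 512)` hands out chunks of 512 iterations, and `if (... >= 1024)` skips parallelization entirely for small inputs, where spinning up a team would cost more than the loop itself. A minimal demo (not LightGBM code) of how the `if` clause gates the team size:

    #include <cstdio>
    #include <omp.h>

    int main() {
      for (int n : {100, 5000}) {
        int team = 0;
        // when the guard is false, the region runs on the calling thread alone;
        // num_threads(4) is a request, so the runtime may also grant fewer
        #pragma omp parallel num_threads(4) if (n >= 1024)
        {
          #pragma omp single
          team = omp_get_num_threads();
        }
        std::printf("n = %4d -> team of %d thread(s)\n", n, team);
      }
      return 0;
    }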
src/c_api.cpp

@@ -437,7 +437,7 @@ class Booster {
   int64_t num_pred_in_one_row = boosting_->NumPredictOneRow(start_iteration, num_iteration, is_predict_leaf, predict_contrib);
   auto pred_fun = predictor.GetPredictFunction();
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     auto one_row = get_row_fun(i);

@@ -459,7 +459,7 @@ class Booster {
   auto pred_sparse_fun = predictor.GetPredictSparseFunction();
   std::vector<std::vector<std::unordered_map<int, double>>>& agg = *agg_ptr;
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int64_t i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     auto one_row = get_row_fun(i);

@@ -551,7 +551,7 @@ class Booster {
   indptr_index++;
   int64_t matrix_start_index = m * static_cast<int64_t>(agg.size());
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int64_t i = 0; i < static_cast<int64_t>(agg.size()); ++i) {
     OMP_LOOP_EX_BEGIN();
     auto row_vector = agg[i];

@@ -663,7 +663,7 @@ class Booster {
   }
   // Note: we parallelize across matrices instead of rows because of the column_counts[m][col_idx] increment inside the loop
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int m = 0; m < num_matrices; ++m) {
     OMP_LOOP_EX_BEGIN();
     for (int64_t i = 0; i < static_cast<int64_t>(agg.size()); ++i) {

@@ -1074,7 +1074,7 @@ int LGBM_DatasetPushRows(DatasetHandle dataset,
     p_dataset->ResizeRaw(p_dataset->num_numeric_features() + nrow);
   }
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();

@@ -1116,7 +1116,7 @@ int LGBM_DatasetPushRowsWithMetadata(DatasetHandle dataset,
   const int max_omp_threads = p_dataset->omp_max_threads() > 0 ? p_dataset->omp_max_threads() : OMP_NUM_THREADS();
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     // convert internal thread id to be unique based on external thread id

@@ -1153,7 +1153,7 @@ int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
     p_dataset->ResizeRaw(p_dataset->num_numeric_features() + nrow);
   }
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();

@@ -1199,7 +1199,7 @@ int LGBM_DatasetPushRowsByCSRWithMetadata(DatasetHandle dataset,
   const int max_omp_threads = p_dataset->omp_max_threads() > 0 ? p_dataset->omp_max_threads() : OMP_NUM_THREADS();
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nrow; ++i) {
     OMP_LOOP_EX_BEGIN();
     // convert internal thread id to be unique based on external thread id

@@ -1319,7 +1319,7 @@ int LGBM_DatasetCreateFromMats(int32_t nmat,
   int32_t start_row = 0;
   for (int j = 0; j < nmat; ++j) {
     OMP_INIT_EX();
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < nrow[j]; ++i) {
       OMP_LOOP_EX_BEGIN();
       const int tid = omp_get_thread_num();

@@ -1394,7 +1394,7 @@ int LGBM_DatasetCreateFromCSR(const void* indptr,
     }
   }
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < nindptr - 1; ++i) {
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();

@@ -1465,7 +1465,7 @@ int LGBM_DatasetCreateFromCSRFunc(void* get_row_funptr,
   OMP_INIT_EX();
   std::vector<std::pair<int, double>> thread_buffer;
-  #pragma omp parallel for schedule(static) private(thread_buffer)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) private(thread_buffer)
   for (int i = 0; i < num_rows; ++i) {
     OMP_LOOP_EX_BEGIN();
     {

@@ -1506,7 +1506,7 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
   std::vector<std::vector<double>> sample_values(ncol_ptr - 1);
   std::vector<std::vector<int>> sample_idx(ncol_ptr - 1);
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
     OMP_LOOP_EX_BEGIN();
     CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);

@@ -1534,7 +1534,7 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
     reinterpret_cast<const Dataset*>(reference));
   }
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int i = 0; i < ncol_ptr - 1; ++i) {
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();
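The source comment in the `@@ -663` hunk is worth dwelling on: that loop is parallelized over matrices, not rows, because rows of the same matrix increment shared `column_counts[m][col_idx]` counters. A hedged sketch (names assumed, not the actual c_api.cpp code) of why the matrix-level split is race-free:

    #include <vector>

    // Each thread owns all of matrix m's counters, so the ++ below never races.
    // Parallelizing the inner (row) loop instead would let two threads hit the
    // same counter slot concurrently.
    void CountColumns(const std::vector<std::vector<std::vector<int>>>& rows_by_matrix,
                      std::vector<std::vector<int>>* column_counts) {
      const int num_matrices = static_cast<int>(rows_by_matrix.size());
      #pragma omp parallel for num_threads(4) schedule(static)
      for (int m = 0; m < num_matrices; ++m) {
        for (const auto& row : rows_by_matrix[m]) {
          for (int col_idx : row) {
            ++(*column_counts)[m][col_idx];
          }
        }
      }
    }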
src/io/dataset.cpp

@@ -536,7 +536,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures(const std::vector<uint32_t>&
   std::vector<uint32_t> most_freq_bins;
   double sum_sparse_rate = 0;
   for (int i = 0; i < num_feature; ++i) {
-    #pragma omp parallel for schedule(static, 1)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
     for (int tid = 0; tid < num_threads; ++tid) {
       iters[tid].emplace_back(feature_groups_[multi_group_id]->SubFeatureIterator(i));

@@ -584,7 +584,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures(const std::vector<uint32_t>& of
   for (int fid = 0; fid < feature_groups_[gid]->num_feature_; ++fid) {
     const auto& bin_mapper = feature_groups_[gid]->bin_mappers_[fid];
     most_freq_bins.push_back(bin_mapper->GetMostFreqBin());
-    #pragma omp parallel for schedule(static, 1)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
     for (int tid = 0; tid < num_threads; ++tid) {
       iters[tid].emplace_back(feature_groups_[gid]->SubFeatureIterator(fid));

@@ -823,7 +823,7 @@ void Dataset::ReSize(data_size_t num_data) {
   if (num_data_ != num_data) {
     num_data_ = num_data;
     OMP_INIT_EX();
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int group = 0; group < num_groups_; ++group) {
       OMP_LOOP_EX_BEGIN();
       feature_groups_[group]->ReSize(num_data_);

@@ -856,7 +856,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
   int num_copy_tasks = static_cast<int>(group_ids.size());
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(dynamic)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(dynamic)
   for (int task_id = 0; task_id < num_copy_tasks; ++task_id) {
     OMP_LOOP_EX_BEGIN();
     int group = group_ids[task_id];

@@ -875,7 +875,7 @@ void Dataset::CopySubrow(const Dataset* fullset,
   num_numeric_features_ = fullset->num_numeric_features_;
   if (has_raw_) {
     ResizeRaw(num_used_indices);
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < num_used_indices; ++i) {
       for (int j = 0; j < num_numeric_features_; ++j) {
         raw_data_[j][i] = fullset->raw_data_[j][used_indices[i]];

@@ -1282,7 +1282,7 @@ void Dataset::ConstructHistogramsInner(
   int16_t* ordered_gradients_and_hessians = reinterpret_cast<int16_t*>(ordered_gradients);
   const int16_t* gradients_and_hessians = reinterpret_cast<const int16_t*>(gradients);
   if (USE_INDICES) {
-    #pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data >= 1024)
     for (data_size_t i = 0; i < num_data; ++i) {
       ordered_gradients_and_hessians[i] = gradients_and_hessians[data_indices[i]];
     }

@@ -1292,7 +1292,7 @@ void Dataset::ConstructHistogramsInner(
   } else {
     if (USE_INDICES) {
       if (USE_HESSIAN) {
-        #pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
+        #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data >= 1024)
        for (data_size_t i = 0; i < num_data; ++i) {
          ordered_gradients[i] = gradients[data_indices[i]];
          ordered_hessians[i] = hessians[data_indices[i]];

@@ -1300,7 +1300,7 @@ void Dataset::ConstructHistogramsInner(
        ptr_ordered_grad = ordered_gradients;
        ptr_ordered_hess = ordered_hessians;
      } else {
-       #pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
+       #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data >= 1024)
        for (data_size_t i = 0; i < num_data; ++i) {
          ordered_gradients[i] = gradients[data_indices[i]];
        }
src/io/dataset_loader.cpp

@@ -625,7 +625,7 @@ Dataset* DatasetLoader::ConstructFromSampleData(double** sample_values,
   if (Network::num_machines() == 1) {
     // if only one machine, find bin locally
     OMP_INIT_EX();
-    #pragma omp parallel for schedule(guided)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(guided)
     for (int i = 0; i < num_col; ++i) {
       OMP_LOOP_EX_BEGIN();
       if (ignore_features_.count(i) > 0) {

@@ -674,7 +674,7 @@ Dataset* DatasetLoader::ConstructFromSampleData(double** sample_values,
   }
   len[num_machines - 1] = num_total_features - start[num_machines - 1];
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(guided)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(guided)
   for (int i = 0; i < len[rank]; ++i) {
     OMP_LOOP_EX_BEGIN();
     if (ignore_features_.count(start[rank] + i) > 0) {

@@ -1136,7 +1136,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
   if (num_machines == 1) {
     // if only one machine, find bin locally
     OMP_INIT_EX();
-    #pragma omp parallel for schedule(guided)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(guided)
     for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
       OMP_LOOP_EX_BEGIN();
       if (ignore_features_.count(i) > 0) {

@@ -1177,7 +1177,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
   }
   len[num_machines - 1] = dataset->num_total_features_ - start[num_machines - 1];
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(guided)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(guided)
   for (int i = 0; i < len[rank]; ++i) {
     OMP_LOOP_EX_BEGIN();
     if (ignore_features_.count(start[rank] + i) > 0) {

@@ -1268,7 +1268,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
   if (!predict_fun_) {
     OMP_INIT_EX();
     // if doesn't need to prediction with initial model
-    #pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
     for (data_size_t i = 0; i < dataset->num_data_; ++i) {
       OMP_LOOP_EX_BEGIN();
       const int tid = omp_get_thread_num();

@@ -1319,7 +1319,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
     OMP_INIT_EX();
     // if need to prediction with initial model
     std::vector<double> init_score(static_cast<size_t>(dataset->num_data_) * num_class_);
-    #pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
     for (data_size_t i = 0; i < dataset->num_data_; ++i) {
       OMP_LOOP_EX_BEGIN();
       const int tid = omp_get_thread_num();

@@ -1394,7 +1394,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
   double tmp_label = 0.0f;
   std::vector<float> feature_row(dataset->num_features_);
   OMP_INIT_EX();
-  #pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) private(oneline_features) firstprivate(tmp_label, feature_row)
   for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();
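The data-sharing clauses on these loops deserve a note: `private(oneline_features)` gives each thread a fresh, default-constructed vector, while `firstprivate(tmp_label, feature_row)` copies the already-initialized values in, so `feature_row` arrives pre-sized in every thread and needs no per-iteration reallocation. A small demo (not LightGBM code) of the difference:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<float> feature_row(8, 0.0f);  // pre-sized scratch buffer
      double tmp_label = 0.0;
      #pragma omp parallel for num_threads(2) schedule(static) firstprivate(tmp_label, feature_row)
      for (int i = 0; i < 4; ++i) {
        // each thread's copy kept the size 8 it was constructed with
        feature_row[i % feature_row.size()] = static_cast<float>(i);
        tmp_label = i;  // mutates the private copy only
      }
      // firstprivate copies in but never back out
      std::printf("outer tmp_label is still %.1f\n", tmp_label);
      return 0;
    }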
src/io/metadata.cpp

@@ -101,7 +101,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
   num_data_ = num_used_indices;
   label_ = std::vector<label_t>(num_used_indices);
-  #pragma omp parallel for schedule(static, 512) if (num_used_indices >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_used_indices >= 1024)
   for (data_size_t i = 0; i < num_used_indices; ++i) {
     label_[i] = fullset.label_[used_indices[i]];
   }

@@ -109,7 +109,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
   if (!fullset.weights_.empty()) {
     weights_ = std::vector<label_t>(num_used_indices);
     num_weights_ = num_used_indices;
-    #pragma omp parallel for schedule(static, 512) if (num_used_indices >= 1024)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_used_indices >= 1024)
     for (data_size_t i = 0; i < num_used_indices; ++i) {
       weights_[i] = fullset.weights_[used_indices[i]];
     }

@@ -121,7 +121,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
   int num_class = static_cast<int>(fullset.num_init_score_ / fullset.num_data_);
   init_score_ = std::vector<double>(static_cast<size_t>(num_used_indices) * num_class);
   num_init_score_ = static_cast<int64_t>(num_used_indices) * num_class;
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int k = 0; k < num_class; ++k) {
     const size_t offset_dest = static_cast<size_t>(k) * num_data_;
     const size_t offset_src = static_cast<size_t>(k) * fullset.num_data_;

@@ -173,7 +173,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
   auto old_label = label_;
   num_data_ = static_cast<data_size_t>(used_indices.size());
   label_ = std::vector<label_t>(num_data_);
-  #pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data_ >= 1024)
   for (data_size_t i = 0; i < num_data_; ++i) {
     label_[i] = old_label[used_indices[i]];
   }

@@ -255,7 +255,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
   auto old_weights = weights_;
   num_weights_ = num_data_;
   weights_ = std::vector<label_t>(num_data_);
-  #pragma omp parallel for schedule(static, 512)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512)
   for (int i = 0; i < static_cast<int>(used_data_indices.size()); ++i) {
     weights_[i] = old_weights[used_data_indices[i]];
   }

@@ -274,7 +274,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
   auto old_positions = positions_;
   num_positions_ = num_data_;
   positions_ = std::vector<data_size_t>(num_data_);
-  #pragma omp parallel for schedule(static, 512)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512)
   for (int i = 0; i < static_cast<int>(used_data_indices.size()); ++i) {
     positions_[i] = old_positions[used_data_indices[i]];
   }

@@ -335,7 +335,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
   int num_class = static_cast<int>(num_init_score_ / num_all_data);
   num_init_score_ = static_cast<int64_t>(num_data_) * num_class;
   init_score_ = std::vector<double>(num_init_score_);
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int k = 0; k < num_class; ++k) {
     const size_t offset_dest = static_cast<size_t>(k) * num_data_;
     const size_t offset_src = static_cast<size_t>(k) * num_all_data;

@@ -369,7 +369,7 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
   if (init_score_.empty()) {
     init_score_.resize(len);
   }
   num_init_score_ = len;
-  #pragma omp parallel for schedule(static, 512) if (num_init_score_ >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_init_score_ >= 1024)
   for (int64_t i = 0; i < num_init_score_; ++i) {
     init_score_[i] = Common::AvoidInf(init_score[i]);
   }

@@ -413,7 +413,7 @@ void Metadata::SetLabel(const label_t* label, data_size_t len) {
   }
   if (label_.empty()) {
     label_.resize(num_data_);
   }
-  #pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_data_ >= 1024)
   for (data_size_t i = 0; i < num_data_; ++i) {
     label_[i] = Common::AvoidInf(label[i]);
   }

@@ -452,7 +452,7 @@ void Metadata::SetWeights(const label_t* weights, data_size_t len) {
   if (weights_.empty()) {
     weights_.resize(num_data_);
   }
   num_weights_ = num_data_;
-  #pragma omp parallel for schedule(static, 512) if (num_weights_ >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_weights_ >= 1024)
   for (data_size_t i = 0; i < num_weights_; ++i) {
     weights_[i] = Common::AvoidInf(weights[i]);
   }

@@ -492,7 +492,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
     return;
   }
   data_size_t sum = 0;
-  #pragma omp parallel for schedule(static) reduction(+:sum)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum)
   for (data_size_t i = 0; i < len; ++i) {
     sum += query[i];
   }

@@ -554,7 +554,7 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
   Log::Debug("number of unique positions found = %ld", position_ids_.size());
-  #pragma omp parallel for schedule(static, 512) if (num_positions_ >= 1024)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_positions_ >= 1024)
   for (data_size_t i = 0; i < num_positions_; ++i) {
     positions_[i] = map_id2pos.at(positions[i]);
   }

@@ -590,7 +590,7 @@ void Metadata::LoadWeights() {
   Log::Info("Loading weights...");
   num_weights_ = static_cast<data_size_t>(reader.Lines().size());
   weights_ = std::vector<label_t>(num_weights_);
-  #pragma omp parallel for schedule(static)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (data_size_t i = 0; i < num_weights_; ++i) {
     double tmp_weight = 0.0f;
     Common::Atof(reader.Lines()[i].c_str(), &tmp_weight);

@@ -645,7 +645,7 @@ void Metadata::LoadInitialScore(const std::string& data_filename) {
   init_score_ = std::vector<double>(num_init_score_);
   if (num_class == 1) {
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (data_size_t i = 0; i < num_line; ++i) {
       double tmp = 0.0f;
       Common::Atof(reader.Lines()[i].c_str(), &tmp);

@@ -653,7 +653,7 @@ void Metadata::LoadInitialScore(const std::string& data_filename) {
     }
   } else {
     std::vector<std::string> oneline_init_score;
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (data_size_t i = 0; i < num_line; ++i) {
       double tmp = 0.0f;
       oneline_init_score = Common::Split(reader.Lines()[i].c_str(), '\t');
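One hunk here (`@@ -492`, in `Metadata::SetQuery`) also carries a `reduction(+:sum)` clause: each thread accumulates into a private `sum` and OpenMP adds the partial sums together when the region ends, which is what keeps `sum += query[i]` race-free. A minimal demo:

    #include <cstdio>

    int main() {
      const int len = 100000;
      long long sum = 0;
      // each thread gets a private sum initialized to 0; the copies are
      // combined when the region ends
      #pragma omp parallel for num_threads(4) schedule(static) reduction(+:sum)
      for (int i = 0; i < len; ++i) {
        sum += 1;  // stands in for query[i]
      }
      std::printf("sum = %lld\n", sum);  // deterministically 100000
      return 0;
    }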
src/io/multi_val_dense_bin.hpp

@@ -271,7 +271,7 @@ class MultiValDenseBin : public MultiValBin {
   data_size_t block_size = num_data_;
   Threading::BlockInfo<data_size_t>(num_data_, 1024, &n_block, &block_size);
-  #pragma omp parallel for schedule(static, 1)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
   for (int tid = 0; tid < n_block; ++tid) {
     data_size_t start = tid * block_size;
     data_size_t end = std::min(num_data_, start + block_size);
src/io/multi_val_sparse_bin.hpp

@@ -85,7 +85,7 @@ class MultiValSparseBin : public MultiValBin {
     offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
   }
   data_.resize(row_ptr_[num_data_]);
-  #pragma omp parallel for schedule(static, 1)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
   for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
     std::copy_n(t_data_[tid].data(), sizes[tid + 1], data_.data() + offsets[tid]);

@@ -344,7 +344,7 @@ class MultiValSparseBin : public MultiValBin {
     num_data_, 1024, &n_block, &block_size);
   std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
   const int pre_alloc_size = 50;
-  #pragma omp parallel for schedule(static, 1)
+  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 1)
   for (int tid = 0; tid < n_block; ++tid) {
     data_size_t start = tid * block_size;
     data_size_t end = std::min(num_data_, start + block_size);
src/io/train_share_states.cpp

@@ -56,7 +56,7 @@ void MultiValBinWrapper::HistMove(const std::vector<hist_t,
   if (HIST_BITS == 32) {
     const int64_t* src = reinterpret_cast<const int64_t*>(hist_buf.data()) +
       hist_buf.size() / 2 - static_cast<size_t>(num_bin_aligned_);
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for schedule(static) num_threads(num_threads_)
     for (int i = 0; i < static_cast<int>(hist_move_src_.size()); ++i) {
       std::copy_n(src + hist_move_src_[i] / 2, hist_move_size_[i] / 2,
                   reinterpret_cast<int64_t*>(origin_hist_data_) + hist_move_dest_[i] / 2);

@@ -65,14 +65,14 @@ void MultiValBinWrapper::HistMove(const std::vector<hist_t,
     const int32_t* src = reinterpret_cast<const int32_t*>(hist_buf.data()) +
       hist_buf.size() / 2 - static_cast<size_t>(num_bin_aligned_);
     if (is_use_subcol_) {
-      #pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static) num_threads(num_threads_)
      for (int i = 0; i < static_cast<int>(hist_move_src_.size()); ++i) {
        std::copy_n(src + hist_move_src_[i] / 2, hist_move_size_[i] / 2,
                    reinterpret_cast<int32_t*>(origin_hist_data_) + hist_move_dest_[i] / 2);
      }
    } else {
      int32_t* orig_ptr = reinterpret_cast<int32_t*>(origin_hist_data_);
-     #pragma omp parallel for schedule(static)
+     #pragma omp parallel for schedule(static) num_threads(num_threads_)
      for (int i = 0; i < num_bin_; ++i) {
        orig_ptr[i] = src[i];
      }

@@ -81,7 +81,7 @@ void MultiValBinWrapper::HistMove(const std::vector<hist_t,
   } else {
     const hist_t* src = hist_buf.data() + hist_buf.size() -
       2 * static_cast<size_t>(num_bin_aligned_);
-    #pragma omp parallel for schedule(static)
+    #pragma omp parallel for schedule(static) num_threads(num_threads_)
     for (int i = 0; i < static_cast<int>(hist_move_src_.size()); ++i) {
       std::copy_n(src + hist_move_src_[i], hist_move_size_[i],
                   origin_hist_data_ + hist_move_dest_[i]);
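Unlike the earlier files, train_share_states.cpp pins its regions with `num_threads(num_threads_)`, a value the wrapper object evidently stores, rather than calling `OMP_NUM_THREADS()` at each site. A hedged sketch (an assumed class, not the real `MultiValBinWrapper`) of that cache-the-count-once design:

    // Assumed illustration: the thread count is fixed at construction time,
    // so every kernel this object launches uses the same team size.
    class HistMover {
     public:
      explicit HistMover(int num_threads) : num_threads_(num_threads) {}

      void Move(const double* src, double* dst, int n) const {
        #pragma omp parallel for schedule(static) num_threads(num_threads_)
        for (int i = 0; i < n; ++i) {
          dst[i] = src[i];
        }
      }

     private:
      int num_threads_;
    };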