Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
ddcbe71c
Commit
ddcbe71c
authored
Apr 07, 2017
by
Guolin Ke
Browse files
fix some light omp loop .
parent
7586d9a8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
31 additions
and
26 deletions
+31
-26
include/LightGBM/tree.h
include/LightGBM/tree.h
+1
-1
include/LightGBM/utils/array_args.h
include/LightGBM/utils/array_args.h
+1
-1
src/treelearner/data_partition.hpp
src/treelearner/data_partition.hpp
+11
-11
src/treelearner/feature_histogram.hpp
src/treelearner/feature_histogram.hpp
+7
-5
src/treelearner/serial_tree_learner.cpp
src/treelearner/serial_tree_learner.cpp
+11
-8
No files found.
include/LightGBM/tree.h
View file @
ddcbe71c
...
...
@@ -102,7 +102,7 @@ public:
* \param rate The factor of shrinkage
*/
inline
void
Shrinkage
(
double
rate
)
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
, 512) if (num_leaves_ >= 1024
)
for
(
int
i
=
0
;
i
<
num_leaves_
;
++
i
)
{
leaf_value_
[
i
]
*=
rate
;
if
(
leaf_value_
[
i
]
>
kMaxTreeOutput
)
{
leaf_value_
[
i
]
=
kMaxTreeOutput
;
}
...
...
include/LightGBM/utils/array_args.h
View file @
ddcbe71c
...
...
@@ -47,7 +47,7 @@ public:
if
(
array
.
empty
())
{
return
0
;
}
if
(
array
.
size
()
>
10
0
)
{
if
(
array
.
size
()
>
10
24
)
{
return
ArgMaxMT
(
array
);
}
else
{
size_t
arg_max
=
0
;
...
...
src/treelearner/data_partition.hpp
View file @
ddcbe71c
...
...
@@ -24,8 +24,8 @@ public:
temp_left_indices_
.
resize
(
num_data_
);
temp_right_indices_
.
resize
(
num_data_
);
used_data_indices_
=
nullptr
;
#pragma omp parallel
#pragma omp master
#pragma omp parallel
#pragma omp master
{
num_threads_
=
omp_get_num_threads
();
}
...
...
@@ -60,7 +60,7 @@ public:
if
(
used_data_indices_
==
nullptr
)
{
// if using all data
leaf_count_
[
0
]
=
num_data_
;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
indices_
[
i
]
=
i
;
}
...
...
@@ -92,7 +92,7 @@ public:
* \param right_leaf index of right leaf
*/
void
Split
(
int
leaf
,
const
Dataset
*
dataset
,
int
feature
,
uint32_t
threshold
,
int
right_leaf
)
{
const
data_size_t
min_inner_size
=
1000
;
const
data_size_t
min_inner_size
=
512
;
// get leaf boundary
const
data_size_t
begin
=
leaf_begin_
[
leaf
];
const
data_size_t
cnt
=
leaf_count_
[
leaf
];
...
...
@@ -101,7 +101,7 @@ public:
if
(
inner_size
<
min_inner_size
)
{
inner_size
=
min_inner_size
;
}
// split data multi-threading
OMP_INIT_EX
();
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
left_cnts_buf_
[
i
]
=
0
;
...
...
@@ -112,7 +112,7 @@ public:
if
(
cur_start
+
cur_cnt
>
cnt
)
{
cur_cnt
=
cnt
-
cur_start
;
}
// split data inner, reduce the times of function called
data_size_t
cur_left_count
=
dataset
->
Split
(
feature
,
threshold
,
indices_
.
data
()
+
begin
+
cur_start
,
cur_cnt
,
temp_left_indices_
.
data
()
+
cur_start
,
temp_right_indices_
.
data
()
+
cur_start
);
temp_left_indices_
.
data
()
+
cur_start
,
temp_right_indices_
.
data
()
+
cur_start
);
offsets_buf_
[
i
]
=
cur_start
;
left_cnts_buf_
[
i
]
=
cur_left_count
;
right_cnts_buf_
[
i
]
=
cur_cnt
-
cur_left_count
;
...
...
@@ -128,15 +128,15 @@ public:
}
left_cnt
=
left_write_pos_buf_
[
num_threads_
-
1
]
+
left_cnts_buf_
[
num_threads_
-
1
];
// copy back indices of right leaf to indices_
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for
(
int
i
=
0
;
i
<
num_threads_
;
++
i
)
{
if
(
left_cnts_buf_
[
i
]
>
0
)
{
std
::
memcpy
(
indices_
.
data
()
+
begin
+
left_write_pos_buf_
[
i
],
temp_left_indices_
.
data
()
+
offsets_buf_
[
i
],
left_cnts_buf_
[
i
]
*
sizeof
(
data_size_t
));
std
::
memcpy
(
indices_
.
data
()
+
begin
+
left_write_pos_buf_
[
i
],
temp_left_indices_
.
data
()
+
offsets_buf_
[
i
],
left_cnts_buf_
[
i
]
*
sizeof
(
data_size_t
));
}
if
(
right_cnts_buf_
[
i
]
>
0
)
{
std
::
memcpy
(
indices_
.
data
()
+
begin
+
left_cnt
+
right_write_pos_buf_
[
i
],
temp_right_indices_
.
data
()
+
offsets_buf_
[
i
],
right_cnts_buf_
[
i
]
*
sizeof
(
data_size_t
));
std
::
memcpy
(
indices_
.
data
()
+
begin
+
left_cnt
+
right_write_pos_buf_
[
i
],
temp_right_indices_
.
data
()
+
offsets_buf_
[
i
],
right_cnts_buf_
[
i
]
*
sizeof
(
data_size_t
));
}
}
// update leaf boundary
...
...
src/treelearner/feature_histogram.hpp
View file @
ddcbe71c
...
...
@@ -359,9 +359,10 @@ public:
void
DynamicChangeSize
(
const
Dataset
*
train_data
,
const
TreeConfig
*
tree_config
,
int
cache_size
,
int
total_size
)
{
if
(
feature_metas_
.
empty
())
{
feature_metas_
.
resize
(
train_data
->
num_features
());
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
train_data
->
num_features
();
++
i
)
{
int
num_feature
=
train_data
->
num_features
();
feature_metas_
.
resize
(
num_feature
);
#pragma omp parallel for schedule(static, 512) if(num_feature >= 1024)
for
(
int
i
=
0
;
i
<
num_feature
;
++
i
)
{
feature_metas_
[
i
].
num_bin
=
train_data
->
FeatureNumBin
(
i
);
if
(
train_data
->
FeatureBinMapper
(
i
)
->
GetDefaultBin
()
==
0
)
{
feature_metas_
[
i
].
bias
=
1
;
...
...
@@ -400,8 +401,9 @@ public:
}
void
ResetConfig
(
const
TreeConfig
*
tree_config
)
{
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
feature_metas_
.
size
());
++
i
)
{
int
size
=
static_cast
<
int
>
(
feature_metas_
.
size
());
#pragma omp parallel for schedule(static, 512) if(size >= 1024)
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
feature_metas_
[
i
].
tree_config
=
tree_config
;
}
}
...
...
src/treelearner/serial_tree_learner.cpp
View file @
ddcbe71c
...
...
@@ -259,14 +259,15 @@ void SerialTreeLearner::BeforeTrain() {
std
::
memset
(
is_feature_used_
.
data
(),
0
,
sizeof
(
int8_t
)
*
num_features_
);
// Get used feature at current tree
auto
used_feature_indices
=
random_
.
Sample
(
train_data_
->
num_total_features
(),
used_feature_cnt
);
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
used_feature_indices
.
size
());
++
i
)
{
int
omp_loop_size
=
static_cast
<
int
>
(
used_feature_indices
.
size
());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
for
(
int
i
=
0
;
i
<
omp_loop_size
;
++
i
)
{
int
inner_feature_index
=
train_data_
->
InnerFeatureIndex
(
used_feature_indices
[
i
]);
if
(
inner_feature_index
<
0
)
{
continue
;
}
is_feature_used_
[
inner_feature_index
]
=
1
;
}
}
else
{
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
, 512) if (num_features_ >= 1024
)
for
(
int
i
=
0
;
i
<
num_features_
;
++
i
)
{
is_feature_used_
[
i
]
=
1
;
}
...
...
@@ -314,7 +315,8 @@ void SerialTreeLearner::BeforeTrain() {
const
data_size_t
*
indices
=
data_partition_
->
indices
();
data_size_t
begin
=
data_partition_
->
leaf_begin
(
0
);
data_size_t
end
=
begin
+
data_partition_
->
leaf_count
(
0
);
#pragma omp parallel for schedule(static)
data_size_t
loop_size
=
end
-
begin
;
#pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
}
...
...
@@ -327,7 +329,7 @@ void SerialTreeLearner::BeforeTrain() {
OMP_LOOP_EX_END
();
}
OMP_THROW_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
, 512) if(loop_size >= 1024
)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
}
...
...
@@ -388,12 +390,13 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
char
mark
=
1
;
data_size_t
begin
=
data_partition_
->
leaf_begin
(
left_leaf
);
data_size_t
end
=
begin
+
left_cnt
;
data_size_t
loop_size
=
end
-
begin
;
if
(
left_cnt
>
right_cnt
)
{
begin
=
data_partition_
->
leaf_begin
(
right_leaf
);
end
=
begin
+
right_cnt
;
mark
=
0
;
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
, 512) if(loop_size >= 1024
)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
1
;
}
...
...
@@ -406,7 +409,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
OMP_LOOP_EX_END
();
}
OMP_THROW_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
, 512) if(loop_size >= 1024
)
for
(
data_size_t
i
=
begin
;
i
<
end
;
++
i
)
{
is_data_in_leaf_
[
indices
[
i
]]
=
0
;
}
...
...
@@ -447,7 +450,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
void
SerialTreeLearner
::
FindBestThresholds
()
{
std
::
vector
<
int8_t
>
is_feature_used
(
num_features_
,
0
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static
,1024) if (num_features_ >= 2048
)
for
(
int
feature_index
=
0
;
feature_index
<
num_features_
;
++
feature_index
)
{
if
(
!
is_feature_used_
[
feature_index
])
continue
;
if
(
parent_leaf_histogram_array_
!=
nullptr
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment