Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
831c0e3f
Unverified
Commit
831c0e3f
authored
Oct 27, 2020
by
Guolin Ke
Committed by
GitHub
Oct 27, 2020
Browse files
rollback to omp sum (#3493)
* rollback omp sum * remove sum reduction
parent
ca7a01cd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
63 deletions
+17
-63
include/LightGBM/utils/threading.h
include/LightGBM/utils/threading.h
+0
-27
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+17
-36
No files found.
include/LightGBM/utils/threading.h
View file @
831c0e3f
...
@@ -84,33 +84,6 @@ class Threading {
...
@@ -84,33 +84,6 @@ class Threading {
OMP_THROW_EX
();
OMP_THROW_EX
();
return
n_block
;
return
n_block
;
}
}
template
<
typename
INDEX_T
,
typename
VAL1_T
,
typename
VAL2_T
>
static
inline
int
SumReduction
(
INDEX_T
start
,
INDEX_T
end
,
INDEX_T
min_block_size
,
const
std
::
function
<
void
(
int
,
INDEX_T
,
INDEX_T
,
VAL1_T
*
res1
,
VAL2_T
*
res2
)
>&
inner_fun
,
VAL1_T
*
res1
,
VAL2_T
*
res2
)
{
int
n_block
=
1
;
INDEX_T
num_inner
=
end
-
start
;
BlockInfoForceSize
<
INDEX_T
>
(
num_inner
,
min_block_size
,
&
n_block
,
&
num_inner
);
std
::
vector
<
VAL1_T
>
val_1s
(
n_block
,
static_cast
<
VAL1_T
>
(
0
));
std
::
vector
<
VAL2_T
>
val_2s
(
n_block
,
static_cast
<
VAL2_T
>
(
0
));
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
INDEX_T
inner_start
=
start
+
num_inner
*
i
;
INDEX_T
inner_end
=
std
::
min
(
end
,
inner_start
+
num_inner
);
inner_fun
(
i
,
inner_start
,
inner_end
,
&
val_1s
[
i
],
&
val_2s
[
i
]);
}
*
res1
=
0
;
*
res2
=
0
;
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
*
res1
+=
val_1s
[
i
];
*
res2
+=
val_2s
[
i
];
}
return
n_block
;
}
};
};
template
<
typename
INDEX_T
,
bool
TWO_BUFFER
>
template
<
typename
INDEX_T
,
bool
TWO_BUFFER
>
...
...
src/treelearner/leaf_splits.hpp
View file @
831c0e3f
...
@@ -68,24 +68,15 @@ class LeafSplits {
...
@@ -68,24 +68,15 @@ class LeafSplits {
num_data_in_leaf_
=
num_data_
;
num_data_in_leaf_
=
num_data_
;
leaf_index_
=
0
;
leaf_index_
=
0
;
data_indices_
=
nullptr
;
data_indices_
=
nullptr
;
if
(
num_data_in_leaf_
<
4096
)
{
double
tmp_sum_gradients
=
0.0
f
;
sum_gradients_
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
sum_hessians_
=
0.0
f
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
sum_gradients_
+=
gradients
[
i
];
tmp_sum_gradients
+=
gradients
[
i
];
sum_hessians_
+=
hessians
[
i
];
tmp_sum_hessians
+=
hessians
[
i
];
}
}
else
{
Threading
::
SumReduction
<
data_size_t
,
double
,
double
>
(
0
,
num_data_in_leaf_
,
2048
,
[
=
](
int
,
data_size_t
start
,
data_size_t
end
,
double
*
s1
,
double
*
s2
)
{
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
*
s1
+=
gradients
[
i
];
*
s2
+=
hessians
[
i
];
}
},
&
sum_gradients_
,
&
sum_hessians_
);
}
}
sum_gradients_
=
tmp_sum_gradients
;
sum_hessians_
=
tmp_sum_hessians
;
}
}
/*!
/*!
...
@@ -99,26 +90,16 @@ class LeafSplits {
...
@@ -99,26 +90,16 @@ class LeafSplits {
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
const
score_t
*
gradients
,
const
score_t
*
hessians
)
{
leaf_index_
=
leaf
;
leaf_index_
=
leaf
;
data_indices_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
num_data_in_leaf_
);
data_indices_
=
data_partition
->
GetIndexOnLeaf
(
leaf
,
&
num_data_in_leaf_
);
if
(
num_data_in_leaf_
<
4096
)
{
double
tmp_sum_gradients
=
0.0
f
;
sum_gradients_
=
0.0
f
;
double
tmp_sum_hessians
=
0.0
f
;
sum_hessians_
=
0.0
f
;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024)
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_in_leaf_
;
++
i
)
{
const
data_size_t
idx
=
data_indices_
[
i
];
const
data_size_t
idx
=
data_indices_
[
i
];
sum_gradients_
+=
gradients
[
idx
];
tmp_sum_gradients
+=
gradients
[
idx
];
sum_hessians_
+=
hessians
[
idx
];
tmp_sum_hessians
+=
hessians
[
idx
];
}
}
else
{
Threading
::
SumReduction
<
data_size_t
,
double
,
double
>
(
0
,
num_data_in_leaf_
,
2048
,
[
=
](
int
,
data_size_t
start
,
data_size_t
end
,
double
*
s1
,
double
*
s2
)
{
for
(
data_size_t
i
=
start
;
i
<
end
;
++
i
)
{
const
data_size_t
idx
=
data_indices_
[
i
];
*
s1
+=
gradients
[
idx
];
*
s2
+=
hessians
[
idx
];
}
},
&
sum_gradients_
,
&
sum_hessians_
);
}
}
sum_gradients_
=
tmp_sum_gradients
;
sum_hessians_
=
tmp_sum_hessians
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment