Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
ca7a01cd
Unverified
Commit
ca7a01cd
authored
Oct 27, 2020
by
Guolin Ke
Committed by
GitHub
Oct 27, 2020
Browse files
speed up multi-threading sum (#3485)
* speed up multi-threading sum
* Apply suggestions from code review
parent
ba0a1f8d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
39 deletions
+53
-39
include/LightGBM/utils/threading.h
include/LightGBM/utils/threading.h
+3
-7
src/treelearner/leaf_splits.hpp
src/treelearner/leaf_splits.hpp
+50
-32
No files found.
include/LightGBM/utils/threading.h
View file @
ca7a01cd
...
@@ -71,7 +71,7 @@ class Threading {
...
@@ -71,7 +71,7 @@ class Threading {
const
std
::
function
<
void
(
int
,
INDEX_T
,
INDEX_T
)
>&
inner_fun
)
{
const
std
::
function
<
void
(
int
,
INDEX_T
,
INDEX_T
)
>&
inner_fun
)
{
int
n_block
=
1
;
int
n_block
=
1
;
INDEX_T
num_inner
=
end
-
start
;
INDEX_T
num_inner
=
end
-
start
;
BlockInfo
<
INDEX_T
>
(
end
-
start
,
min_block_size
,
&
n_block
,
&
num_inner
);
BlockInfo
<
INDEX_T
>
(
num_inner
,
min_block_size
,
&
n_block
,
&
num_inner
);
OMP_INIT_EX
();
OMP_INIT_EX
();
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
...
@@ -93,20 +93,16 @@ class Threading {
...
@@ -93,20 +93,16 @@ class Threading {
VAL1_T
*
res1
,
VAL2_T
*
res2
)
{
VAL1_T
*
res1
,
VAL2_T
*
res2
)
{
int
n_block
=
1
;
int
n_block
=
1
;
INDEX_T
num_inner
=
end
-
start
;
INDEX_T
num_inner
=
end
-
start
;
BlockInfoForceSize
<
INDEX_T
>
(
end
-
start
,
min_block_size
,
&
n_block
,
BlockInfoForceSize
<
INDEX_T
>
(
num_inner
,
min_block_size
,
&
n_block
,
&
num_inner
);
&
num_inner
);
std
::
vector
<
VAL1_T
>
val_1s
(
n_block
,
static_cast
<
VAL1_T
>
(
0
));
std
::
vector
<
VAL1_T
>
val_1s
(
n_block
,
static_cast
<
VAL1_T
>
(
0
));
std
::
vector
<
VAL2_T
>
val_2s
(
n_block
,
static_cast
<
VAL2_T
>
(
0
));
std
::
vector
<
VAL2_T
>
val_2s
(
n_block
,
static_cast
<
VAL2_T
>
(
0
));
OMP_INIT_EX
();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 1)
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
OMP_LOOP_EX_BEGIN
();
INDEX_T
inner_start
=
start
+
num_inner
*
i
;
INDEX_T
inner_start
=
start
+
num_inner
*
i
;
INDEX_T
inner_end
=
std
::
min
(
end
,
inner_start
+
num_inner
);
INDEX_T
inner_end
=
std
::
min
(
end
,
inner_start
+
num_inner
);
inner_fun
(
i
,
inner_start
,
inner_end
,
&
val_1s
[
i
],
&
val_2s
[
i
]);
inner_fun
(
i
,
inner_start
,
inner_end
,
&
val_1s
[
i
],
&
val_2s
[
i
]);
OMP_LOOP_EX_END
();
}
}
OMP_THROW_EX
();
*
res1
=
0
;
*
res1
=
0
;
*
res2
=
0
;
*
res2
=
0
;
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n_block
;
++
i
)
{
...
...
src/treelearner/leaf_splits.hpp
View file @
ca7a01cd
...
@@ -60,47 +60,65 @@ class LeafSplits {
...
@@ -60,47 +60,65 @@ class LeafSplits {
}
}
/*!
 * \brief Init splits on the current leaf, it will traverse all data to sum up the results
 * \param gradients Pointer to per-datum gradient values (indexed 0..num_data_-1)
 * \param hessians Pointer to per-datum hessian values (indexed 0..num_data_-1)
 */
void Init(const score_t* gradients, const score_t* hessians) {
  // This overload initializes the root leaf: it covers the whole dataset.
  num_data_in_leaf_ = num_data_;
  leaf_index_ = 0;
  // nullptr data_indices_ means "no index mapping": data are addressed directly.
  data_indices_ = nullptr;
  if (num_data_in_leaf_ < 4096) {
    // Small leaf: a plain sequential sum is cheaper than the overhead of
    // spawning a multi-threaded reduction (threshold 4096 chosen by the
    // commit that introduced this fast path — see #3485).
    sum_gradients_ = 0.0f;
    sum_hessians_ = 0.0f;
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      sum_gradients_ += gradients[i];
      sum_hessians_ += hessians[i];
    }
  } else {
    // Large leaf: block-wise parallel reduction over [0, num_data_in_leaf_)
    // with a minimum block size of 2048. The lambda accumulates one block's
    // partial sums into *s1/*s2; SumReduction combines the per-block partials
    // into the two output pointers passed last.
    Threading::SumReduction<data_size_t, double, double>(
        0, num_data_in_leaf_, 2048,
        [=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
          // NOTE(review): s1/s2 are presumably zero-initialized by
          // SumReduction before this callback runs — confirm in threading.h.
          for (data_size_t i = start; i < end; ++i) {
            *s1 += gradients[i];
            *s2 += hessians[i];
          }
        },
        &sum_gradients_, &sum_hessians_);
  }
}
/*!
 * \brief Init splits on current leaf of partial data.
 * \param leaf Index of current leaf
 * \param data_partition current data partition
 * \param gradients Pointer to per-datum gradient values, addressed via leaf indices
 * \param hessians Pointer to per-datum hessian values, addressed via leaf indices
 */
void Init(int leaf, const DataPartition* data_partition, const score_t* gradients,
          const score_t* hessians) {
  leaf_index_ = leaf;
  // Fetch the index set of this leaf; GetIndexOnLeaf also writes the count
  // into num_data_in_leaf_.
  data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
  if (num_data_in_leaf_ < 4096) {
    // Small leaf: sequential sum avoids multi-threading overhead
    // (same 4096 threshold as the full-data overload above — see #3485).
    sum_gradients_ = 0.0f;
    sum_hessians_ = 0.0f;
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      // Indirect access: i-th datum of this leaf lives at data_indices_[i].
      const data_size_t idx = data_indices_[i];
      sum_gradients_ += gradients[idx];
      sum_hessians_ += hessians[idx];
    }
  } else {
    // Large leaf: parallel block-wise reduction over the leaf's index range,
    // minimum block size 2048; per-block partials in *s1/*s2 are combined by
    // SumReduction into the output pointers passed last.
    Threading::SumReduction<data_size_t, double, double>(
        0, num_data_in_leaf_, 2048,
        [=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
          // NOTE(review): s1/s2 are presumably zero-initialized by
          // SumReduction before this callback runs — confirm in threading.h.
          for (data_size_t i = start; i < end; ++i) {
            const data_size_t idx = data_indices_[i];
            *s1 += gradients[idx];
            *s2 += hessians[idx];
          }
        },
        &sum_gradients_, &sum_hessians_);
  }
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment