Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
305369dd
Unverified
Commit
305369dd
authored
Jan 08, 2022
by
文佳鹏
Committed by
GitHub
Jan 08, 2022
Browse files
Fix GPU memory allocation overflow (#4928)
parent
5b42c2c3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
3 deletions
+3
-3
src/treelearner/gpu_tree_learner.cpp
src/treelearner/gpu_tree_learner.cpp
+3
-3
No files found.
src/treelearner/gpu_tree_learner.cpp
View file @
305369dd
...
...
@@ -245,7 +245,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
// allocate memory for all features (FIXME: there is a 4 GB barrier on some devices; this needs to be split into multiple buffers)
device_features_
.
reset
();
device_features_
=
std
::
unique_ptr
<
boost
::
compute
::
vector
<
Feature4
>>
(
new
boost
::
compute
::
vector
<
Feature4
>
(
num_dense_feature4_
*
num_data_
,
ctx_
));
device_features_
=
std
::
unique_ptr
<
boost
::
compute
::
vector
<
Feature4
>>
(
new
boost
::
compute
::
vector
<
Feature4
>
(
(
uint64_t
)
num_dense_feature4_
*
num_data_
,
ctx_
));
// unpin old buffer if necessary before destructing them
if
(
ptr_pinned_gradients_
)
{
queue_
.
enqueue_unmap_buffer
(
pinned_gradients_
,
ptr_pinned_gradients_
);
...
...
@@ -427,7 +427,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
#pragma omp critical
queue_
.
enqueue_write_buffer
(
device_features_
->
get_buffer
(),
i
*
num_data_
*
sizeof
(
Feature4
),
num_data_
*
sizeof
(
Feature4
),
host4
);
(
uint64_t
)
i
*
num_data_
*
sizeof
(
Feature4
),
num_data_
*
sizeof
(
Feature4
),
host4
);
#if GPU_DEBUG >= 1
printf
(
"first example of feature-group tuple is: %d %d %d %d
\n
"
,
host4
[
0
].
s
[
0
],
host4
[
0
].
s
[
1
],
host4
[
0
].
s
[
2
],
host4
[
0
].
s
[
3
]);
printf
(
"Feature-groups copied to device with multipliers "
);
...
...
@@ -503,7 +503,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
// copying the last 1 to (dword_features - 1) feature-groups in the last tuple
queue_
.
enqueue_write_buffer
(
device_features_
->
get_buffer
(),
(
num_dense_feature4_
-
1
)
*
num_data_
*
sizeof
(
Feature4
),
num_data_
*
sizeof
(
Feature4
),
host4
);
(
num_dense_feature4_
-
1
)
*
(
uint64_t
)
num_data_
*
sizeof
(
Feature4
),
num_data_
*
sizeof
(
Feature4
),
host4
);
#if GPU_DEBUG >= 1
printf
(
"Last features copied to device
\n
"
);
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment