Unverified commit 305369dd, authored by 文佳鹏 and committed by GitHub
Browse files

fix gpu allocate memory overflow (#4928)

parent 5b42c2c3
...@@ -245,7 +245,7 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -245,7 +245,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
} }
// allocate memory for all features (FIXME: 4 GB barrier on some devices, need to split to multiple buffers) // allocate memory for all features (FIXME: 4 GB barrier on some devices, need to split to multiple buffers)
device_features_.reset(); device_features_.reset();
device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>(num_dense_feature4_ * num_data_, ctx_)); device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>((uint64_t)num_dense_feature4_ * num_data_, ctx_));
// unpin old buffer if necessary before destructing them // unpin old buffer if necessary before destructing them
if (ptr_pinned_gradients_) { if (ptr_pinned_gradients_) {
queue_.enqueue_unmap_buffer(pinned_gradients_, ptr_pinned_gradients_); queue_.enqueue_unmap_buffer(pinned_gradients_, ptr_pinned_gradients_);
...@@ -427,7 +427,7 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -427,7 +427,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
} }
#pragma omp critical #pragma omp critical
queue_.enqueue_write_buffer(device_features_->get_buffer(), queue_.enqueue_write_buffer(device_features_->get_buffer(),
i * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4); (uint64_t)i * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
#if GPU_DEBUG >= 1 #if GPU_DEBUG >= 1
printf("first example of feature-group tuple is: %d %d %d %d\n", host4[0].s[0], host4[0].s[1], host4[0].s[2], host4[0].s[3]); printf("first example of feature-group tuple is: %d %d %d %d\n", host4[0].s[0], host4[0].s[1], host4[0].s[2], host4[0].s[3]);
printf("Feature-groups copied to device with multipliers "); printf("Feature-groups copied to device with multipliers ");
...@@ -503,7 +503,7 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -503,7 +503,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
} }
// copying the last 1 to (dword_features - 1) feature-groups in the last tuple // copying the last 1 to (dword_features - 1) feature-groups in the last tuple
queue_.enqueue_write_buffer(device_features_->get_buffer(), queue_.enqueue_write_buffer(device_features_->get_buffer(),
(num_dense_feature4_ - 1) * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4); (num_dense_feature4_ - 1) * (uint64_t)num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
#if GPU_DEBUG >= 1 #if GPU_DEBUG >= 1
printf("Last features copied to device\n"); printf("Last features copied to device\n");
#endif #endif
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment