Unverified Commit 305369dd authored by 文佳鹏's avatar 文佳鹏 Committed by GitHub
Browse files

fix gpu allocate memory overflow (#4928)

parent 5b42c2c3
......@@ -245,7 +245,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
// allocate memory for all features (FIXME: 4 GB barrier on some devices, need to split to multiple buffers)
device_features_.reset();
device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>(num_dense_feature4_ * num_data_, ctx_));
device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>((uint64_t)num_dense_feature4_ * num_data_, ctx_));
// unpin old buffer if necessary before destructing them
if (ptr_pinned_gradients_) {
queue_.enqueue_unmap_buffer(pinned_gradients_, ptr_pinned_gradients_);
......@@ -427,7 +427,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
#pragma omp critical
queue_.enqueue_write_buffer(device_features_->get_buffer(),
i * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
(uint64_t)i * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
#if GPU_DEBUG >= 1
printf("first example of feature-group tuple is: %d %d %d %d\n", host4[0].s[0], host4[0].s[1], host4[0].s[2], host4[0].s[3]);
printf("Feature-groups copied to device with multipliers ");
......@@ -503,7 +503,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
// copying the last 1 to (dword_features - 1) feature-groups in the last tuple
queue_.enqueue_write_buffer(device_features_->get_buffer(),
(num_dense_feature4_ - 1) * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
(num_dense_feature4_ - 1) * (uint64_t)num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
#if GPU_DEBUG >= 1
printf("Last features copied to device\n");
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment