Unverified commit 18be11fd, authored by xjx, committed by GitHub
Browse files

[BUGFIX] fix CUDA OOM ERROR: invalid argument at cumem_allocator.cpp:119 (#35594)


Signed-off-by: xjx <493337577@qq.com>
parent 8d8855fd
...@@ -109,16 +109,18 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem, ...@@ -109,16 +109,18 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem,
#ifndef USE_ROCM #ifndef USE_ROCM
int flag = 0; int flag = 0;
CUDA_CHECK(cuDeviceGetAttribute( CUresult rdma_result = cuDeviceGetAttribute(
&flag, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, &flag, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
device)); device);
if (flag) { // support GPUDirect RDMA if possible if (rdma_result == CUDA_SUCCESS &&
flag) { // support GPUDirect RDMA if possible
prop.allocFlags.gpuDirectRDMACapable = 1; prop.allocFlags.gpuDirectRDMACapable = 1;
} }
int fab_flag = 0; int fab_flag = 0;
CUDA_CHECK(cuDeviceGetAttribute( CUresult fab_result = cuDeviceGetAttribute(
&fab_flag, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED, device)); &fab_flag, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED, device);
if (fab_flag) { // support fabric handle if possible if (fab_result == CUDA_SUCCESS &&
fab_flag) { // support fabric handle if possible
prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_FABRIC; prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_FABRIC;
} }
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment