Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e3c3a6b9
Unverified
Commit
e3c3a6b9
authored
Jan 12, 2026
by
Qi Wang
Committed by
GitHub
Jan 12, 2026
Browse files
fix: GB200 mem allocation failure (#5328)
parent
52ce68e6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
104 additions
and
7 deletions
+104
-7
lib/llm/src/block_manager/storage/cuda.rs
lib/llm/src/block_manager/storage/cuda.rs
+104
-7
No files found.
lib/llm/src/block_manager/storage/cuda.rs
View file @
e3c3a6b9
...
@@ -78,6 +78,33 @@ use cudarc::driver::{CudaContext, sys};
...
@@ -78,6 +78,33 @@ use cudarc::driver::{CudaContext, sys};
use
crate
::
block_manager
::
numa_allocator
;
use
crate
::
block_manager
::
numa_allocator
;
/// Allocates pinned host memory, preferring write-combined if supported.
///
/// Write-combined (WC) memory is optimal for PCIe DMA transfers but may not be
/// supported on systems with cache-coherent CPU-GPU interconnects (e.g., Grace
/// Hopper/Blackwell with NVLink-C2C). This function tries WC first and falls
/// back to regular pinned memory if not supported.
///
/// # Safety
///
/// Caller must ensure a valid CUDA context is bound to the current thread.
unsafe
fn
malloc_host_prefer_writecombined
(
size
:
usize
)
->
Result
<*
mut
u8
,
StorageError
>
{
// First, try write-combined allocation (optimal for PCIe systems)
// SAFETY: Caller guarantees a valid CUDA context is bound to the current thread
match
unsafe
{
cudarc
::
driver
::
result
::
malloc_host
(
size
,
sys
::
CU_MEMHOSTALLOC_WRITECOMBINED
)
}
{
Ok
(
ptr
)
=>
Ok
(
ptr
as
*
mut
u8
),
Err
(
_
)
=>
{
// Write-combined not supported (e.g., Grace Hopper/Blackwell),
// fall back to regular pinned memory
tracing
::
debug!
(
"Write-combined memory not supported, using regular pinned memory"
);
// SAFETY: Same as above - caller guarantees valid CUDA context
unsafe
{
cudarc
::
driver
::
result
::
malloc_host
(
size
,
0
)
}
.map
(|
ptr
|
ptr
as
*
mut
u8
)
.map_err
(
StorageError
::
Cuda
)
}
}
}
/// Trait for [Storage] types that can be accessed by CUDA
/// Trait for [Storage] types that can be accessed by CUDA
pub
trait
CudaAccessible
:
Storage
{}
pub
trait
CudaAccessible
:
Storage
{}
...
@@ -187,16 +214,11 @@ impl PinnedStorage {
...
@@ -187,16 +214,11 @@ impl PinnedStorage {
Ok
(
ptr
)
=>
ptr
,
Ok
(
ptr
)
=>
ptr
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
warn!
(
"NUMA allocation failed: {}, using direct allocation"
,
e
);
tracing
::
warn!
(
"NUMA allocation failed: {}, using direct allocation"
,
e
);
cudarc
::
driver
::
result
::
malloc_host
(
malloc_host_prefer_writecombined
(
size
)
?
size
,
sys
::
CU_MEMHOSTALLOC_WRITECOMBINED
,
)
.map_err
(
StorageError
::
Cuda
)
?
as
*
mut
u8
}
}
}
}
}
else
{
}
else
{
cudarc
::
driver
::
result
::
malloc_host
(
size
,
sys
::
CU_MEMHOSTALLOC_WRITECOMBINED
)
malloc_host_prefer_writecombined
(
size
)
?
.map_err
(
StorageError
::
Cuda
)
?
as
*
mut
u8
};
};
assert
!
(
!
ptr
.is_null
(),
"Failed to allocate pinned memory"
);
assert
!
(
!
ptr
.is_null
(),
"Failed to allocate pinned memory"
);
...
@@ -574,4 +596,79 @@ mod tests {
...
@@ -574,4 +596,79 @@ mod tests {
let
result
=
DeviceStorage
::
new_from_torch
(
&
ctx
,
Arc
::
new
(
tensor
));
let
result
=
DeviceStorage
::
new_from_torch
(
&
ctx
,
Arc
::
new
(
tensor
));
assert
!
(
result
.is_err
());
assert
!
(
result
.is_err
());
}
}
#[test]
fn
test_malloc_host_prefer_writecombined_allocates_memory
()
{
let
ctx
=
Cuda
::
device_or_create
(
0
)
.expect
(
"Failed to create CUDA context"
);
let
size
=
4096
;
// One page
unsafe
{
ctx
.bind_to_thread
()
.expect
(
"Failed to bind CUDA context"
);
// Test allocation succeeds (either write-combined or fallback)
let
ptr
=
malloc_host_prefer_writecombined
(
size
)
.expect
(
"malloc_host_prefer_writecombined should succeed"
);
// Verify pointer is valid and non-null
assert
!
(
!
ptr
.is_null
(),
"Allocated pointer should not be null"
);
// Verify memory is accessible by writing and reading
std
::
ptr
::
write_volatile
(
ptr
,
0xAB
);
let
val
=
std
::
ptr
::
read_volatile
(
ptr
);
assert_eq!
(
val
,
0xAB
,
"Should be able to write and read pinned memory"
);
// Clean up
cudarc
::
driver
::
result
::
free_host
(
ptr
as
_
)
.expect
(
"Failed to free pinned memory"
);
}
}
/// Test PinnedStorage::new with NUMA disabled (the direct allocation path)
///
/// This test confirms that when `DYN_KVBM_ENABLE_NUMA` is not set,
/// PinnedStorage::new uses the direct malloc_host_prefer_writecombined path
/// (lines 222-224 in the source).
#[test]
// `remove_var` is not thread-safe, so we need to run this test in a serial context
// #[serial]
fn
test_pinned_storage_new_without_numa
()
{
// Verify NUMA is actually disabled for this test
assert
!
(
!
numa_allocator
::
is_numa_enabled
(),
"NUMA should be disabled for this test"
);
let
ctx
=
Cuda
::
device_or_create
(
0
)
.expect
(
"Failed to create CUDA context"
);
let
size
=
8192
;
// Create PinnedStorage - this should take the non-NUMA path
let
mut
storage
=
PinnedStorage
::
new
(
&
ctx
,
size
)
.expect
(
"PinnedStorage::new should succeed"
);
// Verify storage properties
assert_eq!
(
storage
.size
(),
size
);
assert_eq!
(
storage
.storage_type
(),
StorageType
::
Pinned
);
assert_ne!
(
storage
.addr
(),
0
,
"Address should be non-zero"
);
// Verify memory is accessible
unsafe
{
let
ptr
=
storage
.as_mut_ptr
();
assert
!
(
!
ptr
.is_null
(),
"Pointer should not be null"
);
// Write a pattern to verify memory is usable
for
i
in
0
..
size
{
std
::
ptr
::
write_volatile
(
ptr
.add
(
i
),
(
i
&
0xFF
)
as
u8
);
}
// Read back and verify
for
i
in
0
..
size
{
let
val
=
std
::
ptr
::
read_volatile
(
ptr
.add
(
i
));
assert_eq!
(
val
,
(
i
&
0xFF
)
as
u8
,
"Memory content mismatch at offset {}"
,
i
);
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment