Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
fb62e2cf
Unverified
Commit
fb62e2cf
authored
Feb 10, 2026
by
Anant Sharma
Committed by
GitHub
Feb 10, 2026
Browse files
ci: add kvbm bindings to pre merge checks (#6042)
Signed-off-by:
Anant Sharma
<
anants@nvidia.com
>
parent
bf6840e6
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
48 additions
and
34 deletions
+48
-34
.github/workflows/pre-merge.yml
.github/workflows/pre-merge.yml
+2
-3
lib/bindings/kvbm/Cargo.lock
lib/bindings/kvbm/Cargo.lock
+2
-0
lib/bindings/kvbm/src/block_manager.rs
lib/bindings/kvbm/src/block_manager.rs
+3
-4
lib/bindings/kvbm/src/block_manager/cache_stats.rs
lib/bindings/kvbm/src/block_manager/cache_stats.rs
+6
-6
lib/bindings/kvbm/src/block_manager/distributed/worker.rs
lib/bindings/kvbm/src/block_manager/distributed/worker.rs
+1
-0
lib/bindings/kvbm/src/block_manager/vllm/connector/leader.rs
lib/bindings/kvbm/src/block_manager/vllm/connector/leader.rs
+12
-4
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/recorder.rs
.../kvbm/src/block_manager/vllm/connector/leader/recorder.rs
+5
-6
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
...ings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
+4
-3
lib/bindings/kvbm/src/block_manager/vllm/connector/trtllm_leader.rs
...gs/kvbm/src/block_manager/vllm/connector/trtllm_leader.rs
+6
-4
lib/bindings/kvbm/src/block_manager/vllm/connector/worker.rs
lib/bindings/kvbm/src/block_manager/vllm/connector/worker.rs
+2
-0
lib/bindings/kvbm/src/block_manager/vllm/slot.rs
lib/bindings/kvbm/src/block_manager/vllm/slot.rs
+1
-1
lib/bindings/kvbm/src/lib.rs
lib/bindings/kvbm/src/lib.rs
+4
-3
No files found.
.github/workflows/pre-merge.yml
View file @
fb62e2cf
...
...
@@ -68,8 +68,7 @@ jobs:
runs-on
:
group
:
Fastchecker
strategy
:
# removing kvbm from here - it will fail to test with nixl dep enabled
matrix
:
{
dir
:
[
'
.'
,
'
lib/bindings/python'
,
'
lib/runtime/examples'
,
'
launch/dynamo-run'
]
}
matrix
:
{
dir
:
[
'
.'
,
'
lib/bindings/python'
,
'
lib/runtime/examples'
,
'
launch/dynamo-run'
,
'
lib/bindings/kvbm'
]
}
permissions
:
contents
:
read
steps
:
...
...
@@ -125,7 +124,7 @@ jobs:
runs-on
:
group
:
Fastchecker
strategy
:
matrix
:
{
dir
:
[
'
.'
,
'
lib/bindings/python'
,
'
lib/runtime/examples'
,
'
launch/dynamo-run'
]
}
matrix
:
{
dir
:
[
'
.'
,
'
lib/bindings/python'
,
'
lib/runtime/examples'
,
'
launch/dynamo-run'
,
'
lib/bindings/kvbm'
]
}
permissions
:
contents
:
read
steps
:
...
...
lib/bindings/kvbm/Cargo.lock
View file @
fb62e2cf
...
...
@@ -1740,6 +1740,7 @@ dependencies = [
"derive-getters",
"derive_builder",
"dynamo-kv-router",
"dynamo-runtime",
"dynamo-tokens",
"ndarray",
"ndarray-interp",
...
...
@@ -1751,6 +1752,7 @@ dependencies = [
"tokio-util",
"tracing",
"uuid",
"validator",
]
[[package]]
...
...
lib/bindings/kvbm/src/block_manager.rs
View file @
fb62e2cf
...
...
@@ -6,9 +6,7 @@ use anyhow::Result;
use
dynamo_llm
::
block_manager
::
block
::{
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
,
locality
::
Logical
,
};
use
dynamo_llm
::
block_manager
::
kv_consolidator
::{
EventSource
,
KvEventConsolidatorConfig
,
};
use
dynamo_llm
::
block_manager
::
kv_consolidator
::
EventSource
;
use
dynamo_llm
::
block_manager
::
offload
::
filter
::
FrequencyFilter
;
use
dynamo_llm
::
block_manager
::{
BasicMetadata
,
BlockParallelismStrategy
};
use
dynamo_runtime
::
DistributedRuntime
;
...
...
@@ -368,7 +366,8 @@ impl BlockManagerBuilder {
}
if
let
Some
((
engine_ep
,
output_ep
,
engine_source
))
=
self
.consolidator_config
{
config_builder
=
config_builder
.consolidator_config
(
engine_ep
,
output_ep
,
engine_source
);
config_builder
=
config_builder
.consolidator_config
(
engine_ep
,
output_ep
,
engine_source
);
}
let
config
=
config_builder
.build
()
?
;
...
...
lib/bindings/kvbm/src/block_manager/cache_stats.rs
View file @
fb62e2cf
...
...
@@ -17,17 +17,17 @@ const DEFAULT_LOG_INTERVAL_SECS: u64 = 5;
/// Cache statistics entry for a single request
#[derive(Clone,
Copy,
Debug)]
struct
CacheStatsEntry
{
host_blocks
:
u64
,
// Blocks found in host cache
disk_blocks
:
u64
,
// Blocks found in disk cache
total_blocks
:
u64
,
// Total blocks queried from host/disk
host_blocks
:
u64
,
// Blocks found in host cache
disk_blocks
:
u64
,
// Blocks found in disk cache
total_blocks
:
u64
,
// Total blocks queried from host/disk
}
/// Aggregated cache statistics for the current sliding window
#[derive(Default)]
struct
AggregatedStats
{
total_blocks_queried
:
u64
,
// Total blocks queried from host/disk (same for both tiers)
host_blocks_hit
:
u64
,
// Blocks found in host cache
disk_blocks_hit
:
u64
,
// Blocks found in disk cache
total_blocks_queried
:
u64
,
// Total blocks queried from host/disk (same for both tiers)
host_blocks_hit
:
u64
,
// Blocks found in host cache
disk_blocks_hit
:
u64
,
// Blocks found in disk cache
}
/// Cache statistics tracker with sliding window
...
...
lib/bindings/kvbm/src/block_manager/distributed/worker.rs
View file @
fb62e2cf
...
...
@@ -143,6 +143,7 @@ impl KvbmWorker {
#[pymethods]
impl
KvbmWorker
{
#[new]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature
=
(num_device_blocks,
page_size,
tensors,
device_id=
0
,
dtype_width_bytes=
2
,
drt=None,
layout_blocking=
false
,
device_layout_type=None,
host_layout_type=None,
disk_layout_type=None))]
fn
new
(
num_device_blocks
:
usize
,
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/leader.rs
View file @
fb62e2cf
...
...
@@ -21,7 +21,7 @@ use dynamo_llm::block_manager::{
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
,
locality
::
Logical
,
},
connector
::{
*
,
protocol
::
RequestType
},
connector
::{
protocol
::
RequestType
,
*
},
kv_consolidator
::
EventSource
,
};
use
dynamo_llm
::
tokens
::{
SaltHash
,
TokenBlockSequence
,
Tokens
};
...
...
@@ -221,7 +221,7 @@ impl Leader for KvConnectorLeader {
);
// the number of device matched tokens should be less than or equal to the number of tokens in the request
debug_assert!
(
num_computed_tokens
%
self
.block_size
==
0
);
debug_assert!
(
num_computed_tokens
.is_multiple_of
(
self
.block_size
)
);
let
shared_slot
=
self
.slot_manager
()
.get_slot
(
&
request_id
)
?
;
let
mut
slot
=
shared_slot
...
...
@@ -262,7 +262,9 @@ impl Leader for KvConnectorLeader {
// return the number of external tokens that are ready for onboarding
// we always return true here as we always asynchronously onboard matched blocks
if
let
SlotState
::
OnboardStaged
(
num_external_tokens
)
=
slot
.state
()
{
debug_assert!
((
num_computed_tokens
+
num_external_tokens
)
%
self
.block_size
==
0
);
debug_assert!
(
(
num_computed_tokens
+
num_external_tokens
)
.is_multiple_of
(
self
.block_size
)
);
tracing
::
debug!
(
request_id
=
request_id
,
"scheduling onboarding for {} external tokens"
,
...
...
@@ -427,7 +429,13 @@ impl Leader for KvConnectorLeader {
.get
(
request_id
)
.unwrap_or
(
&
0
);
slot
.apply_scheduler_output
(
&
[],
&
[],
new_req
.num_computed_tokens
,
scheduled_tokens
,
None
)
?
;
slot
.apply_scheduler_output
(
&
[],
&
[],
new_req
.num_computed_tokens
,
scheduled_tokens
,
None
,
)
?
;
let
pending_ops_opt
=
slot
.take_pending_operations
();
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/recorder.rs
View file @
fb62e2cf
...
...
@@ -152,12 +152,11 @@ impl KvConnectorLeaderRecorder {
if
let
(
Some
(
vllm_ep
),
Some
(
output_ep
))
=
(
consolidator_vllm_ep
,
consolidator_output_ep
)
{
block_manager_builder
=
block_manager_builder
.consolidator_config
(
vllm_ep
,
Some
(
output_ep
),
EventSource
::
Vllm
,
);
block_manager_builder
=
block_manager_builder
.consolidator_config
(
vllm_ep
,
Some
(
output_ep
),
EventSource
::
Vllm
,
);
}
let
block_manager
=
match
block_manager_builder
.build
()
.await
{
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
View file @
fb62e2cf
...
...
@@ -174,6 +174,7 @@ pub struct ConnectorSlotManager<R: RequestKey> {
/// Cache statistics tracker
cache_stats
:
Arc
<
CacheStatsTracker
>
,
/// KVBM metrics for exposing cache hit rates
#[allow(dead_code)]
kvbm_metrics
:
KvbmMetrics
,
/// Minimum priority threshold for host offload filtering (read once at init)
offload_min_priority
:
u32
,
...
...
@@ -779,8 +780,8 @@ impl Slot for VllmConnectorSlot {
let
block_size
=
self
.block_size
;
// Convert cached tokens to blocks (rounding up)
let
host_blocks
=
(
self
.tokens_cached_from_host
+
block_size
-
1
)
/
block_size
;
let
disk_blocks
=
(
self
.tokens_cached_from_disk
+
block_size
-
1
)
/
block_size
;
let
host_blocks
=
self
.tokens_cached_from_host
.div_ceil
(
block_size
)
;
let
disk_blocks
=
self
.tokens_cached_from_disk
.div_ceil
(
block_size
)
;
tracing
::
debug!
(
request_id
=
%
self
.request_id
,
...
...
@@ -864,7 +865,7 @@ impl Slot for VllmConnectorSlot {
let
block_size
=
self
.block_manager
.block_size
();
let
num_computed_blocks
=
num_computed_tokens
/
block_size
;
debug_assert!
(
num_computed_tokens
%
block_size
==
0
);
debug_assert!
(
num_computed_tokens
.is_multiple_of
(
block_size
)
);
let
sequence_hashes
=
self
.sequence
()
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/trtllm_leader.rs
View file @
fb62e2cf
...
...
@@ -4,8 +4,6 @@
use
super
::
*
;
use
crate
::
block_manager
::
BlockManagerBuilder
;
use
dynamo_llm
::
block_manager
::
connector
::
protocol
::
RequestType
;
use
dynamo_llm
::
block_manager
::
kv_consolidator
::
EventSource
;
use
crate
::
block_manager
::
vllm
::
connector
::
leader
::
slot
::{
ConnectorSlotManager
,
SlotManager
,
SlotState
,
};
...
...
@@ -15,6 +13,8 @@ use crate::block_manager::vllm::connector::leader::{
use
crate
::
block_manager
::{
distributed
::
KvbmLeader
as
PyKvbmLeader
,
vllm
::
KvbmRequest
};
use
crate
::
get_current_tokio_handle
;
use
anyhow
;
use
dynamo_llm
::
block_manager
::
connector
::
protocol
::
RequestType
;
use
dynamo_llm
::
block_manager
::
kv_consolidator
::
EventSource
;
use
dynamo_llm
::
block_manager
::
metrics_kvbm
::{
KvbmMetrics
,
KvbmMetricsRegistry
};
use
std
::
collections
::
HashSet
;
use
std
::
sync
::{
Arc
,
OnceLock
};
...
...
@@ -190,7 +190,7 @@ impl Leader for KvConnectorLeader {
// TRTLLM could match partial blocks if enable_partial_reuse = True,
// immediately return 0 to simplify things.
if
num_computed_tokens
%
self
.block_size
!=
0
{
if
!
num_computed_tokens
.is_multiple_of
(
self
.block_size
)
{
return
Ok
((
0
,
false
));
}
...
...
@@ -215,7 +215,9 @@ impl Leader for KvConnectorLeader {
// return the number of external tokens that are ready for onboarding
// we always return true here as we always asynchronously onboard matched blocks
if
let
SlotState
::
OnboardStaged
(
num_external_tokens
)
=
slot
.state
()
{
debug_assert!
((
num_computed_tokens
+
num_external_tokens
)
%
self
.block_size
==
0
);
debug_assert!
(
(
num_computed_tokens
+
num_external_tokens
)
.is_multiple_of
(
self
.block_size
)
);
tracing
::
debug!
(
request_id
=
request_id
,
"scheduling onboarding for {} external tokens"
,
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/worker.rs
View file @
fb62e2cf
...
...
@@ -25,6 +25,7 @@ use dynamo_runtime::DistributedRuntime;
use
dynamo_runtime
::
utils
::
task
::
CriticalTaskExecutionHandle
;
pub
trait
Worker
:
Send
+
Sync
{
#[allow(clippy::too_many_arguments)]
fn
register_kv_caches
(
&
mut
self
,
num_device_blocks
:
usize
,
...
...
@@ -483,6 +484,7 @@ impl PyKvConnectorWorker {
Ok
(
Self
{
connector_worker
})
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature
=
(num_device_blocks,
page_size,
device_id,
dtype_width_bytes,
kv_caches,
raw_event_handles,
device_layout_type=None,
host_layout_type=None,
disk_layout_type=None))]
pub
fn
register_kv_caches
(
&
mut
self
,
...
...
lib/bindings/kvbm/src/block_manager/vllm/slot.rs
View file @
fb62e2cf
...
...
@@ -719,7 +719,7 @@ mod tests {
// Prefill count should remain unchanged
assert_eq!
(
slot
.num_tokens
(
SlotPosition
::
Prefill
),
4
);
if
expected_total
%
BLOCK_SIZE
==
0
{
if
expected_total
.is_multiple_of
(
BLOCK_SIZE
)
{
assert_eq!
(
slot
.mutable
.len
(),
0
);
assert_eq!
(
slot
.immutable
.len
(),
expected_total
/
BLOCK_SIZE
);
}
else
{
...
...
lib/bindings/kvbm/src/lib.rs
View file @
fb62e2cf
...
...
@@ -9,7 +9,9 @@ use std::{fmt::Display, sync::Arc};
use
tokio
::
sync
::
Mutex
;
use
tokio_util
::
sync
::
CancellationToken
;
use
dynamo_runtime
::{
self
as
rs
,
RuntimeConfig
,
logging
,
traits
::
DistributedRuntimeProvider
,
config
};
use
dynamo_runtime
::{
self
as
rs
,
RuntimeConfig
,
config
,
logging
,
traits
::
DistributedRuntimeProvider
,
};
use
dynamo_llm
::{
self
as
llm_rs
};
...
...
@@ -23,8 +25,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
// Initialize tokio runtime first to avoid panics when OTEL_EXPORT_ENABLED=1
init_pyo3_tokio_rt
();
if
config
::
env_is_truthy
(
"OTEL_EXPORT_ENABLED"
)
{
if
config
::
env_is_truthy
(
"OTEL_EXPORT_ENABLED"
)
{
// OTLP batch exporter needs runtime context to spawn background tasks
let
handle
=
get_current_tokio_handle
();
let
_
guard
=
handle
.enter
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment