Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
9d10e050
"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "704c1dad84a97ce045a7819db9c0f816ced5b180"
Unverified
Commit
9d10e050
authored
Mar 02, 2026
by
Kris Hung
Committed by
GitHub
Mar 02, 2026
Browse files
fix: Fix vLLM KVBM integration test (#6768)
parent
93fa4d4d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
191 deletions
+28
-191
lib/llm/src/block_manager/kv_consolidator/subscriber.rs
lib/llm/src/block_manager/kv_consolidator/subscriber.rs
+26
-190
tests/kvbm_integration/test_determinism_agg.py
tests/kvbm_integration/test_determinism_agg.py
+2
-1
No files found.
lib/llm/src/block_manager/kv_consolidator/subscriber.rs
View file @
9d10e050
...
...
@@ -14,6 +14,8 @@ use tokio::task::JoinHandle;
use
tokio_util
::
sync
::
CancellationToken
;
use
zeromq
::{
Socket
,
SocketRecv
,
SubSocket
};
use
dynamo_kv_router
::
zmq_wire
::
RawKvEvent
;
use
super
::
tracker
::{
CacheStatusTracker
,
EventSource
,
StorageTier
};
/// Event batch received from vLLM/TensorRT-LLM (array format)
...
...
@@ -23,9 +25,9 @@ use super::tracker::{CacheStatusTracker, EventSource, StorageTier};
/// rather than an object {"ts": ..., "events": ..., "rank": ...} for vLLM/TensorRT-LLM compatibility.
#[derive(Debug,
Deserialize)]
struct
VllmEventBatch
(
f64
,
// ts
Vec
<
Vllm
RawEvent
>
,
// events
Option
<
i32
>
,
// data_parallel_rank
f64
,
// ts
Vec
<
Raw
Kv
Event
>
,
// events
— reuses the same custom deserializer as the router publisher
Option
<
i32
>
,
// data_parallel_rank
);
impl
VllmEventBatch
{
...
...
@@ -33,7 +35,7 @@ impl VllmEventBatch {
self
.0
}
fn
events
(
&
self
)
->
&
Vec
<
Vllm
RawEvent
>
{
fn
events
(
&
self
)
->
&
Vec
<
Raw
Kv
Event
>
{
&
self
.1
}
...
...
@@ -42,124 +44,6 @@ impl VllmEventBatch {
}
}
/// Block hash can be either an integer or a string (bytes hex-encoded)
///
/// Note: Integers can be u64 or i64 (msgpack compatibility) but we convert to u64 for internal use.
/// - vLLM uses u64 block hashes
/// - TensorRT-LLM uses i64 block hashes (signed integers)
#[derive(Debug,
Clone)]
enum
BlockHash
{
IntU64
(
u64
),
IntI64
(
i64
),
// Added for TensorRT-LLM support (uses signed i64 hashes)
Str
(
String
),
}
impl
<
'de
>
serde
::
Deserialize
<
'de
>
for
BlockHash
{
fn
deserialize
<
D
>
(
deserializer
:
D
)
->
Result
<
Self
,
D
::
Error
>
where
D
:
serde
::
Deserializer
<
'de
>
,
{
use
serde
::
de
::{
self
,
Visitor
};
use
std
::
fmt
;
struct
BlockHashVisitor
;
impl
<
'de
>
Visitor
<
'de
>
for
BlockHashVisitor
{
type
Value
=
BlockHash
;
fn
expecting
(
&
self
,
formatter
:
&
mut
fmt
::
Formatter
)
->
fmt
::
Result
{
formatter
.write_str
(
"an integer or a string"
)
}
fn
visit_u64
<
E
>
(
self
,
value
:
u64
)
->
Result
<
Self
::
Value
,
E
>
where
E
:
de
::
Error
,
{
Ok
(
BlockHash
::
IntU64
(
value
))
}
fn
visit_i64
<
E
>
(
self
,
value
:
i64
)
->
Result
<
Self
::
Value
,
E
>
where
E
:
de
::
Error
,
{
Ok
(
BlockHash
::
IntI64
(
value
))
}
fn
visit_str
<
E
>
(
self
,
value
:
&
str
)
->
Result
<
Self
::
Value
,
E
>
where
E
:
de
::
Error
,
{
Ok
(
BlockHash
::
Str
(
value
.to_string
()))
}
fn
visit_string
<
E
>
(
self
,
value
:
String
)
->
Result
<
Self
::
Value
,
E
>
where
E
:
de
::
Error
,
{
Ok
(
BlockHash
::
Str
(
value
))
}
}
deserializer
.deserialize_any
(
BlockHashVisitor
)
}
}
impl
BlockHash
{
/// Convert to u64, handling both signed and unsigned integers
/// Returns None if the hash cannot be converted (e.g., invalid hex string)
/// This avoids silently collapsing invalid hashes to 0, which could cause collisions
fn
to_u64
(
&
self
)
->
Option
<
u64
>
{
match
self
{
BlockHash
::
IntU64
(
n
)
=>
Some
(
*
n
),
BlockHash
::
IntI64
(
n
)
=>
{
// Convert signed i64 back to unsigned u64 (two's complement)
// Rust's `as u64` automatically handles two's complement conversion
Some
(
*
n
as
u64
)
}
BlockHash
::
Str
(
s
)
=>
{
// Try to parse as hex string, return None on failure
// This avoids silently mapping invalid hashes to 0, which could cause collisions
u64
::
from_str_radix
(
s
,
16
)
.ok
()
}
}
}
}
impl
std
::
fmt
::
Display
for
BlockHash
{
fn
fmt
(
&
self
,
f
:
&
mut
std
::
fmt
::
Formatter
<
'_
>
)
->
std
::
fmt
::
Result
{
match
self
{
BlockHash
::
IntU64
(
n
)
=>
write!
(
f
,
"{}"
,
n
),
BlockHash
::
IntI64
(
n
)
=>
write!
(
f
,
"{}"
,
n
),
BlockHash
::
Str
(
s
)
=>
write!
(
f
,
"{}"
,
s
),
}
}
}
/// Raw vLLM event format (preserves all data including token_ids)
#[derive(Debug,
Clone,
Deserialize)]
#[serde(tag
=
"type"
)]
enum
VllmRawEvent
{
#[serde(rename
=
"BlockStored"
)]
BlockStored
{
block_hashes
:
Vec
<
BlockHash
>
,
parent_block_hash
:
Option
<
BlockHash
>
,
token_ids
:
Vec
<
i32
>
,
block_size
:
i32
,
#[serde(default)]
medium
:
Option
<
String
>
,
#[serde(default)]
lora_name
:
Option
<
String
>
,
},
#[serde(rename
=
"BlockRemoved"
)]
BlockRemoved
{
block_hashes
:
Vec
<
BlockHash
>
,
#[serde(default)]
medium
:
Option
<
String
>
,
},
#[serde(rename
=
"AllBlocksCleared"
)]
AllBlocksCleared
{},
}
/// Start ZMQ listener and process events into tracker
pub
async
fn
start_simple_zmq_listener
(
endpoint
:
String
,
...
...
@@ -266,18 +150,19 @@ async fn run_listener_loop(
fn
process_event
(
tracker
:
&
mut
CacheStatusTracker
,
event
:
Vllm
RawEvent
,
event
:
Raw
Kv
Event
,
data_parallel_rank
:
Option
<
i32
>
,
engine_source
:
EventSource
,
)
{
match
event
{
Vllm
RawEvent
::
BlockStored
{
Raw
Kv
Event
::
BlockStored
{
block_hashes
,
parent_block_hash
,
token_ids
,
block_size
,
medium
,
lora_name
,
..
// block_mm_infos not used in consolidator
}
=>
{
let
storage_tier
=
medium
.as_ref
()
...
...
@@ -294,32 +179,15 @@ fn process_event(
data_parallel_rank
);
// Convert block_size from i32 to usize for chunking
// SAFETY: Must validate block_size > 0 to prevent panic in chunks()
let
block_size_usize
=
match
usize
::
try_from
(
block_size
)
{
Ok
(
size
)
if
size
>
0
=>
size
,
_
=>
{
tracing
::
warn!
(
"Invalid block_size {} (must be positive), skipping event to avoid chunks() panic"
,
block_size
);
return
;
}
};
// Convert token_ids from i32 to u32 and split into chunks
let
token_ids_u32
:
Vec
<
u32
>
=
token_ids
.into_iter
()
.filter_map
(|
t
|
{
u32
::
try_from
(
t
)
.ok
()
.or_else
(||
{
tracing
::
warn!
(
"Invalid token ID {}, skipping"
,
t
);
None
})
})
.collect
();
// block_size is already usize; guard against 0 to avoid chunks() panic
if
block_size
==
0
{
tracing
::
warn!
(
"Invalid block_size 0 (must be positive), skipping event to avoid chunks() panic"
);
return
;
}
let
token_chunks
:
Vec
<
Vec
<
u32
>>
=
token_ids_u32
.chunks
(
block_size_usize
)
// token_ids is already Vec<u32>; split directly into per-block chunks
let
token_chunks
:
Vec
<
Vec
<
u32
>>
=
token_ids
.chunks
(
block_size
)
.map
(|
chunk
|
chunk
.to_vec
())
.collect
();
...
...
@@ -332,40 +200,19 @@ fn process_event(
return
;
}
// Process each block with its corresponding token chunk
// For batches, chain the blocks: each block's parent is the previous block in the batch
let
mut
current_parent
=
parent_block_hash
.as_ref
()
.and_then
(|
h
|
{
h
.to_u64
()
.or_else
(||
{
tracing
::
warn!
(
"Skipping parent block hash with unparsable string hash {:?}"
,
h
);
None
})
})
.map
(|
h
|
h
.to_string
());
for
(
i
,
block_hash
)
in
block_hashes
.iter
()
.enumerate
()
{
let
block_tokens
=
token_chunks
[
i
]
.clone
();
// For batches, chain the blocks: each block's parent is the previous block
let
mut
current_parent
=
parent_block_hash
.map
(|
h
|
h
.into_u64
()
.to_string
());
// Skip blocks with invalid/unparsable hashes to avoid collisions
let
Some
(
block_hash_u64
)
=
block_hash
.to_u64
()
else
{
tracing
::
warn!
(
"Skipping block with unparsable string hash {:?} (index {})"
,
block_hash
,
i
);
continue
;
};
for
(
i
,
block_hash
)
in
block_hashes
.into_iter
()
.enumerate
()
{
let
block_tokens
=
token_chunks
[
i
]
.clone
();
let
block_hash_u64
=
block_hash
.into_u64
();
tracker
.handle_store
(
block_hash_u64
.to_string
(),
engine_source
,
block_tokens
,
current_parent
.clone
(),
block_size
_usize
,
block_size
,
lora_name
.clone
(),
Some
(
storage_tier
),
data_parallel_rank
,
...
...
@@ -376,10 +223,7 @@ fn process_event(
}
}
VllmRawEvent
::
BlockRemoved
{
block_hashes
,
medium
,
}
=>
{
RawKvEvent
::
BlockRemoved
{
block_hashes
,
medium
}
=>
{
let
storage_tier
=
medium
.as_ref
()
.and_then
(|
m
|
StorageTier
::
from_vllm_medium
(
m
))
...
...
@@ -392,19 +236,11 @@ fn process_event(
);
for
block_hash
in
block_hashes
{
// Skip blocks with invalid/unparsable hashes to avoid collisions
let
Some
(
block_hash_u64
)
=
block_hash
.to_u64
()
else
{
tracing
::
warn!
(
"Skipping removal of block with unparsable string hash {:?}"
,
block_hash
);
continue
;
};
tracker
.handle_remove
(
&
block_hash_u64
.to_string
(),
engine_source
);
tracker
.handle_remove
(
&
block_hash
.into_u64
()
.to_string
(),
engine_source
);
}
}
Vllm
RawEvent
::
AllBlocksCleared
{}
=>
{
Raw
Kv
Event
::
AllBlocksCleared
=>
{
tracing
::
debug!
(
"Processing AllBlocksCleared"
);
tracker
.handle_clear_all
();
}
...
...
tests/kvbm_integration/test_determinism_agg.py
View file @
9d10e050
...
...
@@ -123,7 +123,6 @@ class LLMServerManager:
"DYN_KVBM_METRICS_PORT"
:
str
(
self
.
metrics_port
),
# Enable vLLM batch invariant for deterministic batching
"VLLM_BATCH_INVARIANT"
:
"1"
,
"VLLM_ATTENTION_BACKEND"
:
"FLASH_ATTN"
,
}
)
...
...
@@ -154,6 +153,8 @@ class LLMServerManager:
"--kv-transfer-config"
,
'{"kv_connector":"DynamoConnector","kv_role":"kv_both", "kv_connector_module_path": "kvbm.vllm_integration.connector"}'
,
os
.
environ
.
get
(
"KVBM_MODEL_ID"
,
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
),
"--attention-config.backend"
,
"FLASH_ATTN"
,
"--max-model-len"
,
"8000"
,
# required to fit on L4 GPU when using 8b model
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment