Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d5803cbe
Unverified
Commit
d5803cbe
authored
Apr 07, 2026
by
Alec
Committed by
GitHub
Apr 07, 2026
Browse files
chore: bump vLLM to 0.19.0 (#7894)
parent
ad3a46a6
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
169 additions
and
46 deletions
+169
-46
components/src/dynamo/frontend/vllm_processor.py
components/src/dynamo/frontend/vllm_processor.py
+1
-1
components/src/dynamo/vllm/multimodal_utils/chat_processor.py
...onents/src/dynamo/vllm/multimodal_utils/chat_processor.py
+1
-1
components/src/dynamo/vllm/multimodal_utils/protocol.py
components/src/dynamo/vllm/multimodal_utils/protocol.py
+2
-3
components/src/dynamo/vllm/tests/test_vllm_kv_events_api.py
components/src/dynamo/vllm/tests/test_vllm_kv_events_api.py
+24
-0
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
+1
-1
container/context.yaml
container/context.yaml
+3
-3
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+20
-24
docs/reference/support-matrix.md
docs/reference/support-matrix.md
+1
-1
examples/backends/vllm/launch/agg_omni_audio.sh
examples/backends/vllm/launch/agg_omni_audio.sh
+3
-0
lib/kv-router/src/zmq_wire.rs
lib/kv-router/src/zmq_wire.rs
+34
-5
lib/llm/src/kv_router/publisher/tests.rs
lib/llm/src/kv_router/publisher/tests.rs
+74
-2
pyproject.toml
pyproject.toml
+5
-5
No files found.
components/src/dynamo/frontend/vllm_processor.py
View file @
d5803cbe
...
@@ -14,7 +14,7 @@ from collections.abc import AsyncGenerator
...
@@ -14,7 +14,7 @@ from collections.abc import AsyncGenerator
from
typing
import
Any
from
typing
import
Any
from
vllm.config
import
CacheConfig
,
LoadConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
LoadConfig
,
ModelConfig
,
VllmConfig
from
vllm.inputs
.data
import
TokensPrompt
from
vllm.inputs
import
TokensPrompt
from
vllm.reasoning
import
ReasoningParser
,
ReasoningParserManager
from
vllm.reasoning
import
ReasoningParser
,
ReasoningParserManager
from
vllm.sampling_params
import
RequestOutputKind
,
SamplingParams
from
vllm.sampling_params
import
RequestOutputKind
,
SamplingParams
from
vllm.tasks
import
GENERATION_TASKS
from
vllm.tasks
import
GENERATION_TASKS
...
...
components/src/dynamo/vllm/multimodal_utils/chat_processor.py
View file @
d5803cbe
...
@@ -20,7 +20,7 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
...
@@ -20,7 +20,7 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.entrypoints.chat_utils
import
ConversationMessage
from
vllm.entrypoints.chat_utils
import
ConversationMessage
from
vllm.inputs
.data
import
TokensPrompt
from
vllm.inputs
import
TokensPrompt
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
...
...
components/src/dynamo/vllm/multimodal_utils/protocol.py
View file @
d5803cbe
...
@@ -22,9 +22,8 @@ import torch
...
@@ -22,9 +22,8 @@ import torch
from
pydantic
import
BaseModel
,
ConfigDict
,
Field
,
field_serializer
,
field_validator
from
pydantic
import
BaseModel
,
ConfigDict
,
Field
,
field_serializer
,
field_validator
from
pydantic_core
import
core_schema
from
pydantic_core
import
core_schema
from
typing_extensions
import
NotRequired
from
typing_extensions
import
NotRequired
from
vllm.inputs
.data
import
TokensPrompt
from
vllm.inputs
import
MultiModalUUIDDict
,
TokensPrompt
# noqa: F401
from
vllm.logprobs
import
PromptLogprobs
from
vllm.logprobs
import
PromptLogprobs
from
vllm.multimodal.inputs
import
MultiModalUUIDDict
# noqa: F401
from
vllm.outputs
import
CompletionOutput
from
vllm.outputs
import
CompletionOutput
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.v1.metrics.stats
import
RequestStateStats
from
vllm.v1.metrics.stats
import
RequestStateStats
...
@@ -55,7 +54,7 @@ class PrefillResponse(BaseModel):
...
@@ -55,7 +54,7 @@ class PrefillResponse(BaseModel):
# Hack to override the type of multi_modal_data in TokensPrompt
# Hack to override the type of multi_modal_data in TokensPrompt
# as pydantic doesn't understand generic types
# as pydantic doesn't understand generic types
# TokensPrompt is defined here: https://github.com/vllm-project/vllm/blob/a4c402a756fa3213caf9d2cde0e4ceb2d57727f2/vllm/inputs/data.py#L38
# TokensPrompt is exported from vllm.inputs and implemented in vllm/inputs/llm.py.
# multi_modal_data is defined here: https://github.com/vllm-project/vllm/blob/main/vllm/multimodal/inputs.py#L103
# multi_modal_data is defined here: https://github.com/vllm-project/vllm/blob/main/vllm/multimodal/inputs.py#L103
# ModalityData is defined here: https://github.com/vllm-project/vllm/blob/main/vllm/multimodal/inputs.py#L80
# ModalityData is defined here: https://github.com/vllm-project/vllm/blob/main/vllm/multimodal/inputs.py#L80
class
PatchedTokensPrompt
(
TokensPrompt
):
class
PatchedTokensPrompt
(
TokensPrompt
):
...
...
components/src/dynamo/vllm/tests/test_vllm_kv_events_api.py
View file @
d5803cbe
...
@@ -194,3 +194,27 @@ class TestVllmKvEventsApi:
...
@@ -194,3 +194,27 @@ class TestVllmKvEventsApi:
assert
decoded
[
6
]
==
"GPU"
,
f
"medium at wrong position:
{
decoded
[
6
]
}
"
assert
decoded
[
6
]
==
"GPU"
,
f
"medium at wrong position:
{
decoded
[
6
]
}
"
assert
decoded
[
7
]
is
None
,
f
"lora_name at wrong position:
{
decoded
[
7
]
}
"
assert
decoded
[
7
]
is
None
,
f
"lora_name at wrong position:
{
decoded
[
7
]
}
"
assert
decoded
[
8
]
is
None
,
f
"extra_keys at wrong position:
{
decoded
[
8
]
}
"
assert
decoded
[
8
]
is
None
,
f
"extra_keys at wrong position:
{
decoded
[
8
]
}
"
def test_block_stored_tuple_extra_keys_serialization_format(self):
    """Verify multimodal tuple extra_keys keep the vLLM 0.19 wire shape.

    A ``BlockStored`` event whose ``extra_keys`` holds one block with a
    single ``(mm_hash, 7)`` tuple is encoded with msgpack and decoded again.
    The decoded payload is checked positionally: index 0 must be the
    ``"BlockStored"`` tag and index 8 must be the extra_keys field, with the
    tuples decoded as nested lists.
    """
    import msgspec

    mm_hash = "0123456789abcdef00112233445566778899aabbccddeefffedcba9876543210"
    # One block, carrying one (hash, integer) tuple.
    tuple_extra_keys = [((mm_hash, 7),)]

    stored = BlockStored(
        block_hashes=[123],
        parent_block_hash=None,
        token_ids=[1, 2, 3, 4],
        block_size=16,
        lora_id=None,
        medium="GPU",
        lora_name=None,
        extra_keys=tuple_extra_keys,
    )

    wire_bytes = msgspec.msgpack.encode(stored)
    decoded = msgspec.msgpack.decode(wire_bytes)

    event_tag = decoded[0]
    wire_extra_keys = decoded[8]

    assert event_tag == "BlockStored"
    # msgpack has no tuple type, so tuples are expected back as lists.
    assert wire_extra_keys == [[[mm_hash, 7]]], (
        "vLLM multimodal extra_keys no longer serialize as nested tuple/list "
        f"payloads. Decoded: {wire_extra_keys!r}"
    )
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
View file @
d5803cbe
...
@@ -23,7 +23,7 @@ _chat_protocol = importlib.import_module(
...
@@ -23,7 +23,7 @@ _chat_protocol = importlib.import_module(
"vllm.entrypoints.openai.chat_completion.protocol"
"vllm.entrypoints.openai.chat_completion.protocol"
)
)
_engine_protocol
=
importlib
.
import_module
(
"vllm.entrypoints.openai.engine.protocol"
)
_engine_protocol
=
importlib
.
import_module
(
"vllm.entrypoints.openai.engine.protocol"
)
_inputs_data
=
importlib
.
import_module
(
"vllm.inputs
.data
"
)
_inputs_data
=
importlib
.
import_module
(
"vllm.inputs"
)
_reasoning
=
importlib
.
import_module
(
"vllm.reasoning"
)
_reasoning
=
importlib
.
import_module
(
"vllm.reasoning"
)
_sampling_params
=
importlib
.
import_module
(
"vllm.sampling_params"
)
_sampling_params
=
importlib
.
import_module
(
"vllm.sampling_params"
)
_tool_parsers
=
importlib
.
import_module
(
"vllm.tool_parsers"
)
_tool_parsers
=
importlib
.
import_module
(
"vllm.tool_parsers"
)
...
...
container/context.yaml
View file @
d5803cbe
...
@@ -44,13 +44,13 @@ vllm:
...
@@ -44,13 +44,13 @@ vllm:
runtime_image
:
nvcr.io/nvidia/cuda
runtime_image
:
nvcr.io/nvidia/cuda
base_image_tag
:
25.06-cuda12.9-devel-ubuntu24.04
base_image_tag
:
25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag
:
12.9.1-runtime-ubuntu24.04
runtime_image_tag
:
12.9.1-runtime-ubuntu24.04
vllm_ref
:
v0.1
8
.0
vllm_ref
:
v0.1
9
.0
cuda13.0
:
cuda13.0
:
base_image
:
nvcr.io/nvidia/cuda-dl-base
base_image
:
nvcr.io/nvidia/cuda-dl-base
runtime_image
:
nvcr.io/nvidia/cuda
runtime_image
:
nvcr.io/nvidia/cuda
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag
:
13.0.2-runtime-ubuntu24.04
runtime_image_tag
:
13.0.2-runtime-ubuntu24.04
vllm_ref
:
v0.1
8
.0
vllm_ref
:
v0.1
9
.0
xpu
:
xpu
:
base_image
:
intel/deep-learning-essentials
base_image
:
intel/deep-learning-essentials
runtime_image
:
intel/deep-learning-essentials
runtime_image
:
intel/deep-learning-essentials
...
@@ -65,7 +65,7 @@ vllm:
...
@@ -65,7 +65,7 @@ vllm:
vllm_ref
:
v0.16.0
vllm_ref
:
v0.16.0
flashinf_ref
:
v0.6.6
flashinf_ref
:
v0.6.6
lmcache_ref
:
0.4.2
lmcache_ref
:
0.4.2
vllm_omni_ref
:
"
v0.1
8
.0"
vllm_omni_ref
:
"
release/
v0.1
9
.0
rc1
"
nixl_ref
:
0.10.1
nixl_ref
:
0.10.1
max_jobs
:
"
10"
max_jobs
:
"
10"
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
...
...
container/deps/vllm/install_vllm.sh
View file @
d5803cbe
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
set
-euo
pipefail
set
-euo
pipefail
VLLM_VER
=
"0.1
8
.0"
VLLM_VER
=
"0.1
9
.0"
VLLM_REF
=
"v
${
VLLM_VER
}
"
VLLM_REF
=
"v
${
VLLM_VER
}
"
DEVICE
=
"cuda"
DEVICE
=
"cuda"
...
@@ -141,6 +141,25 @@ cd vllm
...
@@ -141,6 +141,25 @@ cd vllm
git checkout
$VLLM_REF
git checkout
$VLLM_REF
echo
"✓ vLLM repository cloned"
echo
"✓ vLLM repository cloned"
# printf is used instead of `echo "\n..."`: bash's builtin echo does not
# interpret backslash escapes without -e, so the header would otherwise be
# printed with a literal "\n" in front of it.
printf '\n=== Installing vLLM-Omni ===\n'
# Install omni BEFORE vLLM. Its transitive dependencies can otherwise upgrade the
# torch/transformers stack after vLLM is installed, which can leave vllm._C ABI-mismatched.
# vLLM should remain the final owner of the runtime stack in this environment.
if [ -n "$VLLM_OMNI_REF" ] && [ "$ARCH" = "amd64" ]; then
    # Try PyPI first, fall back to building from source.
    # ${VLLM_OMNI_REF#v} drops one leading "v" so a tag-style ref maps to a
    # PyPI version string; refs that do not start with "v" are passed as-is
    # and, if PyPI rejects them, fall through to the source build below.
    if uv pip install "vllm-omni==${VLLM_OMNI_REF#v}" 2>&1; then
        echo "✓ vLLM-Omni ${VLLM_OMNI_REF} installed from PyPI"
    else
        echo "⚠ PyPI install failed, building from source..."
        # Expansions are quoted so a ref or install path containing spaces or
        # glob characters is passed through as a single argument.
        git clone --depth 1 --branch "${VLLM_OMNI_REF}" https://github.com/vllm-project/vllm-omni.git "$INSTALLATION_DIR/vllm-omni"
        uv pip install "$INSTALLATION_DIR/vllm-omni"
        # The checkout is only needed for the install step.
        rm -rf "$INSTALLATION_DIR/vllm-omni"
        echo "✓ vLLM-Omni ${VLLM_OMNI_REF} installed from source"
    fi
else
    echo "⚠ Skipping vLLM-Omni (no ref provided or ARM64 not supported)"
fi
if
[
"
$DEVICE
"
=
"xpu"
]
;
then
if
[
"
$DEVICE
"
=
"xpu"
]
;
then
echo
"
\n
=== Installing vLLM ==="
echo
"
\n
=== Installing vLLM ==="
uv pip
install
-r
requirements/xpu.txt
--index-strategy
unsafe-best-match
uv pip
install
-r
requirements/xpu.txt
--index-strategy
unsafe-best-match
...
@@ -240,29 +259,6 @@ else
...
@@ -240,29 +259,6 @@ else
echo
"⚠ Skipping LMCache (ARM64 or CUDA 13 not supported)"
echo
"⚠ Skipping LMCache (ARM64 or CUDA 13 not supported)"
fi
fi
echo
"
\n
=== Installing vLLM-Omni ==="
if
[
-n
"
$VLLM_OMNI_REF
"
]
&&
[
"
$ARCH
"
=
"amd64"
]
;
then
# Save original vllm entrypoint before vllm-omni overwrites it
VLLM_BIN
=
$(
which vllm
)
cp
"
$VLLM_BIN
"
/tmp/vllm-entrypoint-backup
# Try PyPI first, fall back to building from source
if
uv pip
install
vllm-omni
==
${
VLLM_OMNI_REF
#v
}
2>&1
;
then
echo
"✓ vLLM-Omni
${
VLLM_OMNI_REF
}
installed from PyPI"
else
echo
"⚠ PyPI install failed, building from source..."
git clone
--depth
1
--branch
${
VLLM_OMNI_REF
}
https://github.com/vllm-project/vllm-omni.git
$INSTALLATION_DIR
/vllm-omni
uv pip
install
$INSTALLATION_DIR
/vllm-omni
rm
-rf
$INSTALLATION_DIR
/vllm-omni
echo
"✓ vLLM-Omni
${
VLLM_OMNI_REF
}
installed from source"
fi
# Restore original vllm CLI entrypoint (vllm-omni replaces it with its own)
cp
/tmp/vllm-entrypoint-backup
"
$VLLM_BIN
"
echo
"✓ Original vllm entrypoint preserved"
else
echo
"⚠ Skipping vLLM-Omni (no ref provided or ARM64 not supported)"
fi
if
[
"
$DEVICE
"
=
"cuda"
]
;
then
if
[
"
$DEVICE
"
=
"cuda"
]
;
then
echo
"
\n
=== Installing DeepGEMM ==="
echo
"
\n
=== Installing DeepGEMM ==="
cd
$INSTALLATION_DIR
/vllm/tools
cd
$INSTALLATION_DIR
/vllm/tools
...
...
docs/reference/support-matrix.md
View file @
d5803cbe
...
@@ -29,7 +29,7 @@ The following table shows the backend framework versions included with each Dyna
...
@@ -29,7 +29,7 @@ The following table shows the backend framework versions included with each Dyna
|
**Dynamo**
|
**SGLang**
|
**TensorRT-LLM**
|
**vLLM**
|
**NIXL**
|
|
**Dynamo**
|
**SGLang**
|
**TensorRT-LLM**
|
**vLLM**
|
**NIXL**
|
| :--- | :--- | :--- | :--- | :--- |
| :--- | :--- | :--- | :--- | :--- |
|
**main (ToT)**
|
`0.5.9`
|
`1.3.0rc9`
|
`0.1
8
.0`
|
`0.10.1`
|
|
**main (ToT)**
|
`0.5.9`
|
`1.3.0rc9`
|
`0.1
9
.0`
|
`0.10.1`
|
|
**v1.1.0-dev.1**
*(experimental)*
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.17.1`
|
`0.10.1`
|
|
**v1.1.0-dev.1**
*(experimental)*
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.17.1`
|
`0.10.1`
|
|
**v1.0.1**
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.16.0`
|
`0.10.1`
|
|
**v1.0.1**
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.16.0`
|
`0.10.1`
|
|
**v1.0.0**
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.16.0`
|
`0.10.1`
|
|
**v1.0.0**
|
`0.5.9`
|
`1.3.0rc5.post1`
|
`0.16.0`
|
`0.10.1`
|
...
...
examples/backends/vllm/launch/agg_omni_audio.sh
View file @
d5803cbe
...
@@ -46,6 +46,9 @@ FRONTEND_PID=$!
...
@@ -46,6 +46,9 @@ FRONTEND_PID=$!
sleep
2
sleep
2
echo
"Starting Omni Audio worker..."
echo
"Starting Omni Audio worker..."
# Upstream qwen3_tts stage configs still use a 65536 stage-1 max_model_len.
# vLLM 0.19 validates that against the model config unless we opt in here.
VLLM_ALLOW_LONG_MAX_MODEL_LEN
=
1
\
DYN_SYSTEM_PORT
=
${
DYN_SYSTEM_PORT
:-
8081
}
\
DYN_SYSTEM_PORT
=
${
DYN_SYSTEM_PORT
:-
8081
}
\
python
-m
dynamo.vllm.omni
\
python
-m
dynamo.vllm.omni
\
--model
"
$MODEL
"
\
--model
"
$MODEL
"
\
...
...
lib/kv-router/src/zmq_wire.rs
View file @
d5803cbe
...
@@ -113,12 +113,27 @@ pub fn parse_mm_hash_from_extra_key(s: &str) -> Option<u64> {
...
@@ -113,12 +113,27 @@ pub fn parse_mm_hash_from_extra_key(s: &str) -> Option<u64> {
None
None
}
}
/// One element of a per-block `extra_keys` list in a `BlockStored` event.
///
/// The enum is `#[serde(untagged)]`: nothing on the wire names the variant,
/// so serde attempts the variants in declaration order and keeps the first
/// one that deserializes. The order of the variants below is therefore part
/// of the decoding behavior and must not be rearranged casually.
///
/// `extra_keys_to_block_mm_infos` only reads the string carried by `Hash`,
/// `HashWithSignedOffset` and `HashWithUnsignedOffset`; every other variant
/// is mapped to `None` there.
// NOTE(review): the non-hash variants presumably exist so that extra keys
// unrelated to multimodal content still deserialize instead of failing the
// whole event — confirm against what vLLM actually places in extra_keys.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum ExtraKeyItem {
    /// A bare string key (the pre-0.19 `["hash1", "hash2", ...]` form).
    Hash(String),
    /// A `[hash, start_offset]` pair whose second element fits in `i64`.
    HashWithSignedOffset((String, i64)),
    /// A `[hash, start_offset]` pair whose second element only fits in `u64`
    /// (tried after the signed form has been rejected).
    HashWithUnsignedOffset((String, u64)),
    /// A byte-sequence key; not interpreted as a multimodal hash.
    Bytes(Vec<u8>),
    /// A signed integer key; not interpreted as a multimodal hash.
    Signed(i64),
    /// An unsigned integer key; not interpreted as a multimodal hash.
    Unsigned(u64),
    /// A floating-point key; not interpreted as a multimodal hash.
    Float(f64),
    /// A boolean key; not interpreted as a multimodal hash.
    Bool(bool),
}
/// Convert vLLM BlockStored extra_keys to block-level MM infos.
/// Convert vLLM BlockStored extra_keys to block-level MM infos.
/// extra_keys is a list aligned with blocks:
/// extra_keys is a list aligned with blocks:
/// - None => no MM content in that block
/// - None => no MM content in that block
/// - ["hash1", "hash2", ...] => one or more MM objects in that block
/// - ["hash1", "hash2", ...] => one or more MM objects in that block
/// - [[hash, start_offset], ...] => one or more MM objects with block-relative
/// start offsets (vLLM 0.19+)
pub
fn
extra_keys_to_block_mm_infos
(
pub
fn
extra_keys_to_block_mm_infos
(
extra_keys
:
Option
<
Vec
<
Option
<
Vec
<
String
>>>>
,
extra_keys
:
Option
<
Vec
<
Option
<
Vec
<
ExtraKeyItem
>>>>
,
)
->
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>
{
)
->
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>
{
let
extra_keys
=
extra_keys
?
;
let
extra_keys
=
extra_keys
?
;
if
extra_keys
.is_empty
()
{
if
extra_keys
.is_empty
()
{
...
@@ -131,10 +146,24 @@ pub fn extra_keys_to_block_mm_infos(
...
@@ -131,10 +146,24 @@ pub fn extra_keys_to_block_mm_infos(
let
mm_objects
:
Vec
<
BlockMmObjectInfo
>
=
block_keys
let
mm_objects
:
Vec
<
BlockMmObjectInfo
>
=
block_keys
.unwrap_or_default
()
.unwrap_or_default
()
.iter
()
.iter
()
.filter_map
(|
key
|
parse_mm_hash_from_extra_key
(
key
))
.filter_map
(|
key
|
match
key
{
ExtraKeyItem
::
Hash
(
hash
)
|
ExtraKeyItem
::
HashWithSignedOffset
((
hash
,
_
))
|
ExtraKeyItem
::
HashWithUnsignedOffset
((
hash
,
_
))
=>
{
parse_mm_hash_from_extra_key
(
hash
)
}
ExtraKeyItem
::
Bytes
(
_
)
|
ExtraKeyItem
::
Signed
(
_
)
|
ExtraKeyItem
::
Unsigned
(
_
)
|
ExtraKeyItem
::
Float
(
_
)
|
ExtraKeyItem
::
Bool
(
_
)
=>
None
,
})
.map
(|
mm_hash
|
BlockMmObjectInfo
{
.map
(|
mm_hash
|
BlockMmObjectInfo
{
mm_hash
,
mm_hash
,
offsets
:
vec!
[],
// extra_keys does not carry offsets today
// vLLM extra_keys exposes MM start offsets but not MM lengths.
// Dynamo's block hash only depends on mm_hash today, so keep
// offsets empty rather than inventing a synthetic range.
offsets
:
vec!
[],
})
})
.collect
();
.collect
();
...
@@ -193,7 +222,7 @@ impl<'de> Visitor<'de> for RawKvEventVisitor {
...
@@ -193,7 +222,7 @@ impl<'de> Visitor<'de> for RawKvEventVisitor {
let
mut
block_size
:
Option
<
usize
>
=
None
;
let
mut
block_size
:
Option
<
usize
>
=
None
;
let
mut
medium
:
Option
<
Option
<
String
>>
=
None
;
let
mut
medium
:
Option
<
Option
<
String
>>
=
None
;
let
mut
lora_name
:
Option
<
Option
<
String
>>
=
None
;
let
mut
lora_name
:
Option
<
Option
<
String
>>
=
None
;
let
mut
extra_keys
:
Option
<
Option
<
Vec
<
Option
<
Vec
<
String
>>>>>
=
None
;
let
mut
extra_keys
:
Option
<
Option
<
Vec
<
Option
<
Vec
<
ExtraKeyItem
>>>>>
=
None
;
let
mut
block_mm_infos
:
Option
<
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>>
=
None
;
let
mut
block_mm_infos
:
Option
<
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>>
=
None
;
while
let
Some
(
key
)
=
map
.next_key
::
<
String
>
()
?
{
while
let
Some
(
key
)
=
map
.next_key
::
<
String
>
()
?
{
...
@@ -308,7 +337,7 @@ impl<'de> Visitor<'de> for RawKvEventVisitor {
...
@@ -308,7 +337,7 @@ impl<'de> Visitor<'de> for RawKvEventVisitor {
let
_
lora_id
:
Option
<
u64
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
_
lora_id
:
Option
<
u64
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
medium
:
Option
<
String
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
medium
:
Option
<
String
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
lora_name
:
Option
<
String
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
lora_name
:
Option
<
String
>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
let
extra_keys
:
Option
<
Vec
<
Option
<
Vec
<
String
>>>>
=
let
extra_keys
:
Option
<
Vec
<
Option
<
Vec
<
ExtraKeyItem
>>>>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
seq
.next_element
()
?
.unwrap_or
(
None
);
let
block_mm_infos
:
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>
=
let
block_mm_infos
:
Option
<
Vec
<
Option
<
BlockExtraInfo
>>>
=
seq
.next_element
()
?
.unwrap_or
(
None
);
seq
.next_element
()
?
.unwrap_or
(
None
);
...
...
lib/llm/src/kv_router/publisher/tests.rs
View file @
d5803cbe
...
@@ -333,9 +333,12 @@ mod test_event_processing {
...
@@ -333,9 +333,12 @@ mod test_event_processing {
let
mm_hash
=
let
mm_hash
=
"0123456789abcdef00112233445566778899aabbccddeefffedcba9876543210"
.to_string
();
"0123456789abcdef00112233445566778899aabbccddeefffedcba9876543210"
.to_string
();
let
infos
=
extra_keys_to_block_mm_infos
(
Some
(
vec!
[
let
infos
=
extra_keys_to_block_mm_infos
(
Some
(
vec!
[
Some
(
vec!
[
mm_hash
.clone
()]),
Some
(
vec!
[
ExtraKeyItem
::
Hash
(
mm_hash
.clone
()
)
]),
None
,
None
,
Some
(
vec!
[
"invalid"
.to_string
(),
mm_hash
]),
Some
(
vec!
[
ExtraKeyItem
::
Hash
(
"invalid"
.to_string
()),
ExtraKeyItem
::
Hash
(
mm_hash
),
]),
]))
]))
.expect
(
"expected parsed MM infos"
);
.expect
(
"expected parsed MM infos"
);
...
@@ -383,6 +386,32 @@ mod test_event_processing {
...
@@ -383,6 +386,32 @@ mod test_event_processing {
);
);
}
}
/// A positional (sequence-encoded) `BlockStored` event whose ninth element
/// (index 8) holds tuple-style extra keys must deserialize, and the resulting
/// block MM info must carry the hash parsed from the key string.
#[test]
fn test_seq_block_stored_field8_supports_tuple_extra_keys() {
    let mm_hash =
        "0123456789abcdef00112233445566778899aabbccddeefffedcba9876543210".to_string();

    // Elements are listed in wire order; index 8 is the extra_keys field.
    let positional_event = (
        "BlockStored",
        vec![10_u64],
        None::<u64>,
        vec![1_u32, 2, 3, 4],
        4_usize,
        None::<u64>,
        None::<String>,
        None::<String>,
        vec![Some(vec![(mm_hash, 7_i64)])],
    );
    let encoded = rmps::to_vec(&positional_event).unwrap();

    let decoded: RawKvEvent = rmps::from_slice(&encoded).unwrap();
    let block_mm_infos = match decoded {
        RawKvEvent::BlockStored { block_mm_infos, .. } => block_mm_infos,
        _ => panic!("expected BlockStored"),
    };

    let per_block = block_mm_infos.unwrap();
    let first_block = per_block[0].as_ref().unwrap();
    assert_eq!(first_block.mm_objects[0].mm_hash, 0x0123_4567_89ab_cdef);
}
#[test]
#[test]
fn
test_map_block_stored_supports_extra_keys
()
{
fn
test_map_block_stored_supports_extra_keys
()
{
#[derive(serde::Serialize)]
#[derive(serde::Serialize)]
...
@@ -423,6 +452,49 @@ mod test_event_processing {
...
@@ -423,6 +452,49 @@ mod test_event_processing {
0x0123_4567_89ab_cdef
0x0123_4567_89ab_cdef
);
);
}
}
/// A `BlockStored` event built from a named-field struct with tuple-style
/// extra keys must deserialize, and the resulting block MM info must carry
/// the hash parsed from the key string.
#[test]
fn test_map_block_stored_supports_tuple_extra_keys() {
    type BlockTupleExtraKeys = Option<Vec<Option<Vec<(String, i64)>>>>;

    // Field order and names mirror the BlockStored wire layout.
    #[derive(serde::Serialize)]
    struct MapBlockStoredEvent {
        #[serde(rename = "type")]
        event_type: &'static str,
        block_hashes: Vec<u64>,
        parent_block_hash: Option<u64>,
        token_ids: Vec<u32>,
        block_size: usize,
        lora_id: Option<u64>,
        medium: Option<String>,
        lora_name: Option<String>,
        extra_keys: BlockTupleExtraKeys,
    }

    let mm_hash =
        "0123456789abcdef00112233445566778899aabbccddeefffedcba9876543210".to_string();

    let source_event = MapBlockStoredEvent {
        event_type: "BlockStored",
        block_hashes: vec![10],
        parent_block_hash: None,
        token_ids: vec![1, 2, 3, 4],
        block_size: 4,
        lora_id: None,
        medium: Some("GPU".to_string()),
        lora_name: None,
        extra_keys: Some(vec![Some(vec![(mm_hash, 3)])]),
    };
    // NOTE(review): if `rmps` is rmp-serde, `to_vec` encodes structs
    // positionally (as arrays) and `to_vec_named` is the map-producing
    // variant — confirm this test reaches the map visitor as its name implies.
    let encoded = rmps::to_vec(&source_event).unwrap();

    let decoded: RawKvEvent = rmps::from_slice(&encoded).unwrap();
    let block_mm_infos = match decoded {
        RawKvEvent::BlockStored { block_mm_infos, .. } => block_mm_infos,
        _ => panic!("expected BlockStored"),
    };

    let per_block = block_mm_infos.unwrap();
    let first_block = per_block[0].as_ref().unwrap();
    assert_eq!(first_block.mm_objects[0].mm_hash, 0x0123_4567_89ab_cdef);
}
}
}
#[cfg(test)]
#[cfg(test)]
...
...
pyproject.toml
View file @
d5803cbe
...
@@ -50,11 +50,11 @@ trtllm =[
...
@@ -50,11 +50,11 @@ trtllm =[
vllm
=
[
vllm
=
[
"uvloop"
,
"uvloop"
,
"nixl[cu12]<=0.10.1"
,
"nixl[cu12]<=0.10.1"
,
"vllm[flashinfer,runai,otel]==0.1
8
.0"
,
"vllm[flashinfer,runai,otel]==0.1
9
.0"
,
# vllm-omni
0.18.0 is now on PyPI; install only future rc builds from source
in container builds
# vllm-omni
is installed separately
in container builds
(see
#
(see
container/deps/vllm/install_vllm.sh).
pip install
ai-dynamo[vllm]
will
# container/deps/vllm/install_vllm.sh).
Do not add it to
ai-dynamo[vllm]
:
#
not include vllm-omni — install it separately from source if needed
.
#
pip/uv dependency resolution for omni can override the vLLM torch stack
.
"vllm-omni==
0.18.0
"
,
#
"vllm-omni==
...
",
"blake3>=1.0.0,<2.0.0"
,
"blake3>=1.0.0,<2.0.0"
,
"soundfile>=0.13.1"
,
"soundfile>=0.13.1"
,
"librosa>=0.10.0"
,
"librosa>=0.10.0"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment