Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d0e3b7b7
Unverified
Commit
d0e3b7b7
authored
Nov 26, 2025
by
Sihan Chen
Committed by
GitHub
Nov 25, 2025
Browse files
feat: Enable intel gaudi on dynamo (#4209)
Signed-off-by:
Spycsh
<
sihan.chen@intel.com
>
parent
17dcffe8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
0 deletions
+75
-0
components/src/dynamo/vllm/handlers.py
components/src/dynamo/vllm/handlers.py
+10
-0
examples/backends/vllm/launch/disagg_router_gaudi.sh
examples/backends/vllm/launch/disagg_router_gaudi.sh
+65
-0
No files found.
components/src/dynamo/vllm/handlers.py
View file @
d0e3b7b7
...
...
@@ -389,6 +389,16 @@ class PrefillWorkerHandler(BaseWorkerHandler):
sampling_params
.
extra_args
[
"kv_transfer_params"
]
=
{
"do_remote_decode"
:
True
,
}
sampling_params_defaults
=
{
"do_remote_prefill"
:
False
,
"remote_engine_id"
:
None
,
"remote_block_ids"
:
None
,
"remote_host"
:
None
,
"remote_port"
:
None
,
}
# Add only missing keys
for
k
,
v
in
sampling_params_defaults
.
items
():
sampling_params
.
extra_args
[
"kv_transfer_params"
].
setdefault
(
k
,
v
)
# Override for prefill: only generate 1 token
sampling_params
.
max_tokens
=
1
sampling_params
.
min_tokens
=
1
...
...
examples/backends/vllm/launch/disagg_router_gaudi.sh
0 → 100755
View file @
d0e3b7b7
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Launches a disaggregated dynamo + vLLM deployment on four Habana devices
# (HABANA_VISIBLE_DEVICES 0-3): one frontend with KV routing, two decode
# workers and two prefill workers. The last worker runs in the foreground,
# so the script lives as long as that worker does; on exit the trap below
# kills every process in the script's process group.

set -e
trap 'echo Cleaning up...; kill 0' EXIT

# Set deterministic hash for KV event IDs
export PYTHONHASHSEED=0

# Common configuration (consumed by this script when building command lines)
MODEL="Qwen/Qwen3-0.6B"
BLOCK_SIZE=64
NIXL_BUFFER_DEVICE=cpu
VLLM_NIXL_BACKEND=UCX

# Runtime settings for the worker processes. These are not referenced anywhere
# in this script, so they must be exported: a plain shell assignment is not
# passed to child processes and would silently have no effect.
export VLLM_NIXL_DEVICE_TO_DEVICE=false
export VLLM_SKIP_WARMUP=true
export PT_HPU_LAZY_MODE=0

# KV transfer configuration shared by every worker (decode and prefill alike).
KV_TRANSFER_CONFIG="{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"${NIXL_BUFFER_DEVICE}\",\"kv_connector_extra_config\": {\"backends\": [\"${VLLM_NIXL_BACKEND}\"]}}"

# Start one dynamo.vllm worker.
#
# Usage: launch_worker DEVICE SIDE_CHANNEL_PORT KV_EVENTS_PORT [EXTRA_ARGS...]
#   DEVICE             value for HABANA_VISIBLE_DEVICES
#   SIDE_CHANNEL_PORT  value for VLLM_NIXL_SIDE_CHANNEL_PORT (unique per worker)
#   KV_EVENTS_PORT     TCP port of the ZMQ KV-events publisher (unique per worker)
#   EXTRA_ARGS         additional dynamo.vllm flags, e.g. --is-prefill-worker
#
# Runs in the foreground; append '&' at the call site to background it.
launch_worker() {
    local device="$1"
    local side_channel_port="$2"
    local kv_events_port="$3"
    shift 3

    VLLM_NIXL_SIDE_CHANNEL_PORT="$side_channel_port" \
    HABANA_VISIBLE_DEVICES="$device" \
    python3 -m dynamo.vllm \
        --model "$MODEL" \
        --block-size "$BLOCK_SIZE" \
        --kv-transfer-config "$KV_TRANSFER_CONFIG" \
        --connector none \
        --kv-events-config "{\"publisher\":\"zmq\",\"topic\":\"kv-events\",\"endpoint\":\"tcp://*:${kv_events_port}\", \"enable_kv_cache_events\":true}" \
        "$@"
}

# Start frontend with KV routing
# The frontend will automatically detect prefill workers and activate an internal prefill router
# edit --router-mode to random / round-robin / kv
python -m dynamo.frontend \
    --router-mode kv \
    --http-port 8000 \
    --router-reset-states &

# two decode workers
# NOTE: no --enforce-eager flag is passed here. Start-up is shortened via
# VLLM_SKIP_WARMUP=true above, which is presumably a quick-deployment setting
# that should be reviewed for production use (TODO confirm).
launch_worker 0 20096 5556 &
launch_worker 1 20097 5557 &

# two prefill workers
# When registered with --is-prefill-worker, these workers are automatically detected
# by the frontend, which activates an internal prefill router for KV-aware prefill routing
launch_worker 2 20098 5558 --is-prefill-worker &

# The last worker stays in the foreground to keep the script (and its EXIT trap) alive.
launch_worker 3 20099 5559 --is-prefill-worker
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment