Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
fb294b90
Unverified
Commit
fb294b90
authored
Oct 22, 2025
by
Yan Ru Pei
Committed by
GitHub
Oct 22, 2025
Browse files
chore: no need to publish kv events for decode vllm worker (#3819)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
1aa5e92b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
6 deletions
+17
-6
benchmarks/router/run_engines.sh
benchmarks/router/run_engines.sh
+2
-0
components/src/dynamo/vllm/args.py
components/src/dynamo/vllm/args.py
+7
-0
components/src/dynamo/vllm/main.py
components/src/dynamo/vllm/main.py
+5
-0
lib/llm/src/kv_router/prefill_router.rs
lib/llm/src/kv_router/prefill_router.rs
+3
-6
No files found.
benchmarks/router/run_engines.sh
View file @
fb294b90
...
...
@@ -225,6 +225,8 @@ for i in $(seq 1 $NUM_WORKERS); do
fi
if
[
"
$MODE
"
=
"prefill"
]
;
then
VLLM_ARGS+
=(
"--is-prefill-worker"
)
elif
[
"
$MODE
"
=
"decode"
]
;
then
VLLM_ARGS+
=(
"--is-decode-worker"
)
fi
VLLM_ARGS+
=(
"
${
EXTRA_ARGS
[@]
}
"
)
...
...
components/src/dynamo/vllm/args.py
View file @
fb294b90
...
...
@@ -45,6 +45,7 @@ class Config:
component
:
str
endpoint
:
str
is_prefill_worker
:
bool
is_decode_worker
:
bool
migration_limit
:
int
=
0
kv_port
:
Optional
[
int
]
=
None
port_range
:
DynamoPortRange
...
...
@@ -85,6 +86,11 @@ def parse_args() -> Config:
action
=
"store_true"
,
help
=
"Enable prefill functionality for this worker. Uses the provided namespace to construct dyn://namespace.prefill.generate"
,
)
parser
.
add_argument
(
"--is-decode-worker"
,
action
=
"store_true"
,
help
=
"Mark this as a decode worker which does not publish KV events."
,
)
parser
.
add_argument
(
"--migration-limit"
,
type
=
int
,
...
...
@@ -159,6 +165,7 @@ def parse_args() -> Config:
config
.
endpoint
=
"generate"
config
.
engine_args
=
engine_args
config
.
is_prefill_worker
=
args
.
is_prefill_worker
config
.
is_decode_worker
=
args
.
is_decode_worker
config
.
migration_limit
=
args
.
migration_limit
config
.
port_range
=
DynamoPortRange
(
min
=
args
.
dynamo_port_min
,
max
=
args
.
dynamo_port_max
...
...
components/src/dynamo/vllm/main.py
View file @
fb294b90
...
...
@@ -118,6 +118,11 @@ def setup_kv_event_publisher(
if
not
config
.
engine_args
.
enable_prefix_caching
:
return
None
# Skip KV event publishing for decode workers
if
config
.
is_decode_worker
:
logger
.
info
(
"Skipping KV event publisher setup for decode worker"
)
return
None
# Get data_parallel_size to create publishers for all dp_ranks
data_parallel_size
=
getattr
(
vllm_config
.
parallel_config
,
"data_parallel_size"
,
1
)
kv_publishers
=
[]
...
...
lib/llm/src/kv_router/prefill_router.rs
View file @
fb294b90
...
...
@@ -170,23 +170,20 @@ impl PrefillRouter {
bail!
(
"Prefill router returned no output (stream ended)"
);
};
while
prefill_response
.next
()
.await
.is_some
()
{}
if
let
Some
(
err
)
=
first_output
.err
()
{
while
prefill_response
.next
()
.await
.is_some
()
{}
bail!
(
"Prefill router returned error in output: {:?}"
,
err
);
bail!
(
"Prefill router returned error in output: {err:?}"
);
}
let
Some
(
output
)
=
&
first_output
.data
else
{
while
prefill_response
.next
()
.await
.is_some
()
{}
bail!
(
"Prefill router output has no data field"
);
};
let
Some
(
disaggregated_params
)
=
output
.disaggregated_params
.clone
()
else
{
while
prefill_response
.next
()
.await
.is_some
()
{}
bail!
(
"Prefill router output missing disaggregated_params"
);
};
while
prefill_response
.next
()
.await
.is_some
()
{}
Ok
(
disaggregated_params
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment