Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5f1de2b1
Unverified
Commit
5f1de2b1
authored
Apr 03, 2026
by
Nick Hill
Committed by
GitHub
Apr 03, 2026
Browse files
[Model Runner V2] Add config validation for not-yet-supported features (#38758)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
a5a623d9
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
46 additions
and
1 deletion
+46
-1
.buildkite/test_areas/model_runner_v2.yaml
.buildkite/test_areas/model_runner_v2.yaml
+0
-1
vllm/config/vllm.py
vllm/config/vllm.py
+46
-0
No files found.
.buildkite/test_areas/model_runner_v2.yaml
View file @
5f1de2b1
...
...
@@ -78,7 +78,6 @@ steps:
-
TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
-
TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
# These require fix https://github.com/vllm-project/vllm/pull/36280
-
label
:
Model Runner V2 Pipeline Parallelism (4 GPUs)
timeout_in_minutes
:
60
working_dir
:
"
/vllm-workspace/tests"
...
...
vllm/config/vllm.py
View file @
5f1de2b1
...
...
@@ -1106,6 +1106,9 @@ class VllmConfig:
)
current_platform
.
check_and_update_config
(
self
)
if
envs
.
VLLM_USE_V2_MODEL_RUNNER
:
self
.
_validate_v2_model_runner
()
# Re-compute compile ranges after platform-specific config updates
# (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
self
.
_set_compile_ranges
()
...
...
@@ -1729,6 +1732,49 @@ class VllmConfig:
f
"kernel_config=
{
self
.
kernel_config
!
r
}
"
)
def _validate_v2_model_runner(self) -> None:
    """Reject configurations that need features the V2 model runner lacks.

    Every enabled-but-unsupported feature is collected before failing, so
    the resulting error names all of them at once instead of only the
    first one encountered.

    Raises:
        ValueError: If at least one enabled feature is not yet supported
            by the V2 model runner.
    """
    blockers: list[str] = []
    model_cfg = self.model_config

    if model_cfg is not None and model_cfg.has_inner_state:
        blockers.append("hybrid/mamba models")

    if self.parallel_config.prefill_context_parallel_size > 1:
        blockers.append("prefill context parallelism")

    spec_cfg = self.speculative_config
    if spec_cfg is not None:
        spec_method = spec_cfg.method
        # Only these speculative decoding methods are accepted.
        if spec_method not in ("eagle", "eagle3", "mtp"):
            blockers.append(f"speculative method '{spec_method}'")

    if self.parallel_config.enable_dbo:
        blockers.append("dual batch overlap")

    if model_cfg is not None:
        if model_cfg.enable_return_routed_experts:
            # Will be added by https://github.com/vllm-project/vllm/pull/38163
            blockers.append("routed experts capture")
        if model_cfg.logits_processors:
            blockers.append("custom logits processors")

    if self.cache_config.kv_sharing_fast_prefill:
        # Will be added by https://github.com/vllm-project/vllm/pull/35045
        blockers.append("KV sharing fast prefill")

    if self.ec_transfer_config is not None:
        # Will be added by https://github.com/vllm-project/vllm/pull/38390
        blockers.append("EC transfer")

    if not blockers:
        return

    raise ValueError(
        "VLLM_USE_V2_MODEL_RUNNER does not yet support: " + ", ".join(blockers)
    )
def
validate_block_size
(
self
)
->
None
:
"""Validate block_size against DCP and mamba constraints.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment