"docs/vscode:/vscode.git/clone" did not exist on "06d490282f2bab6922137eb5230be9df5ebbe9c4"
Unverified Commit 5f1de2b1, authored by Nick Hill, committed by GitHub
Browse files

[Model Runner V2] Add config validation for not-yet-supported features (#38758)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent a5a623d9
...@@ -78,7 +78,6 @@ steps: ...@@ -78,7 +78,6 @@ steps:
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray" - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
# These require fix https://github.com/vllm-project/vllm/pull/36280
- label: Model Runner V2 Pipeline Parallelism (4 GPUs) - label: Model Runner V2 Pipeline Parallelism (4 GPUs)
timeout_in_minutes: 60 timeout_in_minutes: 60
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
......
...@@ -1106,6 +1106,9 @@ class VllmConfig: ...@@ -1106,6 +1106,9 @@ class VllmConfig:
) )
current_platform.check_and_update_config(self) current_platform.check_and_update_config(self)
if envs.VLLM_USE_V2_MODEL_RUNNER:
self._validate_v2_model_runner()
# Re-compute compile ranges after platform-specific config updates # Re-compute compile ranges after platform-specific config updates
# (e.g., XPU may lower max_num_batched_tokens when MLA is enabled) # (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
self._set_compile_ranges() self._set_compile_ranges()
...@@ -1729,6 +1732,49 @@ class VllmConfig: ...@@ -1729,6 +1732,49 @@ class VllmConfig:
f"kernel_config={self.kernel_config!r}" f"kernel_config={self.kernel_config!r}"
) )
def _validate_v2_model_runner(self) -> None:
    """Reject configs that enable features the V2 model runner lacks.

    Every enabled-but-unsupported feature is collected before raising, so
    a single error names all of them at once.

    Raises:
        ValueError: If at least one of the features checked below is
            enabled. The message lists each one, comma-separated.
    """
    model_cfg = self.model_config
    has_model = model_cfg is not None

    # The speculative label embeds the configured method, so it can only
    # be built once we know a speculative config is actually present.
    spec_cfg = self.speculative_config
    spec_rejected = spec_cfg is not None and spec_cfg.method not in (
        "eagle",
        "eagle3",
        "mtp",
    )
    spec_label = f"speculative method '{spec_cfg.method}'" if spec_rejected else ""

    # (feature enabled?, label shown in the error); order fixes the order
    # in which features are reported.
    checks = (
        (has_model and model_cfg.has_inner_state, "hybrid/mamba models"),
        (
            self.parallel_config.prefill_context_parallel_size > 1,
            "prefill context parallelism",
        ),
        (spec_rejected, spec_label),
        (self.parallel_config.enable_dbo, "dual batch overlap"),
        # Will be added by https://github.com/vllm-project/vllm/pull/38163
        (
            has_model and model_cfg.enable_return_routed_experts,
            "routed experts capture",
        ),
        (has_model and model_cfg.logits_processors, "custom logits processors"),
        # Will be added by https://github.com/vllm-project/vllm/pull/35045
        (self.cache_config.kv_sharing_fast_prefill, "KV sharing fast prefill"),
        # Will be added by https://github.com/vllm-project/vllm/pull/38390
        (self.ec_transfer_config is not None, "EC transfer"),
    )

    rejected = [label for enabled, label in checks if enabled]
    if not rejected:
        return
    raise ValueError(
        "VLLM_USE_V2_MODEL_RUNNER does not yet support: " + ", ".join(rejected)
    )
def validate_block_size(self) -> None: def validate_block_size(self) -> None:
"""Validate block_size against DCP and mamba constraints. """Validate block_size against DCP and mamba constraints.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment