[Model Runner V2] Add config validation for not-yet-supported features (#38758)

Signed-off-by: Nick Hill <nickhill123@gmail.com>

[Model Runner V2] Add config validation for not-yet-supported features (#38758)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
5f1de2b1 · Nick Hill · GitHub · a5a623d9 · 5f1de2b1 · 5f1de2b1
Unverified Commit 5f1de2b1 authored Apr 03, 2026 by Nick Hill Committed by GitHub Apr 03, 2026
Show whitespace changes
Inline Side-by-side

Showing with 46 additions and 1 deletion

.buildkite/test_areas/model_runner_v2.yaml .buildkite/test_areas/model_runner_v2.yaml +0 -1

vllm/config/vllm.py vllm/config/vllm.py +46 -0

No files found.
--- a/.buildkite/test_areas/model_runner_v2.yaml
+++ b/.buildkite/test_areas/model_runner_v2.yaml
@@ -78,7 +78,6 @@ steps:
    - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
    - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py

-# These require fix https://github.com/vllm-project/vllm/pull/36280
 - label: Model Runner V2 Pipeline Parallelism (4 GPUs)
  timeout_in_minutes: 60
  working_dir: "/vllm-workspace/tests"

--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -1106,6 +1106,9 @@ class VllmConfig:
            )
        current_platform.check_and_update_config(self)

+        if envs.VLLM_USE_V2_MODEL_RUNNER:
+            self._validate_v2_model_runner()
+
        # Re-compute compile ranges after platform-specific config updates
        # (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
        self._set_compile_ranges()
@@ -1729,6 +1732,49 @@ class VllmConfig:
            f"kernel_config={self.kernel_config!r}"
        )

+    def _validate_v2_model_runner(self) -> None:
+        """Check for features not yet supported by the V2 model runner.
+
+        Each check below appends a human-readable feature name to a list
+        instead of failing immediately, so that a single error can report
+        every unsupported feature that is enabled in this config.
+
+        Raises:
+            ValueError: If at least one unsupported feature is enabled. The
+                message lists the offending features, comma-separated.
+        """
+        # Names of the enabled features that the V2 model runner cannot handle.
+        unsupported: list[str] = []
+
+        # model_config is guarded against None before its attributes are read.
+        if self.model_config is not None and self.model_config.has_inner_state:
+            unsupported.append("hybrid/mamba models")
+
+        if self.parallel_config.prefill_context_parallel_size > 1:
+            unsupported.append("prefill context parallelism")
+
+        # Only the listed speculative methods are accepted; any other
+        # configured method is reported by name in the error message.
+        if (
+            self.speculative_config is not None
+            and self.speculative_config.method not in ("eagle", "eagle3", "mtp")
+        ):
+            unsupported.append(f"speculative method '{self.speculative_config.method}'")
+
+        if self.parallel_config.enable_dbo:
+            unsupported.append("dual batch overlap")
+
+        if (
+            self.model_config is not None
+            and self.model_config.enable_return_routed_experts
+        ):
+            # Will be added by https://github.com/vllm-project/vllm/pull/38163
+            unsupported.append("routed experts capture")
+
+        # Any truthy logits_processors value counts as "custom processors set".
+        if self.model_config is not None and self.model_config.logits_processors:
+            unsupported.append("custom logits processors")
+
+        if self.cache_config.kv_sharing_fast_prefill:
+            # Will be added by https://github.com/vllm-project/vllm/pull/35045
+            unsupported.append("KV sharing fast prefill")
+
+        # The mere presence of an EC transfer config is treated as unsupported.
+        if self.ec_transfer_config is not None:
+            # Will be added by https://github.com/vllm-project/vllm/pull/38390
+            unsupported.append("EC transfer")
+
+        # Fail once, naming everything collected above, rather than forcing
+        # the user to fix one feature per attempt.
+        if unsupported:
+            raise ValueError(
+                "VLLM_USE_V2_MODEL_RUNNER does not yet support: "
+                + ", ".join(unsupported)
+            )
+
    def validate_block_size(self) -> None:
        """Validate block_size against DCP and mamba constraints.