[Chore] Remove `SupportsV0Only` interface and update supported models docs (#26783)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Chore] Remove `SupportsV0Only` interface and update supported models docs (#26783)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
9c4cb683 · Cyrus Leung · GitHub · 780eb03d · 9c4cb683 · 9c4cb683
Unverified commit 9c4cb683, authored Oct 14, 2025 by Cyrus Leung; committed by GitHub on Oct 14, 2025
9 changed files
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
--- a/docs/usage/v1_guide.md
+++ b/docs/usage/v1_guide.md
@@ -88,12 +88,6 @@ based on assigned priority, with FCFS as a tie-breaker), configurable via the
 | **Mamba Models**            | <nobr>🟢 (Mamba-2), 🟢 (Mamba-1)</nobr>                                            |
 | **Multimodal Models**       | <nobr>🟢 Functional</nobr>                                                         |
-vLLM V1 currently excludes model architectures with the `SupportsV0Only` protocol.
-!!! tip
-    This corresponds to the V1 column in our [list of supported models](../models/supported_models.md).
 See below for the status of models that are not yet supported or have more features planned in V1.
 #### Embedding Models

--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -76,9 +76,6 @@ class _HfExamplesInfo:
    trust_remote_code: bool = False
    """The ``trust_remote_code`` level required to load the model."""
-    v0_only: bool = False
-    """The model is only available with the vLLM V0 engine."""
    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The ``hf_overrides`` required to load the model."""
@@ -694,7 +691,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        "MiniMaxAI/MiniMax-VL-01",
        trust_remote_code=True,
-        v0_only=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",

--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -88,13 +88,15 @@ def can_initialize(
        # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
        return 1, 0, scheduler_kv_cache_config
+    if model_arch == "MiniMaxVL01ForConditionalGeneration":
+        pytest.skip(
+            "pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
+        )
    with (
        patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1),
        monkeypatch.context() as m,
    ):
-        if model_info.v0_only:
-            # NOTE(woosuk): skip the test for V0-only models
-            return
        if model_arch == "GptOssForCausalLM":
            # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
            # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
@@ -132,8 +134,6 @@ def can_initialize(
 @pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
 def test_can_initialize_small_subset(model_arch: str, monkeypatch: pytest.MonkeyPatch):
    """Test initializing small subset of supported models"""
-    if model_arch == "Lfm2ForCausalLM":
-        pytest.skip("Skipping until test supports V1-only models")
    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
@@ -144,8 +144,6 @@ def test_can_initialize_large_subset(model_arch: str, monkeypatch: pytest.Monkey
    This test covers the complement of the tests covered in the "small subset"
    test.
    """
-    if model_arch == "Lfm2ForCausalLM":
-        pytest.skip("Skipping until test supports V1-only models")
    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)

--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1622,10 +1622,6 @@ class ModelConfig:
    def has_inner_state(self):
        return self._model_info.has_inner_state
-    @property
-    def is_v1_compatible(self) -> bool:
-        return not self._model_info.supports_v0_only
    @property
    def use_mla(self) -> bool:
        return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1606,13 +1606,6 @@ class EngineArgs:
            )
            return False
-        # No Mamba or Encoder-Decoder so far.
-        if not model_config.is_v1_compatible:
-            _raise_or_fallback(
-                feature_name=model_config.architectures, recommend_to_remove=False
-            )
-            return False
        # No Concurrent Partial Prefills so far.
        if (
            self.max_num_partial_prefills != SchedulerConfig.max_num_partial_prefills

--- a/vllm/model_executor/models/__init__.py
+++ b/vllm/model_executor/models/__init__.py
@@ -8,14 +8,12 @@ from .interfaces import (
    SupportsMultiModal,
    SupportsPP,
    SupportsTranscription,
-    SupportsV0Only,
    has_inner_state,
    supports_lora,
    supports_mrope,
    supports_multimodal,
    supports_pp,
    supports_transcription,
-    supports_v0_only,
 )
 from .interfaces_base import (
    VllmModelForPooling,
@@ -43,6 +41,4 @@ __all__ = [
    "supports_pp",
    "SupportsTranscription",
    "supports_transcription",
-    "SupportsV0Only",
-    "supports_v0_only",
 ]
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -877,27 +877,6 @@ def supports_transcription(
    return getattr(model, "supports_transcription", False)
-@runtime_checkable
-class SupportsV0Only(Protocol):
-    """Models with this interface are not compatible with V1 vLLM."""
-    supports_v0_only: ClassVar[Literal[True]] = True
-@overload
-def supports_v0_only(model: type[object]) -> TypeIs[type[SupportsV0Only]]: ...
-@overload
-def supports_v0_only(model: object) -> TypeIs[SupportsV0Only]: ...
-def supports_v0_only(
-    model: type[object] | object,
-) -> TypeIs[type[SupportsV0Only]] | TypeIs[SupportsV0Only]:
-    return getattr(model, "supports_v0_only", False)
 @runtime_checkable
 class SupportsEagle3(Protocol):
    """The interface required for models that support

--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -44,7 +44,6 @@ from .interfaces import (
    supports_multimodal_raw_input_only,
    supports_pp,
    supports_transcription,
-    supports_v0_only,
 )
 from .interfaces_base import (
    get_default_pooling_type,
@@ -479,7 +478,6 @@ class _ModelInfo:
    has_noops: bool
    supports_transcription: bool
    supports_transcription_only: bool
-    supports_v0_only: bool
    @staticmethod
    def from_model_cls(model: type[nn.Module]) -> "_ModelInfo":
@@ -504,7 +502,6 @@ class _ModelInfo:
            supports_transcription_only=(
                supports_transcription(model) and model.supports_transcription_only
            ),
-            supports_v0_only=supports_v0_only(model),
            has_noops=has_noops(model),
        )
@@ -1063,14 +1060,6 @@ class _ModelRegistry:
        model_cls, _ = self.inspect_model_cls(architectures, model_config)
        return model_cls.supports_transcription_only
-    def is_v1_compatible(
-        self,
-        architectures: str | list[str],
-        model_config: ModelConfig,
-    ) -> bool:
-        model_cls, _ = self.inspect_model_cls(architectures, model_config)
-        return not model_cls.supports_v0_only
 ModelRegistry = _ModelRegistry(
    {