Unverified commit 9c4cb683, authored by Cyrus Leung, committed by GitHub
Browse files

[Chore] Remove `SupportsV0Only` interface and update supported models docs (#26783)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 780eb03d
This diff is collapsed.
......@@ -88,12 +88,6 @@ based on assigned priority, with FCFS as a tie-breaker), configurable via the
| **Mamba Models** | <nobr>🟢 (Mamba-2), 🟢 (Mamba-1)</nobr> |
| **Multimodal Models** | <nobr>🟢 Functional</nobr> |
vLLM V1 currently excludes model architectures with the `SupportsV0Only` protocol.
!!! tip
    This corresponds to the V1 column in our [list of supported models](../models/supported_models.md).
See below for the status of models that are not yet supported or have more features planned in V1.
#### Embedding Models
......
......@@ -76,9 +76,6 @@ class _HfExamplesInfo:
trust_remote_code: bool = False
"""The ``trust_remote_code`` level required to load the model."""
v0_only: bool = False
"""The model is only available with the vLLM V0 engine."""
hf_overrides: dict[str, Any] = field(default_factory=dict)
"""The ``hf_overrides`` required to load the model."""
......@@ -694,7 +691,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
"MiniMaxAI/MiniMax-VL-01",
trust_remote_code=True,
v0_only=True,
),
"Mistral3ForConditionalGeneration": _HfExamplesInfo(
"mistralai/Mistral-Small-3.1-24B-Instruct-2503",
......
......@@ -88,13 +88,15 @@ def can_initialize(
# gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
return 1, 0, scheduler_kv_cache_config
if model_arch == "MiniMaxVL01ForConditionalGeneration":
pytest.skip(
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
)
with (
patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1),
monkeypatch.context() as m,
):
if model_info.v0_only:
# NOTE(woosuk): skip the test for V0-only models
return
if model_arch == "GptOssForCausalLM":
# FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
# has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
......@@ -132,8 +134,6 @@ def can_initialize(
@pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
def test_can_initialize_small_subset(model_arch: str, monkeypatch: pytest.MonkeyPatch):
    """Test initializing small subset of supported models.

    Parametrized over ``MINIMAL_MODEL_ARCH_LIST``; every architecture except
    the explicitly skipped one is forwarded to ``can_initialize`` together
    with ``HF_EXAMPLE_MODELS``.
    """
    if model_arch == "Lfm2ForCausalLM":
        pytest.skip("Skipping until test supports V1-only models")

    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
......@@ -144,8 +144,6 @@ def test_can_initialize_large_subset(model_arch: str, monkeypatch: pytest.Monkey
This test covers the complement of the tests covered in the "small subset"
test.
"""
if model_arch == "Lfm2ForCausalLM":
pytest.skip("Skipping until test supports V1-only models")
can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
......
......@@ -1622,10 +1622,6 @@ class ModelConfig:
def has_inner_state(self):
return self._model_info.has_inner_state
@property
def is_v1_compatible(self) -> bool:
    """Whether the model is considered compatible with V1.

    This is the negation of the ``supports_v0_only`` flag stored on
    ``self._model_info``.
    """
    return not self._model_info.supports_v0_only
@property
def use_mla(self) -> bool:
    """Whether MLA should be used.

    True only when ``self.is_deepseek_mla`` is truthy and
    ``envs.VLLM_MLA_DISABLE`` is falsy.
    """
    return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE
......
......@@ -1606,13 +1606,6 @@ class EngineArgs:
)
return False
# No Mamba or Encoder-Decoder so far.
if not model_config.is_v1_compatible:
_raise_or_fallback(
feature_name=model_config.architectures, recommend_to_remove=False
)
return False
# No Concurrent Partial Prefills so far.
if (
self.max_num_partial_prefills != SchedulerConfig.max_num_partial_prefills
......
......@@ -8,14 +8,12 @@ from .interfaces import (
SupportsMultiModal,
SupportsPP,
SupportsTranscription,
SupportsV0Only,
has_inner_state,
supports_lora,
supports_mrope,
supports_multimodal,
supports_pp,
supports_transcription,
supports_v0_only,
)
from .interfaces_base import (
VllmModelForPooling,
......@@ -43,6 +41,4 @@ __all__ = [
"supports_pp",
"SupportsTranscription",
"supports_transcription",
"SupportsV0Only",
"supports_v0_only",
]
......@@ -877,27 +877,6 @@ def supports_transcription(
return getattr(model, "supports_transcription", False)
@runtime_checkable
class SupportsV0Only(Protocol):
    """Models with this interface are not compatible with V1 vLLM."""

    # Marker flag: typed as the literal ``True`` and defaulted to ``True`` so
    # that an attribute lookup of ``supports_v0_only`` on an implementing
    # class yields a truthy value.
    supports_v0_only: ClassVar[Literal[True]] = True
@overload
def supports_v0_only(model: type[object]) -> TypeIs[type[SupportsV0Only]]: ...


@overload
def supports_v0_only(model: object) -> TypeIs[SupportsV0Only]: ...


def supports_v0_only(
    model: type[object] | object,
) -> TypeIs[type[SupportsV0Only]] | TypeIs[SupportsV0Only]:
    """Report whether ``model`` carries the V0-only marker.

    Accepts either a class or an instance (see the two overloads above) and
    looks up its ``supports_v0_only`` attribute.

    Returns:
        The value of ``model.supports_v0_only`` when the attribute exists,
        otherwise ``False``.
    """
    return getattr(model, "supports_v0_only", False)
@runtime_checkable
class SupportsEagle3(Protocol):
"""The interface required for models that support
......
......@@ -44,7 +44,6 @@ from .interfaces import (
supports_multimodal_raw_input_only,
supports_pp,
supports_transcription,
supports_v0_only,
)
from .interfaces_base import (
get_default_pooling_type,
......@@ -479,7 +478,6 @@ class _ModelInfo:
has_noops: bool
supports_transcription: bool
supports_transcription_only: bool
supports_v0_only: bool
@staticmethod
def from_model_cls(model: type[nn.Module]) -> "_ModelInfo":
......@@ -504,7 +502,6 @@ class _ModelInfo:
supports_transcription_only=(
supports_transcription(model) and model.supports_transcription_only
),
supports_v0_only=supports_v0_only(model),
has_noops=has_noops(model),
)
......@@ -1063,14 +1060,6 @@ class _ModelRegistry:
model_cls, _ = self.inspect_model_cls(architectures, model_config)
return model_cls.supports_transcription_only
def is_v1_compatible(
    self,
    architectures: str | list[str],
    model_config: ModelConfig,
) -> bool:
    """Return whether the resolved model is considered compatible with V1.

    Both arguments are forwarded unchanged to ``self.inspect_model_cls``;
    the first element of its result is then checked for the
    ``supports_v0_only`` flag.

    Returns:
        ``True`` when ``supports_v0_only`` on the resolved entry is falsy,
        ``False`` otherwise.
    """
    model_cls, _ = self.inspect_model_cls(architectures, model_config)
    return not model_cls.supports_v0_only
ModelRegistry = _ModelRegistry(
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment