"cacheflow/vscode:/vscode.git/clone" did not exist on "b7955ef17b8d899327b25564f20665ec3ffa71cb"
Unverified commit 9c4cb683, authored by Cyrus Leung and committed by GitHub
Browse files

[Chore] Remove `SupportsV0Only` interface and update supported models docs (#26783)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 780eb03d
This diff is collapsed.
...@@ -88,12 +88,6 @@ based on assigned priority, with FCFS as a tie-breaker), configurable via the ...@@ -88,12 +88,6 @@ based on assigned priority, with FCFS as a tie-breaker), configurable via the
| **Mamba Models** | <nobr>🟢 (Mamba-2), 🟢 (Mamba-1)</nobr> |
| **Multimodal Models** | <nobr>🟢 Functional</nobr> |
vLLM V1 currently excludes model architectures with the `SupportsV0Only` protocol.
!!! tip
    This corresponds to the V1 column in our [list of supported models](../models/supported_models.md).
See below for the status of models that are not yet supported or have more features planned in V1.
#### Embedding Models
......
...@@ -76,9 +76,6 @@ class _HfExamplesInfo: ...@@ -76,9 +76,6 @@ class _HfExamplesInfo:
trust_remote_code: bool = False trust_remote_code: bool = False
"""The ``trust_remote_code`` level required to load the model.""" """The ``trust_remote_code`` level required to load the model."""
v0_only: bool = False
"""The model is only available with the vLLM V0 engine."""
hf_overrides: dict[str, Any] = field(default_factory=dict) hf_overrides: dict[str, Any] = field(default_factory=dict)
"""The ``hf_overrides`` required to load the model.""" """The ``hf_overrides`` required to load the model."""
...@@ -694,7 +691,6 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -694,7 +691,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo( "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
"MiniMaxAI/MiniMax-VL-01", "MiniMaxAI/MiniMax-VL-01",
trust_remote_code=True, trust_remote_code=True,
v0_only=True,
), ),
"Mistral3ForConditionalGeneration": _HfExamplesInfo( "Mistral3ForConditionalGeneration": _HfExamplesInfo(
"mistralai/Mistral-Small-3.1-24B-Instruct-2503", "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
......
...@@ -88,13 +88,15 @@ def can_initialize( ...@@ -88,13 +88,15 @@ def can_initialize(
# gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
return 1, 0, scheduler_kv_cache_config return 1, 0, scheduler_kv_cache_config
if model_arch == "MiniMaxVL01ForConditionalGeneration":
pytest.skip(
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
)
with ( with (
patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1), patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1),
monkeypatch.context() as m, monkeypatch.context() as m,
): ):
if model_info.v0_only:
# NOTE(woosuk): skip the test for V0-only models
return
if model_arch == "GptOssForCausalLM": if model_arch == "GptOssForCausalLM":
# FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
# has cc==8.9 which hasn't supported FA3 yet. Remove this hack when # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
...@@ -132,8 +134,6 @@ def can_initialize( ...@@ -132,8 +134,6 @@ def can_initialize(
@pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST) @pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
def test_can_initialize_small_subset(model_arch: str, monkeypatch: pytest.MonkeyPatch): def test_can_initialize_small_subset(model_arch: str, monkeypatch: pytest.MonkeyPatch):
"""Test initializing small subset of supported models""" """Test initializing small subset of supported models"""
if model_arch == "Lfm2ForCausalLM":
pytest.skip("Skipping until test supports V1-only models")
can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS) can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
...@@ -144,8 +144,6 @@ def test_can_initialize_large_subset(model_arch: str, monkeypatch: pytest.Monkey ...@@ -144,8 +144,6 @@ def test_can_initialize_large_subset(model_arch: str, monkeypatch: pytest.Monkey
This test covers the complement of the tests covered in the "small subset" This test covers the complement of the tests covered in the "small subset"
test. test.
""" """
if model_arch == "Lfm2ForCausalLM":
pytest.skip("Skipping until test supports V1-only models")
can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS) can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
......
...@@ -1622,10 +1622,6 @@ class ModelConfig: ...@@ -1622,10 +1622,6 @@ class ModelConfig:
def has_inner_state(self): def has_inner_state(self):
return self._model_info.has_inner_state return self._model_info.has_inner_state
@property
def is_v1_compatible(self) -> bool:
return not self._model_info.supports_v0_only
@property @property
def use_mla(self) -> bool: def use_mla(self) -> bool:
return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE
......
...@@ -1606,13 +1606,6 @@ class EngineArgs: ...@@ -1606,13 +1606,6 @@ class EngineArgs:
) )
return False return False
# No Mamba or Encoder-Decoder so far.
if not model_config.is_v1_compatible:
_raise_or_fallback(
feature_name=model_config.architectures, recommend_to_remove=False
)
return False
# No Concurrent Partial Prefills so far. # No Concurrent Partial Prefills so far.
if ( if (
self.max_num_partial_prefills != SchedulerConfig.max_num_partial_prefills self.max_num_partial_prefills != SchedulerConfig.max_num_partial_prefills
......
...@@ -8,14 +8,12 @@ from .interfaces import ( ...@@ -8,14 +8,12 @@ from .interfaces import (
SupportsMultiModal, SupportsMultiModal,
SupportsPP, SupportsPP,
SupportsTranscription, SupportsTranscription,
SupportsV0Only,
has_inner_state, has_inner_state,
supports_lora, supports_lora,
supports_mrope, supports_mrope,
supports_multimodal, supports_multimodal,
supports_pp, supports_pp,
supports_transcription, supports_transcription,
supports_v0_only,
) )
from .interfaces_base import ( from .interfaces_base import (
VllmModelForPooling, VllmModelForPooling,
...@@ -43,6 +41,4 @@ __all__ = [ ...@@ -43,6 +41,4 @@ __all__ = [
"supports_pp", "supports_pp",
"SupportsTranscription", "SupportsTranscription",
"supports_transcription", "supports_transcription",
"SupportsV0Only",
"supports_v0_only",
] ]
...@@ -877,27 +877,6 @@ def supports_transcription( ...@@ -877,27 +877,6 @@ def supports_transcription(
return getattr(model, "supports_transcription", False) return getattr(model, "supports_transcription", False)
@runtime_checkable
class SupportsV0Only(Protocol):
"""Models with this interface are not compatible with V1 vLLM."""
supports_v0_only: ClassVar[Literal[True]] = True
@overload
def supports_v0_only(model: type[object]) -> TypeIs[type[SupportsV0Only]]: ...
@overload
def supports_v0_only(model: object) -> TypeIs[SupportsV0Only]: ...
def supports_v0_only(
model: type[object] | object,
) -> TypeIs[type[SupportsV0Only]] | TypeIs[SupportsV0Only]:
return getattr(model, "supports_v0_only", False)
@runtime_checkable @runtime_checkable
class SupportsEagle3(Protocol): class SupportsEagle3(Protocol):
"""The interface required for models that support """The interface required for models that support
......
...@@ -44,7 +44,6 @@ from .interfaces import ( ...@@ -44,7 +44,6 @@ from .interfaces import (
supports_multimodal_raw_input_only, supports_multimodal_raw_input_only,
supports_pp, supports_pp,
supports_transcription, supports_transcription,
supports_v0_only,
) )
from .interfaces_base import ( from .interfaces_base import (
get_default_pooling_type, get_default_pooling_type,
...@@ -479,7 +478,6 @@ class _ModelInfo: ...@@ -479,7 +478,6 @@ class _ModelInfo:
has_noops: bool has_noops: bool
supports_transcription: bool supports_transcription: bool
supports_transcription_only: bool supports_transcription_only: bool
supports_v0_only: bool
@staticmethod @staticmethod
def from_model_cls(model: type[nn.Module]) -> "_ModelInfo": def from_model_cls(model: type[nn.Module]) -> "_ModelInfo":
...@@ -504,7 +502,6 @@ class _ModelInfo: ...@@ -504,7 +502,6 @@ class _ModelInfo:
supports_transcription_only=( supports_transcription_only=(
supports_transcription(model) and model.supports_transcription_only supports_transcription(model) and model.supports_transcription_only
), ),
supports_v0_only=supports_v0_only(model),
has_noops=has_noops(model), has_noops=has_noops(model),
) )
...@@ -1063,14 +1060,6 @@ class _ModelRegistry: ...@@ -1063,14 +1060,6 @@ class _ModelRegistry:
model_cls, _ = self.inspect_model_cls(architectures, model_config) model_cls, _ = self.inspect_model_cls(architectures, model_config)
return model_cls.supports_transcription_only return model_cls.supports_transcription_only
def is_v1_compatible(
self,
architectures: str | list[str],
model_config: ModelConfig,
) -> bool:
model_cls, _ = self.inspect_model_cls(architectures, model_config)
return not model_cls.supports_v0_only
ModelRegistry = _ModelRegistry( ModelRegistry = _ModelRegistry(
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment