test_registry.py 4.22 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
import warnings

6
import pytest
7
import torch.cuda
8

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from vllm.model_executor.models import (
    is_pooling_model,
    is_text_generation_model,
    supports_multimodal,
)
from vllm.model_executor.models.adapters import (
    as_embedding_model,
    as_seq_cls_model,
)
from vllm.model_executor.models.registry import (
    _MULTIMODAL_MODELS,
    _SPECULATIVE_DECODING_MODELS,
    _TEXT_GENERATION_MODELS,
    ModelRegistry,
)
24
25
from vllm.platforms import current_platform

26
from ..utils import create_new_process_for_each_test
27
from .registry import HF_EXAMPLE_MODELS
28
29


30
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Check that a registered architecture loads and has the expected traits.

    The transformers-version check is invoked with ``on_fail="skip"``, so an
    incompatible version is expected to skip the case rather than fail it.
    """
    # Skip if transformers version is incompatible
    hf_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
    hf_info.check_transformers_version(
        on_fail="skip",
        check_max_version=False,
        check_version_reason="vllm",
    )

    # Ensure all model classes can be imported successfully
    loaded_cls = ModelRegistry._try_load_model_cls(model_arch)
    assert loaded_cls is not None

    # Ignore these models which do not have a unified format
    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return

    generative_tables = (_TEXT_GENERATION_MODELS, _MULTIMODAL_MODELS)
    if any(model_arch in table for table in generative_tables):
        assert is_text_generation_model(loaded_cls)

    # All vLLM models should be convertible to a pooling model
    for to_pooling in (as_seq_cls_model, as_embedding_model):
        assert is_pooling_model(to_pooling(loaded_cls))

    if model_arch in _MULTIMODAL_MODELS:
        assert supports_multimodal(loaded_cls)
55
56


57
@create_new_process_for_each_test()
@pytest.mark.parametrize(
    "model_arch,is_mm,init_cuda,score_type",
    [
        ("LlamaForCausalLM", False, False, "bi-encoder"),
        ("LlavaForConditionalGeneration", True, True, "bi-encoder"),
        ("BertForSequenceClassification", False, False, "cross-encoder"),
        ("RobertaForSequenceClassification", False, False, "cross-encoder"),
        ("XLMRobertaForSequenceClassification", False, False, "cross-encoder"),
        ("GteNewModel", False, False, "bi-encoder"),
        ("GteNewForSequenceClassification", False, False, "cross-encoder"),
        ("HF_ColBERT", False, False, "late-interaction"),
    ],
)
def test_registry_model_property(model_arch, is_mm, init_cuda, score_type):
    """Verify the inspected multimodal flag and score type of an architecture.

    For rows with ``init_cuda`` set, on a CUDA-alike platform, the test also
    asserts CUDA is uninitialized before the model class is loaded and warns
    if it is still uninitialized afterwards.
    """
    inspected = ModelRegistry._try_inspect_model_cls(model_arch)
    assert inspected is not None

    assert inspected.supports_multimodal is is_mm
    assert inspected.score_type == score_type

    # The CUDA check only applies to flagged rows on CUDA-alike platforms.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    # Inspection above must not have initialized CUDA in this process.
    assert not torch.cuda.is_initialized()

    ModelRegistry._try_load_model_cls(model_arch)

    if torch.cuda.is_initialized():
        return

    # The row was chosen because loading it initializes CUDA; flag the drift.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2,
    )
88
89


90
@create_new_process_for_each_test()
@pytest.mark.parametrize(
    "model_arch,is_pp,init_cuda",
    [
        # TODO(woosuk): Re-enable this once the MLP Speculator is supported
        # in V1.
        # ("MLPSpeculatorPreTrainedModel", False, False),
        ("DeepseekV2ForCausalLM", True, False),
        ("Qwen2VLForConditionalGeneration", True, True),
    ],
)
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Verify the inspected ``supports_pp`` flag of an architecture.

    For rows with ``init_cuda`` set, on a CUDA-alike platform, the test also
    asserts CUDA is uninitialized before the model class is loaded and warns
    if it is still uninitialized afterwards.
    """
    inspected = ModelRegistry._try_inspect_model_cls(model_arch)
    assert inspected is not None

    assert inspected.supports_pp is is_pp

    # The CUDA check only applies to flagged rows on CUDA-alike platforms.
    if not (init_cuda and current_platform.is_cuda_alike()):
        return

    # Inspection above must not have initialized CUDA in this process.
    assert not torch.cuda.is_initialized()

    ModelRegistry._try_load_model_cls(model_arch)

    if torch.cuda.is_initialized():
        return

    # The row was chosen because loading it initializes CUDA; flag the drift.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2,
    )
117
118
119


def test_hf_registry_coverage():
    """Require every registry architecture to appear in HF_EXAMPLE_MODELS."""
    registered = ModelRegistry.get_supported_archs()
    with_examples = HF_EXAMPLE_MODELS.get_supported_archs()

    # Architectures in the model registry that have no example entry.
    missing = registered - with_examples

    assert not missing, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {missing}"
    )