# test_registry.py (4.1 KB) -- header residue from the repository web viewer
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
import warnings

6
import pytest
7
import torch.cuda
8

9
from vllm.model_executor.models import (is_pooling_model,
10
11
                                        is_text_generation_model,
                                        supports_multimodal)
12
13
14
from vllm.model_executor.models.adapters import (as_embedding_model,
                                                 as_reward_model,
                                                 as_seq_cls_model)
15
from vllm.model_executor.models.registry import (_MULTIMODAL_MODELS,
16
17
18
                                                 _SPECULATIVE_DECODING_MODELS,
                                                 _TEXT_GENERATION_MODELS,
                                                 ModelRegistry)
19
20
from vllm.platforms import current_platform

21
from ..utils import create_new_process_for_each_test
22
from .registry import HF_EXAMPLE_MODELS
23
24


25
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Check that a registered architecture loads and passes capability checks.

    Runs once per architecture name returned by
    ``ModelRegistry.get_supported_archs()``. The model class must load, and
    (unless it is listed in ``_SPECULATIVE_DECODING_MODELS``) it must:

    * be recognized as a text generation model when it is listed in
      ``_TEXT_GENERATION_MODELS`` or ``_MULTIMODAL_MODELS``;
    * be recognized as a pooling model after each of the
      ``as_seq_cls_model`` / ``as_embedding_model`` / ``as_reward_model``
      adapters is applied;
    * report multimodal support when it is listed in ``_MULTIMODAL_MODELS``.
    """
    # Ensure all model classes can be imported successfully
    model_cls = ModelRegistry._try_load_model_cls(model_arch)
    assert model_cls is not None

    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return  # Ignore these models which do not have a unified format

    if (model_arch in _TEXT_GENERATION_MODELS
            or model_arch in _MULTIMODAL_MODELS):
        assert is_text_generation_model(model_cls)

    # All vLLM models should be convertible to a pooling model
    assert is_pooling_model(as_seq_cls_model(model_cls))
    assert is_pooling_model(as_embedding_model(model_cls))
    assert is_pooling_model(as_reward_model(model_cls))

    if model_arch in _MULTIMODAL_MODELS:
        assert supports_multimodal(model_cls)
45
46


47
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_mm,init_cuda,is_ce", [
    ("LlamaForCausalLM", False, False, False),
    ("MllamaForConditionalGeneration", True, False, False),
    ("LlavaForConditionalGeneration", True, True, False),
    ("BertForSequenceClassification", False, False, True),
    ("RobertaForSequenceClassification", False, False, True),
    ("XLMRobertaForSequenceClassification", False, False, True),
])
def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
    """Check the multimodal / cross-encoding flags reported by inspection.

    Args:
        model_arch: Architecture name to look up in ``ModelRegistry``.
        is_mm: Expected value of ``model_info.supports_multimodal``.
        init_cuda: Whether to also run the CUDA-initialization check; going
            by the warning text below, it marks architectures whose import
            is expected to initialize CUDA.
        is_ce: Expected value of ``model_info.supports_cross_encoding``.

    NOTE(review): the ``create_new_process_for_each_test`` decorator
    presumably isolates each case in a fresh process so that the
    ``torch.cuda.is_initialized()`` state is not inherited from other tests
    -- confirm against ``tests/utils``.
    """
    model_info = ModelRegistry._try_inspect_model_cls(model_arch)
    assert model_info is not None

    # Identity comparison: the flags must be real booleans matching exactly.
    assert model_info.supports_multimodal is is_mm
    assert model_info.supports_cross_encoding is is_ce

    if init_cuda and current_platform.is_cuda_alike():
        # Inspecting the model above must not have initialized CUDA.
        assert not torch.cuda.is_initialized()

        ModelRegistry._try_load_model_cls(model_arch)

        # If loading the class does not initialize CUDA either, the assertion
        # above proves nothing for this architecture; flag it (without
        # failing) so a different example model gets picked.
        if not torch.cuda.is_initialized():
            warnings.warn(
                "This model no longer initializes CUDA on import. "
                "Please test using a different one.",
                stacklevel=2)


74
@create_new_process_for_each_test()
@pytest.mark.parametrize(
    "model_arch,is_pp,init_cuda",
    [
        # TODO(woosuk): Re-enable this once the MLP Speculator is supported
        # in V1.
        # ("MLPSpeculatorPreTrainedModel", False, False),
        ("DeepseekV2ForCausalLM", True, False),
        ("Qwen2VLForConditionalGeneration", True, True),
    ])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check the ``supports_pp`` flag reported by model inspection.

    Args:
        model_arch: Architecture name to look up in ``ModelRegistry``.
        is_pp: Expected value of ``model_info.supports_pp``.
        init_cuda: Whether to also run the CUDA-initialization check; going
            by the warning text below, it marks architectures whose import
            is expected to initialize CUDA.

    NOTE(review): the ``create_new_process_for_each_test`` decorator
    presumably isolates each case in a fresh process so that the
    ``torch.cuda.is_initialized()`` state is not inherited from other tests
    -- confirm against ``tests/utils``.
    """
    model_info = ModelRegistry._try_inspect_model_cls(model_arch)
    assert model_info is not None

    # Identity comparison: the flag must be a real boolean matching exactly.
    assert model_info.supports_pp is is_pp

    if init_cuda and current_platform.is_cuda_alike():
        # Inspecting the model above must not have initialized CUDA.
        assert not torch.cuda.is_initialized()

        ModelRegistry._try_load_model_cls(model_arch)

        # If loading the class does not initialize CUDA either, the assertion
        # above proves nothing for this architecture; flag it (without
        # failing) so a different example model gets picked.
        if not torch.cuda.is_initialized():
            warnings.warn(
                "This model no longer initializes CUDA on import. "
                "Please test using a different one.",
                stacklevel=2)
99
100
101


def test_hf_registry_coverage():
    """Require every supported architecture to have an example-model entry.

    Subtracts the architectures known to ``HF_EXAMPLE_MODELS`` from those
    reported by ``ModelRegistry`` and fails, listing the leftovers, if any
    remain.
    """
    untested_archs = (ModelRegistry.get_supported_archs() -
                      HF_EXAMPLE_MODELS.get_supported_archs())

    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")