test_registry.py 4.56 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
import warnings

zhuwenwen's avatar
zhuwenwen committed
6
import os
7
import pytest
8
import torch.cuda
9

10
from vllm.model_executor.models import (is_pooling_model,
11
12
                                        is_text_generation_model,
                                        supports_multimodal)
13
14
15
from vllm.model_executor.models.adapters import (as_classification_model,
                                                 as_embedding_model,
                                                 as_reward_model)
16
from vllm.model_executor.models.registry import (_MULTIMODAL_MODELS,
17
18
19
                                                 _SPECULATIVE_DECODING_MODELS,
                                                 _TEXT_GENERATION_MODELS,
                                                 ModelRegistry)
20
21
from vllm.platforms import current_platform

22
from ..utils import create_new_process_for_each_test
23
from .registry import HF_EXAMPLE_MODELS
24

zhuwenwen's avatar
zhuwenwen committed
25
26
# Root directory under which the test model names are looked up. It is read
# from VLLM_OPTEST_MODELS_PATH first, then OPTEST_MODELS_PATH. The final
# fallback to "" keeps ``os.path.join(models_path_prefix, name)`` working
# (it then returns the bare ``name``) instead of raising ``TypeError`` on
# ``None`` at collection time when neither variable is set.
models_path_prefix = (os.getenv('VLLM_OPTEST_MODELS_PATH')
                      or os.getenv("OPTEST_MODELS_PATH") or "")

27

28
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Check that a registered architecture resolves to a well-formed class.

    Runs once per architecture reported by
    ``ModelRegistry.get_supported_archs()``. After resolving the model class
    it asserts that:

    * text-generation and multimodal architectures satisfy
      ``is_text_generation_model``;
    * the class wrapped by each pooling adapter satisfies
      ``is_pooling_model``;
    * multimodal architectures satisfy ``supports_multimodal``.
    """
    # Llama4ForCausalLM does not have a standalone model
    if model_arch == "Llama4ForCausalLM":
        return

    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
    model_info.check_transformers_version(on_fail="skip")

    # Ensure all model classes can be imported successfully
    model_cls, _ = ModelRegistry.resolve_model_cls(model_arch)

    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return  # Ignore these models which do not have a unified format

    is_multimodal_arch = model_arch in _MULTIMODAL_MODELS

    if model_arch in _TEXT_GENERATION_MODELS or is_multimodal_arch:
        assert is_text_generation_model(model_cls)

    # All vLLM models should be convertible to a pooling model
    pooling_adapters = (
        as_classification_model,
        as_embedding_model,
        as_reward_model,
    )
    for to_pooling_model in pooling_adapters:
        assert is_pooling_model(to_pooling_model(model_cls))

    if is_multimodal_arch:
        assert supports_multimodal(model_cls)
55
56


57
# NOTE(review): elsewhere in this file the registry is queried with plain
# architecture names (``ModelRegistry.get_supported_archs()``), whereas here
# the name is joined onto ``models_path_prefix`` -- confirm that the registry
# accepts a path-prefixed value. ``models_path_prefix or ""`` avoids a
# ``TypeError`` from ``os.path.join(None, ...)`` at collection time when the
# prefix is unset; the join then yields the bare architecture name.
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_mm,init_cuda,is_ce", [
    (os.path.join(models_path_prefix or "", arch), is_mm, init_cuda, is_ce)
    for arch, is_mm, init_cuda, is_ce in (
        ("LlamaForCausalLM", False, False, False),
        ("MllamaForConditionalGeneration", True, False, False),
        ("LlavaForConditionalGeneration", True, True, False),
        ("BertForSequenceClassification", False, False, True),
        ("RobertaForSequenceClassification", False, False, True),
        ("XLMRobertaForSequenceClassification", False, False, True),
    )
])
def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
    """Check the multimodal / cross-encoder flags the registry reports.

    Args:
        model_arch: Value passed to the ``ModelRegistry`` queries.
        is_mm: Expected result of ``ModelRegistry.is_multimodal_model``.
        init_cuda: Whether resolving this model's class is expected to
            initialize CUDA.
        is_ce: Expected result of ``ModelRegistry.is_cross_encoder_model``.
    """
    assert ModelRegistry.is_multimodal_model(model_arch) is is_mm

    assert ModelRegistry.is_cross_encoder_model(model_arch) is is_ce

    if init_cuda and current_platform.is_cuda_alike():
        # The property queries above must not have initialized CUDA
        # (presumably the decorator gives each case a fresh process --
        # TODO confirm).
        assert not torch.cuda.is_initialized()

        ModelRegistry.resolve_model_cls(model_arch)
        # Only warn (do not fail) if the chosen model stopped initializing
        # CUDA on import: the parametrization is then stale, not the code.
        if not torch.cuda.is_initialized():
            warnings.warn(
                "This model no longer initializes CUDA on import. "
                "Please test using a different one.",
                stacklevel=2)


82
# NOTE(review): elsewhere in this file the registry is queried with plain
# architecture names (``ModelRegistry.get_supported_archs()``), whereas here
# the name is joined onto ``models_path_prefix`` -- confirm that the registry
# accepts a path-prefixed value. ``models_path_prefix or ""`` avoids a
# ``TypeError`` from ``os.path.join(None, ...)`` at collection time when the
# prefix is unset; the join then yields the bare architecture name.
@create_new_process_for_each_test()
@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
    (os.path.join(models_path_prefix or "", arch), is_pp, init_cuda)
    for arch, is_pp, init_cuda in (
        ("MLPSpeculatorPreTrainedModel", False, False),
        ("DeepseekV2ForCausalLM", True, False),
        ("Qwen2VLForConditionalGeneration", True, True),
    )
])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check the pipeline-parallel support flag the registry reports.

    Args:
        model_arch: Value passed to the ``ModelRegistry`` queries.
        is_pp: Expected result of ``ModelRegistry.is_pp_supported_model``.
        init_cuda: Whether resolving this model's class is expected to
            initialize CUDA.
    """
    assert ModelRegistry.is_pp_supported_model(model_arch) is is_pp

    if init_cuda and current_platform.is_cuda_alike():
        # The property query above must not have initialized CUDA
        # (presumably the decorator gives each case a fresh process --
        # TODO confirm).
        assert not torch.cuda.is_initialized()

        ModelRegistry.resolve_model_cls(model_arch)
        # Only warn (do not fail) if the chosen model stopped initializing
        # CUDA on import: the parametrization is then stale, not the code.
        if not torch.cuda.is_initialized():
            warnings.warn(
                "This model no longer initializes CUDA on import. "
                "Please test using a different one.",
                stacklevel=2)
100
101
102


def test_hf_registry_coverage():
    """Ensure every registered architecture has an HF example model entry.

    Takes the architectures reported by ``ModelRegistry`` minus those
    reported by ``HF_EXAMPLE_MODELS`` and fails, listing the leftovers, if
    any remain.
    """
    registered_archs = ModelRegistry.get_supported_archs()
    example_archs = HF_EXAMPLE_MODELS.get_supported_archs()
    untested_archs = set(registered_archs - example_archs)

    # Llama4ForCausalLM does not have a standalone model
    untested_archs.discard("Llama4ForCausalLM")

    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")