"vllm/perf/benchmark_throughput.py" did not exist on "99d499457ddd0fea525e3542e2734f051b96eb08"
test_registry.py 3.24 KB
Newer Older
1
2
import warnings

3
import pytest
4
import torch.cuda
5

6
7
8
9
10
11
12
13
from vllm.model_executor.models import (is_embedding_model,
                                        is_text_generation_model,
                                        supports_multimodal)
from vllm.model_executor.models.registry import (_EMBEDDING_MODELS,
                                                 _MULTIMODAL_MODELS,
                                                 _SPECULATIVE_DECODING_MODELS,
                                                 _TEXT_GENERATION_MODELS,
                                                 ModelRegistry)
14
15
16
from vllm.platforms import current_platform

from ..utils import fork_new_process_for_each_test
17
from .registry import HF_EXAMPLE_MODELS
18
19


20
@pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
def test_registry_imports(model_arch):
    """Check that a registered architecture resolves and is categorized
    consistently with the registry tables.

    Parametrized over every architecture returned by
    ``ModelRegistry.get_supported_archs()``. Each capability predicate
    evaluated on the resolved class must agree (by identity with a ``bool``)
    with the architecture's membership in the matching registry mapping.
    """
    # Ensure all model classes can be imported successfully
    model_cls, _ = ModelRegistry.resolve_model_cls(model_arch)

    if model_arch in _SPECULATIVE_DECODING_MODELS:
        return  # Ignore these models which do not have a unified format

    # Multimodal architectures are also expected to count as text generation.
    assert is_text_generation_model(model_cls) is (
        model_arch in _TEXT_GENERATION_MODELS
        or model_arch in _MULTIMODAL_MODELS)

    assert is_embedding_model(model_cls) is (model_arch in _EMBEDDING_MODELS)

    assert supports_multimodal(model_cls) is (model_arch in _MULTIMODAL_MODELS)
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76


@fork_new_process_for_each_test
@pytest.mark.parametrize("model_arch,is_mm,init_cuda", [
    ("LlamaForCausalLM", False, False),
    ("MllamaForConditionalGeneration", True, False),
    ("LlavaForConditionalGeneration", True, True),
])
def test_registry_is_multimodal(model_arch, is_mm, init_cuda):
    """Check ``ModelRegistry.is_multimodal_model`` against the expected flag.

    For cases with ``init_cuda`` set, and only on CUDA-like platforms, also
    assert that CUDA is still uninitialized after the query, then resolve
    the model class and warn if that did not initialize CUDA.
    """
    reported_mm = ModelRegistry.is_multimodal_model(model_arch)
    assert reported_mm is is_mm

    # The CUDA checks only apply to flagged cases on CUDA-like platforms.
    if not init_cuda or not current_platform.is_cuda_alike():
        return

    # The registry query above must not have initialized CUDA.
    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if torch.cuda.is_initialized():
        return

    # NOTE(review): presumably this case is meant to use a model whose import
    # initializes CUDA; the warning asks for a replacement -- confirm.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2)


@fork_new_process_for_each_test
@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
    ("MLPSpeculatorPreTrainedModel", False, False),
    ("DeepseekV2ForCausalLM", True, False),
    ("Qwen2VLForConditionalGeneration", True, True),
])
def test_registry_is_pp(model_arch, is_pp, init_cuda):
    """Check ``ModelRegistry.is_pp_supported_model`` against the expected flag.

    For cases with ``init_cuda`` set, and only on CUDA-like platforms, also
    assert that CUDA is still uninitialized after the query, then resolve
    the model class and warn if that did not initialize CUDA.
    """
    reported_pp = ModelRegistry.is_pp_supported_model(model_arch)
    assert reported_pp is is_pp

    # The CUDA checks only apply to flagged cases on CUDA-like platforms.
    if not init_cuda or not current_platform.is_cuda_alike():
        return

    # The registry query above must not have initialized CUDA.
    assert not torch.cuda.is_initialized()

    ModelRegistry.resolve_model_cls(model_arch)
    if torch.cuda.is_initialized():
        return

    # NOTE(review): presumably this case is meant to use a model whose import
    # initializes CUDA; the warning asks for a replacement -- confirm.
    warnings.warn(
        "This model no longer initializes CUDA on import. "
        "Please test using a different one.",
        stacklevel=2)
77
78
79


def test_hf_registry_coverage():
    """Check that every supported architecture has an example-model entry.

    Computes the architectures reported by ``ModelRegistry`` that are absent
    from ``HF_EXAMPLE_MODELS`` and fails, listing them, if any remain.
    """
    untested_archs = (ModelRegistry.get_supported_archs() -
                      HF_EXAMPLE_MODELS.get_supported_archs())

    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")