# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for BaseRenderer.warmup MM-warmup behavior.

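These tests exercise:
  - Zero-limit modalities are filtered from mm_counts passed to
    get_dummy_processor_inputs (e.g. --limit-mm-per-prompt image=0 ...)
  - MM warmup runs (processor applied, MM cache cleared) when limits > 0
  - MM warmup is skipped entirely when mm_processor is None
  - The readonly MM processor, when set, is applied and its cache cleared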

No model weights are required: warmup() is called directly on a MagicMock
that acts as the renderer instance.
"""

from unittest.mock import MagicMock, patch

from vllm.renderers.base import BaseRenderer
from vllm.renderers.params import ChatParams


def _make_renderer_mock(mm_limits: dict[str, int]) -> MagicMock:
    """Return a MagicMock that quacks like a BaseRenderer instance.

    render_chat is mocked to raise ChatTemplateResolutionError so the chat
    warmup block is skipped cleanly, keeping the test focused on MM warmup.

    Args:
        mm_limits: Mapping of modality name to per-prompt limit, exposed as
            ``mm_processor.info.allowed_mm_limits`` on the returned mock.

    Returns:
        A MagicMock intended to be passed as ``self`` to
        ``BaseRenderer.warmup``.
    """
    # Imported locally, as in the original helper, rather than at module level.
    from vllm.entrypoints.chat_utils import ChatTemplateResolutionError

    renderer = MagicMock()

    # chat warmup: make render_chat raise so we skip past it cleanly
    renderer.render_chat.side_effect = ChatTemplateResolutionError("no template")

    # MM processor with configurable limits
    mm_processor = MagicMock()
    mm_processor.info.allowed_mm_limits = mm_limits
    renderer.mm_processor = mm_processor

    # Set explicitly: a MagicMock would otherwise auto-create a child mock
    # for this attribute instead of yielding None.
    renderer._readonly_mm_processor = None

    # Bind the real helper to the mock so the actual BaseRenderer
    # implementation runs with the mock as ``self``.
    renderer._warmup_mm_processor = BaseRenderer._warmup_mm_processor.__get__(
        renderer, BaseRenderer
    )
    # NOTE(review): assigned without binding, unlike _warmup_mm_processor;
    # presumably a staticmethod on BaseRenderer -- confirm.
    renderer._clear_processor_cache = BaseRenderer._clear_processor_cache
    renderer.clear_mm_cache = MagicMock()
    renderer.model_config.max_model_len = 128
    renderer.model_config.get_multimodal_config.return_value.limit_per_prompt = {}

    return renderer


class TestMmWarmupZeroLimitFiltering:
    """Modalities whose limit is 0 must be left out of mm_counts."""

    @staticmethod
    def _warmup_and_get_mm_counts(limits: dict[str, int]):
        """Run warmup with *limits* and return the ``mm_counts`` keyword.

        Also asserts that get_dummy_processor_inputs was called exactly once.
        """
        mock_renderer = _make_renderer_mock(limits)

        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
            BaseRenderer.warmup(mock_renderer, ChatParams())

        dummy_inputs = mock_renderer.mm_processor.dummy_inputs
        dummy_inputs.get_dummy_processor_inputs.assert_called_once()
        recorded_call = dummy_inputs.get_dummy_processor_inputs.call_args
        return recorded_call.kwargs["mm_counts"]

    def test_zero_limit_modality_excluded_from_mm_counts(self):
        """A modality with limit=0 must not appear in mm_counts."""
        mm_counts = self._warmup_and_get_mm_counts({"image": 1, "video": 0})

        assert "video" not in mm_counts
        assert mm_counts["image"] == 1

    def test_all_zero_limits_passes_empty_mm_counts(self):
        """When all limits are 0, mm_counts must be empty."""
        mm_counts = self._warmup_and_get_mm_counts({"image": 0, "video": 0})

        assert mm_counts == {}

    def test_positive_limits_all_included_in_mm_counts(self):
        """All modalities with limit > 0 must be present in mm_counts.

        The expected count is 1 per modality even when the limit is higher.
        """
        mm_counts = self._warmup_and_get_mm_counts({"image": 2, "video": 1})

        assert mm_counts == {"image": 1, "video": 1}


class TestMmWarmupRunsNormally:
    """With an mm_processor set and limits > 0, MM warmup must execute."""

    # Patch target shared by both tests.
    _TIMING_CONTEXT = "vllm.multimodal.processing.TimingContext"

    def test_processor_apply_called(self):
        """warmup() must call mm_processor.apply exactly once."""
        mock_renderer = _make_renderer_mock({"image": 1})

        with patch(self._TIMING_CONTEXT, autospec=True):
            BaseRenderer.warmup(mock_renderer, ChatParams())

        apply_mock = mock_renderer.mm_processor.apply
        apply_mock.assert_called_once()

    def test_mm_cache_cleared_after_warmup(self):
        """warmup() must call clear_mm_cache exactly once."""
        mock_renderer = _make_renderer_mock({"image": 1})

        with patch(self._TIMING_CONTEXT, autospec=True):
            BaseRenderer.warmup(mock_renderer, ChatParams())

        clear_mock = mock_renderer.clear_mm_cache
        clear_mock.assert_called_once()


class TestMmWarmupSkippedWhenNoProcessor:
    """MM warmup must be skipped when mm_processor is None (text-only model)."""

    def test_no_warmup_without_processor(self):
        """With mm_processor=None, warmup() must not query the MM config."""
        renderer = _make_renderer_mock({})
        renderer.mm_processor = None  # override to None

        BaseRenderer.warmup(renderer, ChatParams())

        # The multimodal config not being fetched is this test's evidence
        # that the MM warmup path did not run.
        renderer.model_config.get_multimodal_config.assert_not_called()


class TestReadonlyMmWarmup:
    """Readonly MM processor warmup must mirror the render path behavior."""

    def test_readonly_processor_apply_called_and_cache_cleared(self):
        """A readonly processor must be applied once and its cache cleared once."""
        # Stand-in for the readonly processor, with the same limits as the
        # main mock processor.
        readonly_proc = MagicMock()
        readonly_proc.info.allowed_mm_limits = {"image": 1}

        mock_renderer = _make_renderer_mock({"image": 1})
        mock_renderer._readonly_mm_processor = readonly_proc

        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
            BaseRenderer.warmup(mock_renderer, ChatParams())

        readonly_proc.apply.assert_called_once()
        readonly_proc.cache.clear_cache.assert_called_once()