test_offline_mode.py 5.1 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
Joe Runde's avatar
Joe Runde committed
3
"""Tests for HF_HUB_OFFLINE mode"""
4

Joe Runde's avatar
Joe Runde committed
5
6
7
8
import importlib
import sys

import pytest
9
import regex as re
10
import urllib3
Joe Runde's avatar
Joe Runde committed
11
12

from vllm import LLM
13
from vllm.distributed import cleanup_dist_env_and_memory
Joe Runde's avatar
Joe Runde committed
14

15
16
17
18
19
20
21
22
23
24
# Engine arguments that are identical for every model under test.
_SHARED_ENGINE_ARGS = {
    "enforce_eager": True,
    "max_model_len": 64,
    "max_num_batched_tokens": 64,
    "max_num_seqs": 64,
    "tensor_parallel_size": 1,
}

# One keyword-argument mapping per model; each entry is unpacked into
# ``LLM(**config)`` by the fixture and tests in this module.
MODEL_CONFIGS = [
    {
        "model": "facebook/opt-125m",
        "gpu_memory_utilization": 0.20,
        **_SHARED_ENGINE_ARGS,
    },
    {
        # Model paired with a tokenizer taken from a different repository
        "model": "Qwen/Qwen3-0.6B",
        "gpu_memory_utilization": 0.50,
        "tokenizer": "Qwen/Qwen3-4B",
        **_SHARED_ENGINE_ARGS,
    },
    {
        # Model loaded with the "mistral" tokenizer mode
        "model": "mistralai/Mistral-7B-Instruct-v0.1",
        "gpu_memory_utilization": 0.95,
        "tokenizer_mode": "mistral",
        **_SHARED_ENGINE_ARGS,
    },
    # TODO: re-enable once these tests are run with V1
    # {
    #     "model": "sentence-transformers/all-MiniLM-L12-v2",
    #     "gpu_memory_utilization": 0.20,
    #     **_SHARED_ENGINE_ARGS,
    # },
]
Joe Runde's avatar
Joe Runde committed
56
57
58


@pytest.fixture(scope="module")
def cache_models():
    """Build each configured model once before the tests in this module run.

    Module-scoped, so the construction loop executes a single time no matter
    how many tests request the fixture. Nothing is yielded to the tests.
    """
    for engine_kwargs in MODEL_CONFIGS:
        # Constructing the engine is what caches the model files; the
        # instance itself is not kept.
        LLM(**engine_kwargs)
        # Clean up before constructing the next model
        # (presumably releases distributed state and memory -- confirm).
        cleanup_dist_env_and_memory()

    yield
Joe Runde's avatar
Joe Runde committed
66
67
68


@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_offline_mode(monkeypatch: pytest.MonkeyPatch):
    """Ensure every configured model can be constructed with HF in offline mode.

    Sets ``HF_HUB_OFFLINE``, makes any urllib3 HTTP(S) connection attempt
    raise, re-imports the Hugging Face modules so they pick up offline mode,
    and then constructs an ``LLM`` for each entry of ``MODEL_CONFIGS``.

    The restoring re-import runs in ``finally`` *after* the monkeypatch
    context has exited. Re-importing while the context is still active would
    reload the modules with ``HF_HUB_OFFLINE`` still set and leave them in
    offline mode for subsequent tests.
    """
    try:
        with monkeypatch.context() as m:
            m.setenv("HF_HUB_OFFLINE", "1")
            m.setenv("VLLM_NO_USAGE_STATS", "1")

            def disable_connect(*args, **kwargs):
                raise RuntimeError("No http calls allowed")

            # Any attempt to open a connection through urllib3 fails loudly
            m.setattr(
                urllib3.connection.HTTPConnection,
                "connect",
                disable_connect,
            )
            m.setattr(
                urllib3.connection.HTTPSConnection,
                "connect",
                disable_connect,
            )

            # Need to re-import huggingface_hub
            # and friends to set up offline mode
            _re_import_modules()
            # Cached model files should be used in offline mode
            for model_config in MODEL_CONFIGS:
                LLM(**model_config)
    finally:
        # Reset the environment after the test. The env vars and connect
        # patches have already been undone at this point, so the modules are
        # reloaded under the original settings.
        # NB: Assuming tests are run in online mode
        _re_import_modules()
Joe Runde's avatar
Joe Runde committed
101
102
103


def _re_import_modules():
    """Reload every loaded ``huggingface_hub`` and ``transformers`` module.

    ``huggingface_hub*`` modules are processed first, then ``transformers*``
    modules (``transformers_modules*`` excluded). Modules whose name matches
    an alias pattern are removed from ``sys.modules`` rather than reloaded.

    Raises:
        Exception: the most recent error raised by a reload, re-raised only
            after every module has been visited.
    """
    # These modules are aliased in Transformers v5 and so cannot be reloaded directly
    alias_patterns = (
        r".+\.tokenization_utils$",
        r".+\.tokenization_utils_fast$",
        r".+\.models\..+\.image_processing_.+_fast$",
    )

    def is_aliased(name):
        for pattern in alias_patterns:
            if re.match(pattern, name):
                return True
        return False

    # Snapshot the names up front: sys.modules is mutated while we work.
    loaded_names = list(sys.modules)
    hub_names = [n for n in loaded_names if n.startswith("huggingface_hub")]
    transformers_names = [
        n
        for n in loaded_names
        if n.startswith("transformers") and not n.startswith("transformers_modules")
    ]

    last_failure = None
    for name in [*hub_names, *transformers_names]:
        if is_aliased(name):
            # Remove from sys.modules so they are re-aliased on next import
            del sys.modules[name]
            continue

        try:
            importlib.reload(sys.modules[name])
        except Exception as exc:
            # Keep going so that other tests are less likely to be affected
            # by a partially reloaded module set.
            last_failure = exc

    # Error this test if reloading a module failed
    if last_failure is not None:
        raise last_failure
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158


@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_model_from_huggingface_offline(monkeypatch: pytest.MonkeyPatch):
    """Ensure an ``LLM`` can be built from a model name alone in offline mode.

    Sets ``HF_HUB_OFFLINE``, makes any urllib3 HTTP(S) connection attempt
    raise, re-imports the Hugging Face modules so they pick up offline mode,
    and then constructs ``LLM(model="facebook/opt-125m")`` with no other
    arguments.

    The restoring re-import runs in ``finally`` *after* the monkeypatch
    context has exited. Re-importing while the context is still active would
    reload the modules with ``HF_HUB_OFFLINE`` still set and leave them in
    offline mode for subsequent tests.
    """
    try:
        with monkeypatch.context() as m:
            m.setenv("HF_HUB_OFFLINE", "1")
            m.setenv("VLLM_NO_USAGE_STATS", "1")

            def disable_connect(*args, **kwargs):
                raise RuntimeError("No http calls allowed")

            # Any attempt to open a connection through urllib3 fails loudly
            m.setattr(
                urllib3.connection.HTTPConnection,
                "connect",
                disable_connect,
            )
            m.setattr(
                urllib3.connection.HTTPSConnection,
                "connect",
                disable_connect,
            )
            # Need to re-import huggingface_hub
            # and friends to set up offline mode
            _re_import_modules()
            LLM(model="facebook/opt-125m")
    finally:
        # Reset the environment after the test. The env vars and connect
        # patches have already been undone at this point, so the modules are
        # reloaded under the original settings.
        # NB: Assuming tests are run in online mode
        _re_import_modules()