# SPDX-License-Identifier: Apache-2.0
"""Tests for HF_HUB_OFFLINE mode"""
import importlib
import sys

import pytest
7
import urllib3
Joe Runde's avatar
Joe Runde committed
8
9

from vllm import LLM
10
from vllm.distributed import cleanup_dist_env_and_memory
Joe Runde's avatar
Joe Runde committed
11

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Engine arguments that are identical for every entry in MODEL_CONFIGS.
_COMMON_ENGINE_ARGS = {
    "enforce_eager": True,
    "max_model_len": 64,
    "max_num_batched_tokens": 64,
    "max_num_seqs": 64,
    "tensor_parallel_size": 1,
}

# One dict of keyword arguments per model exercised by this module; each entry
# is expanded as ``LLM(**config)``. Only the model name, the GPU memory
# fraction and (for Mistral) the tokenizer mode differ between entries.
MODEL_CONFIGS = [
    {
        "model": "facebook/opt-125m",
        "gpu_memory_utilization": 0.20,
        **_COMMON_ENGINE_ARGS,
    },
    {
        "model": "mistralai/Mistral-7B-Instruct-v0.1",
        "gpu_memory_utilization": 0.95,
        "tokenizer_mode": "mistral",
        **_COMMON_ENGINE_ARGS,
    },
    {
        "model": "sentence-transformers/all-MiniLM-L12-v2",
        "gpu_memory_utilization": 0.20,
        **_COMMON_ENGINE_ARGS,
    },
]
Joe Runde's avatar
Joe Runde committed
42
43
44


@pytest.fixture(scope="module")
def cache_models():
    """Build an ``LLM`` for every entry in ``MODEL_CONFIGS`` before the tests.

    Module-scoped, so the loop runs once for this file. Each engine is
    constructed only for the side effect of caching its model files; the
    instance itself is not kept. The fixture yields nothing.
    """
    # Cache model files first
    for model_config in MODEL_CONFIGS:
        LLM(**model_config)
        # Run vLLM's cleanup helper before moving on to the next config.
        cleanup_dist_env_and_memory()

    yield
Joe Runde's avatar
Joe Runde committed
52
53
54


@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_offline_mode(monkeypatch):
    """Check that every ``MODEL_CONFIGS`` entry loads with ``HF_HUB_OFFLINE=1``.

    The test sets ``HF_HUB_OFFLINE`` and ``VLLM_NO_USAGE_STATS``, replaces
    urllib3's ``HTTPConnection.connect`` / ``HTTPSConnection.connect`` with a
    function that raises ``RuntimeError``, reloads the Hugging Face modules
    and then constructs an ``LLM`` for each config. Any attempt to open a
    connection through urllib3 therefore fails the test.

    Args:
        monkeypatch: pytest's built-in ``monkeypatch`` fixture.
    """
    # Set HF to offline mode and ensure we can still construct an LLM
    try:
        # Scope every patch to this context: leaving it restores the
        # environment variables and ``connect`` methods to whatever they were
        # before the test, instead of unconditionally deleting the variables
        # (which raises ``KeyError`` when one is missing and hides the real
        # failure).
        with monkeypatch.context() as patcher:
            patcher.setenv("HF_HUB_OFFLINE", "1")
            patcher.setenv("VLLM_NO_USAGE_STATS", "1")

            def disable_connect(*args, **kwargs):
                raise RuntimeError("No http calls allowed")

            patcher.setattr(urllib3.connection.HTTPConnection, "connect",
                            disable_connect)
            patcher.setattr(urllib3.connection.HTTPSConnection, "connect",
                            disable_connect)

            # Need to re-import huggingface_hub and friends to setup offline
            # mode
            _re_import_modules()
            # Cached model files should be used in offline mode
            for model_config in MODEL_CONFIGS:
                LLM(**model_config)
    finally:
        # The patches are already undone at this point; reload once more so
        # the modules see the restored environment for later tests.
        _re_import_modules()


def _re_import_modules():
    """Reload every loaded ``huggingface_hub`` and ``transformers`` module.

    Module names are collected from ``sys.modules`` up front:
    ``huggingface_hub*`` first, then ``transformers*`` (names starting with
    ``transformers_modules`` are left alone). Each one is passed to
    ``importlib.reload``. A failing reload does not stop the loop.

    Raises:
        Exception: the most recent error raised by a reload, re-raised once
            every module has been attempted.
    """
    loaded_names = tuple(sys.modules)
    hub_names = [
        name for name in loaded_names if name.startswith("huggingface_hub")
    ]
    transformers_names = [
        name for name in loaded_names
        if name.startswith("transformers")
        and not name.startswith("transformers_modules")
    ]

    last_error = None
    for name in (*hub_names, *transformers_names):
        try:
            importlib.reload(sys.modules[name])
        except Exception as err:
            # Keep going so the remaining modules are still refreshed and
            # other tests are less likely to be affected.
            last_error = err

    # Surface a reload failure to the caller only after the full pass.
    if last_error is not None:
        raise last_error