test_offline_mode.py 2.46 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
Joe Runde's avatar
Joe Runde committed
2
3
4
"""Tests for HF_HUB_OFFLINE mode"""
import importlib
import sys
5
import os
Joe Runde's avatar
Joe Runde committed
6
7
8
9

import pytest

from vllm import LLM
10
from vllm.distributed import cleanup_dist_env_and_memory
11
from ...utils import models_path_prefix
Joe Runde's avatar
Joe Runde committed
12
13


14
15
# Keyword arguments for each model exercised in this module. Every entry is
# unpacked straight into ``LLM(**config)``; the model path is resolved
# relative to ``models_path_prefix``.
MODEL_CONFIGS = [
    {
        "model": os.path.join(models_path_prefix, "facebook/opt-125m"),
        "enforce_eager": True,
        "gpu_memory_utilization": 0.20,
        "max_model_len": 64,
        "max_num_batched_tokens": 64,
        "max_num_seqs": 64,
        "tensor_parallel_size": 1,
    },
    {
        "model": os.path.join(
            models_path_prefix,
            "mistralai/Mistral-7B-Instruct-v0.1",
        ),
        "enforce_eager": True,
        "gpu_memory_utilization": 0.95,
        "max_model_len": 64,
        "max_num_batched_tokens": 64,
        "max_num_seqs": 64,
        "tensor_parallel_size": 1,
        "tokenizer_mode": "mistral",
    },
]
Joe Runde's avatar
Joe Runde committed
35
36
37


@pytest.fixture(scope="module")
def cache_models():
    """Construct every configured model once before the dependent tests run.

    Module-scoped, so the loop executes a single time per test module. Each
    ``LLM`` is built only so that its model files end up cached; the
    instance is not kept, and distributed state and memory are released
    before the next model is built.
    """
    for engine_kwargs in MODEL_CONFIGS:
        LLM(**engine_kwargs)
        cleanup_dist_env_and_memory()

    # Nothing to hand to the test and no teardown work afterwards.
    yield
Joe Runde's avatar
Joe Runde committed
45
46
47


@pytest.mark.skip_global_cleanup
@pytest.mark.usefixtures("cache_models")
def test_offline_mode(monkeypatch):
    """Check that an ``LLM`` can still be constructed with HF_HUB_OFFLINE=1.

    The ``cache_models`` fixture has already constructed each model in
    ``MODEL_CONFIGS`` once, so the files are expected to be cached locally.
    The test passes if every construction succeeds without raising.

    Args:
        monkeypatch: pytest fixture used to set and clear the environment
            variable for the duration of the test.
    """
    # Set HF to offline mode and ensure we can still construct an LLM
    try:
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
        # Need to re-import huggingface_hub and friends to setup offline mode
        _re_import_modules()
        # Cached model files should be used in offline mode
        for model_config in MODEL_CONFIGS:
            LLM(**model_config)
    finally:
        # Reset the environment after the test
        # NB: Assuming tests are run in online mode
        # raising=False keeps this cleanup from failing with KeyError (and
        # hiding the real error / skipping the reload below) if the variable
        # is not set at this point.
        monkeypatch.delenv("HF_HUB_OFFLINE", raising=False)
        _re_import_modules()


def _re_import_modules():
    """Reload the already-imported huggingface_hub and transformers modules.

    Modules are selected by name prefix from ``sys.modules``: everything
    starting with ``huggingface_hub`` first, then everything starting with
    ``transformers`` except names starting with ``transformers_modules``.

    Raises:
        Exception: if any reload failed, the exception from the last failing
            reload is re-raised after all reloads have been attempted.
    """
    loaded_names = list(sys.modules)
    hub_names = [
        name for name in loaded_names if name.startswith("huggingface_hub")
    ]
    transformers_names = [
        name for name in loaded_names
        if name.startswith("transformers")
        and not name.startswith("transformers_modules")
    ]

    last_failure = None
    for name in [*hub_names, *transformers_names]:
        try:
            importlib.reload(sys.modules[name])
        except Exception as exc:
            # Keep going so the remaining modules are still reloaded and
            # other tests are less likely to be affected.
            last_failure = exc

    # Surface a reload failure as an error in the calling test.
    if last_failure is not None:
        raise last_failure