# test_offline_mode.py
"""Tests for HF_HUB_OFFLINE mode"""
import importlib
import sys
import weakref

import pytest

from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory

# Model identifier passed as ``model=`` to every ``LLM`` constructed in this
# file.
MODEL_NAME = "facebook/opt-125m"


@pytest.fixture(scope="module")
def llm():
    """Module-scoped fixture yielding a weak proxy to an ``LLM``.

    The ``LLM`` is built for ``MODEL_NAME`` and yielded from inside its
    ``deprecate_legacy_api()`` context. After the last test in the module
    has run, the local strong reference is dropped and
    ``cleanup_dist_env_and_memory()`` is called.
    """
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=4096,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.10,
              enforce_eager=True)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

        # Teardown: release this function's strong reference so only the
        # weak proxy handed to the tests remains.
        del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_offline_mode(llm: LLM, monkeypatch):
    """Check that an ``LLM`` can be constructed with ``HF_HUB_OFFLINE=1``.

    The ``llm`` fixture is requested only so that the model files are
    already cached before offline mode is enabled; the instance itself is
    not used.
    """
    # we use the llm fixture to ensure the model files are in-cache
    del llm

    # Set HF to offline mode and ensure we can still construct an LLM
    try:
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
        # Need to re-import huggingface_hub and friends to setup offline mode
        _re_import_modules()
        # Cached model files should be used in offline mode
        LLM(model=MODEL_NAME,
            max_num_batched_tokens=4096,
            tensor_parallel_size=1,
            gpu_memory_utilization=0.20,
            enforce_eager=True)
    finally:
        # Reset the environment after the test
        # NB: Assuming tests are run in online mode
        monkeypatch.delenv("HF_HUB_OFFLINE")
        # Reload again so the modules pick up the restored environment.
        _re_import_modules()


def _re_import_modules():
    """Reload every loaded ``huggingface_hub*`` and ``transformers*`` module.

    Module names are taken from ``sys.modules``; names starting with
    ``transformers_modules`` are left alone. All ``huggingface_hub`` modules
    are reloaded before any ``transformers`` module.

    A failing reload does not stop the loop. Once every module has been
    attempted, the most recent exception (if any) is re-raised.
    """
    # Snapshot the names first so reloading cannot affect the selection.
    loaded_names = tuple(sys.modules)

    targets = [
        name for name in loaded_names if name.startswith("huggingface_hub")
    ]
    for name in loaded_names:
        if not name.startswith("transformers"):
            continue
        if name.startswith("transformers_modules"):
            continue
        targets.append(name)

    last_error = None
    for name in targets:
        try:
            importlib.reload(sys.modules[name])
        except Exception as exc:
            # Keep going so that other tests are less likely to be affected
            # by a half-finished reload.
            last_error = exc

    if last_error is None:
        return

    # Error this test if reloading a module failed
    raise last_error