Unverified Commit b6885977 authored by Ran Rubin's avatar Ran Rubin Committed by GitHub
Browse files

feat(tests): add --models-dir flag for read-only HF cache (#8362)


Signed-off-by: default avatarrrubin <rrubin@nvidia.com>
parent 4b3cd459
......@@ -36,6 +36,34 @@ export HF_HUB_OFFLINE=1 HF_TOKEN="$(cat ~/.cache/huggingface/token)"
python3 -m pytest -xvv --basetemp=/tmp/pytest_temp --durations=0 tests/
```
### Running against a pre-populated local model cache
If you have models already downloaded into a read-only directory (e.g. a shared
NFS mount or a bind-mounted volume), pass `--models-dir` to skip all network
downloads and avoid any writes to the cache:
```bash
python3 -m pytest --models-dir=/path/to/hf_cache -xvv tests/serve/test_vllm.py
```
`--models-dir` accepts either a **bare `HF_HUB_CACHE` directory** (one that contains
`models--org--name/` subdirectories) or an **`HF_HOME` directory**. The layout is
auto-detected: if a `hub/` subdirectory is present, the path is exported as `HF_HOME`;
otherwise it is exported as `HF_HUB_CACHE`. A warning is logged when the `HF_HOME`
layout is detected so you can verify the choice is correct.
What `--models-dir` does:
- Sets `HF_HUB_CACHE` (or `HF_HOME`) to the supplied path.
- Enables `HF_HUB_OFFLINE=1` and `TRANSFORMERS_OFFLINE=1` — no network calls.
- Short-circuits `predownload_models` and `predownload_tokenizers` — no writes to
the cache directory.
- Sets `DYNAMO_MODELS_DIR` — code that would perform network downloads (e.g. LoRA
adapters in `download_lora()`) will `pytest.skip()` instead of failing.
**LoRA tests are incompatible with `--models-dir`** because they download adapters
from HuggingFace Hub at test time. Tests that call `download_lora()` will be
skipped automatically with a clear message when the flag is active. To run LoRA
tests locally, omit `--models-dir` and ensure `HF_TOKEN` is set.
- `python3 -m pytest` ensures the venv's pytest runs with the correct `sys.path`.
The system `pytest` at `/usr/local/bin/pytest` is **outside** the venv and cannot
see venv-installed packages (like `dynamo`).
......
......@@ -12,6 +12,12 @@ from typing import Generator, Optional
import pytest
from filelock import FileLock
from tests.hf_cache import (
_apply_models_dir_env,
_disable_offline_with_mistral_patch,
_enable_offline_with_mistral_patch,
_restore_models_dir_env,
)
from tests.utils.constants import TEST_MODELS, DefaultPort
from tests.utils.managed_process import ManagedProcess
from tests.utils.port_utils import (
......@@ -76,6 +82,21 @@ def pytest_addoption(parser: pytest.Parser) -> None:
default=False,
help="Show which tests would run vs skip based on --max-vram-gib, then exit.",
)
# -------------------------------------------------------------------------
# Model cache options
# -------------------------------------------------------------------------
# NOTE: if you add a new option here, also add it to the forwarding list
# in pytest_runtestloop (search for "opt_name, cli_flag" in this file).
parser.addoption(
"--models-dir",
type=str,
default=None,
help=(
"Path to a pre-populated HuggingFace cache (read-only safe). "
"Enables HF_HUB_OFFLINE mode and skips predownload fixtures. "
"See .ai/pytest-guidelines.md for full details."
),
)
def pytest_runtest_setup(item):
......@@ -127,7 +148,14 @@ logging.basicConfig(
def pytest_configure(config: pytest.Config) -> None:
"""Detect GPUs for --max-vram-gib planning and parallel execution."""
"""Configure session: validate --models-dir and detect GPUs for --max-vram-gib."""
models_dir = config.getoption("--models-dir", default=None)
if models_dir and not Path(models_dir).is_dir():
pytest.exit(
f"--models-dir: directory does not exist: {models_dir}",
returncode=2,
)
vram_limit = config.getoption("max_vram_gib", default=None)
if vram_limit is None:
return
......@@ -227,6 +255,9 @@ def pytest_runtestloop(session: pytest.Session) -> bool | None:
val = config.getoption(opt_name, default=None)
if val is not None:
extra_args.extend([cli_flag, str(val)])
models_dir = config.getoption("--models-dir", default=None)
if models_dir is not None:
extra_args.extend(["--models-dir", str(models_dir)])
if config.getoption("skip_service_restart", default=None):
extra_args.append("--skip-service-restart")
......@@ -333,97 +364,44 @@ def download_models(model_list=None, ignore_weights=False):
)
def _enable_offline_with_mistral_patch():
"""Set HF_HUB_OFFLINE=1 and work around a transformers 4.57.3 regression.
transformers 4.57.3 (PR #42389) introduced _patch_mistral_regex which calls
huggingface_hub.model_info() unconditionally for every tokenizer load — even
non-Mistral models with fully cached weights. This API call fails when
HF_HUB_OFFLINE=1.
_download_lock_path = os.path.join(tempfile.gettempdir(), "pytest_model_download.lock")
Since tests launch TRT-LLM workers as subprocesses that inherit env vars but
not in-process monkey-patches, we inject the fix via a sitecustomize.py on
PYTHONPATH so every subprocess auto-applies it at startup.
Upstream bug: https://github.com/huggingface/transformers/issues/44843
@pytest.fixture(scope="session", autouse=True)
def _models_dir_env(pytestconfig):
"""Set up HF env vars for --models-dir mode. No-op when flag is absent.
TODO: Remove this workaround once transformers ships a fix and TRT-LLM (or
any other dependency) upgrades to that fixed version.
Session-scoped: runs once per worker process. Under pytest-xdist each worker
applies and restores env vars independently — there is no cross-worker
coordination needed since env vars are process-local.
"""
os.environ["HF_HUB_OFFLINE"] = "1"
# Apply the patch in this process
models_dir = pytestconfig.getoption("--models-dir")
if not models_dir:
yield
return
orig = _apply_models_dir_env(models_dir)
try:
from huggingface_hub.errors import OfflineModeIsEnabled
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
original = PreTrainedTokenizerBase._patch_mistral_regex
@classmethod # type: ignore[misc]
def _safe_patch(cls, tokenizer, *args, **kwargs):
try:
return original.__func__(cls, tokenizer, *args, **kwargs)
except OfflineModeIsEnabled:
return tokenizer
PreTrainedTokenizerBase._patch_mistral_regex = _safe_patch
except (ImportError, AttributeError):
return # transformers version without _patch_mistral_regex — nothing to do
# Write a sitecustomize.py so subprocesses also get the patch.
# Use a per-worker dir under xdist to avoid write races.
worker_id = os.environ.get("PYTEST_XDIST_WORKER", "main")
patch_dir = os.path.join(tempfile.gettempdir(), f"dynamo_test_hf_patch_{worker_id}")
os.makedirs(patch_dir, exist_ok=True)
with open(os.path.join(patch_dir, "sitecustomize.py"), "w") as f:
f.write(
"import os\n"
"if os.environ.get('HF_HUB_OFFLINE') == '1':\n"
" try:\n"
" from transformers.tokenization_utils_base import"
" PreTrainedTokenizerBase as _T\n"
" from huggingface_hub.errors import"
" OfflineModeIsEnabled as _E\n"
" _orig = _T._patch_mistral_regex\n"
" @classmethod\n"
" def _safe(cls, tokenizer, *a, **kw):\n"
" try:\n"
" return _orig.__func__(cls, tokenizer, *a, **kw)\n"
" except _E:\n"
" return tokenizer\n"
" _T._patch_mistral_regex = _safe\n"
" except (ImportError, AttributeError):\n"
" pass\n"
)
pythonpath = os.environ.get("PYTHONPATH", "")
os.environ["PYTHONPATH"] = f"{patch_dir}:{pythonpath}" if pythonpath else patch_dir
logging.info(
"Enabled HF_HUB_OFFLINE with _patch_mistral_regex workaround "
"(see https://github.com/huggingface/transformers/issues/44843)"
)
def _disable_offline_with_mistral_patch():
"""Undo _enable_offline_with_mistral_patch."""
os.environ.pop("HF_HUB_OFFLINE", None)
worker_id = os.environ.get("PYTEST_XDIST_WORKER", "main")
patch_dir = os.path.join(tempfile.gettempdir(), f"dynamo_test_hf_patch_{worker_id}")
pythonpath = os.environ.get("PYTHONPATH", "")
os.environ["PYTHONPATH"] = pythonpath.replace(f"{patch_dir}:", "").replace(
patch_dir, ""
)
_download_lock_path = os.path.join(tempfile.gettempdir(), "pytest_model_download.lock")
yield
finally:
_restore_models_dir_env(orig)
@pytest.fixture(scope="session")
def predownload_models(pytestconfig):
def predownload_models(pytestconfig, _models_dir_env):
"""Fixture wrapper around download_models for models used in collected tests.
Uses a file lock so that under xdist, only one worker downloads at a time
and the rest reuse the HuggingFace cache.
When --models-dir is passed, _models_dir_env has already set up HF env vars;
this fixture simply yields without downloading.
_models_dir_env is declared as a dependency to ensure HF env vars are
configured before any download attempt, even though its yielded value is unused.
"""
if pytestconfig.getoption("--models-dir"):
yield
return
models = getattr(pytestconfig, "models_to_download", None)
with FileLock(_download_lock_path):
if models:
......@@ -440,11 +418,20 @@ def predownload_models(pytestconfig):
@pytest.fixture(scope="session")
def predownload_tokenizers(pytestconfig):
def predownload_tokenizers(pytestconfig, _models_dir_env):
"""Fixture wrapper around download_models for tokenizers used in collected tests.
Uses a file lock so that under xdist, only one worker downloads at a time.
When --models-dir is passed, _models_dir_env has already set up HF env vars;
this fixture simply yields without downloading.
_models_dir_env is declared as a dependency to ensure HF env vars are
configured before any download attempt, even though its yielded value is unused.
"""
if pytestconfig.getoption("--models-dir"):
yield
return
models = getattr(pytestconfig, "models_to_download", None)
with FileLock(_download_lock_path):
if models:
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import logging
import os
import shutil
import tempfile
import textwrap
from pathlib import Path
_mistral_patch_applied: bool = False
def _enable_offline_with_mistral_patch():
    """Set HF_HUB_OFFLINE=1 and work around a transformers 4.57.3 regression.

    transformers 4.57.3 (PR #42389) introduced _patch_mistral_regex which calls
    huggingface_hub.model_info() unconditionally for every tokenizer load — even
    non-Mistral models with fully cached weights. This API call fails when
    HF_HUB_OFFLINE=1.

    Since tests launch TRT-LLM workers as subprocesses that inherit env vars but
    not in-process monkey-patches, we inject the fix via a sitecustomize.py on
    PYTHONPATH so every subprocess auto-applies it at startup.

    _mistral_patch_applied guards the sitecustomize write and PYTHONPATH
    injection so they run at most once per enable/disable cycle;
    _disable_offline_with_mistral_patch resets the flag so a subsequent enable
    call re-injects PYTHONPATH. The class-level wrapper is never removed by
    disable, so it is tagged with ``_dynamo_offline_safe``; a later enable call
    detects the tag and does not stack another try/except layer on top of it.

    Upstream bug: https://github.com/huggingface/transformers/issues/44843

    TODO: Remove this workaround once transformers ships a fix and TRT-LLM (or
    any other dependency) upgrades to that fixed version.
    """
    global _mistral_patch_applied

    os.environ["HF_HUB_OFFLINE"] = "1"
    if _mistral_patch_applied:
        return  # sitecustomize and PYTHONPATH already applied for this cycle

    # Resolve OfflineModeIsEnabled before touching transformers. If huggingface_hub
    # predates the .errors module, transformers 4.57.3+ imports OfflineModeIsEnabled
    # lazily inside _patch_mistral_regex, so that call itself raises ImportError under
    # offline mode — using ImportError as the fallback catches that exact error.
    try:
        from huggingface_hub.errors import OfflineModeIsEnabled
    except ImportError:
        OfflineModeIsEnabled = ImportError  # type: ignore[assignment,misc]

    # Apply the patch in this process.
    try:
        from transformers.tokenization_utils_base import PreTrainedTokenizerBase

        original = PreTrainedTokenizerBase._patch_mistral_regex
    except (ImportError, AttributeError):
        return  # transformers version without _patch_mistral_regex — nothing to do

    # Attribute lookups on a bound method fall through to the underlying
    # function, so the tag set below is visible on a previously installed wrapper.
    if not getattr(original, "_dynamo_offline_safe", False):

        def _safe_patch(cls, tokenizer, *args, **kwargs):
            try:
                return original.__func__(cls, tokenizer, *args, **kwargs)
            except OfflineModeIsEnabled:
                return tokenizer

        _safe_patch._dynamo_offline_safe = True  # type: ignore[attr-defined]
        PreTrainedTokenizerBase._patch_mistral_regex = classmethod(  # type: ignore[assignment]
            _safe_patch
        )

    # Write a sitecustomize.py so subprocesses also get the patch.
    # Use a per-worker dir under xdist to avoid write races.
    worker_id = os.environ.get("PYTEST_XDIST_WORKER", "main")
    patch_dir = os.path.join(tempfile.gettempdir(), f"dynamo_test_hf_patch_{worker_id}")
    os.makedirs(patch_dir, exist_ok=True)
    with open(
        os.path.join(patch_dir, "sitecustomize.py"), "w", encoding="utf-8"
    ) as f:
        f.write(
            textwrap.dedent(
                """\
                import os
                if os.environ.get('HF_HUB_OFFLINE') == '1':
                    try:
                        from transformers.tokenization_utils_base import PreTrainedTokenizerBase as _T
                        try:
                            from huggingface_hub.errors import OfflineModeIsEnabled as _E
                        except ImportError:
                            _E = ImportError
                        _orig = _T._patch_mistral_regex
                        @classmethod
                        def _safe(cls, tokenizer, *a, **kw):
                            try:
                                return _orig.__func__(cls, tokenizer, *a, **kw)
                            except _E:
                                return tokenizer
                        _T._patch_mistral_regex = _safe
                    except (ImportError, AttributeError):
                        pass
                """
            )
        )

    # Prepend patch_dir, dropping empty components and any stale copy of
    # patch_dir so the injection stays idempotent.
    existing_entries = [
        e
        for e in os.environ.get("PYTHONPATH", "").split(":")
        if e and e != patch_dir
    ]
    os.environ["PYTHONPATH"] = ":".join([patch_dir] + existing_entries)

    logging.info(
        "Enabled HF_HUB_OFFLINE with _patch_mistral_regex workaround "
        "(see https://github.com/huggingface/transformers/issues/44843)"
    )
    _mistral_patch_applied = True
def _disable_offline_with_mistral_patch():
    """Undo the environment changes made by _enable_offline_with_mistral_patch.

    Removes HF_HUB_OFFLINE, strips this worker's patch directory (and any empty
    components) from PYTHONPATH, deletes the patch directory from disk, and
    resets the module-level ``_mistral_patch_applied`` flag.
    """
    global _mistral_patch_applied

    # Same per-worker directory name that the enable side derives.
    worker = os.environ.get("PYTEST_XDIST_WORKER", "main")
    patch_dir = os.path.join(tempfile.gettempdir(), f"dynamo_test_hf_patch_{worker}")

    os.environ.pop("HF_HUB_OFFLINE", None)

    # Exact-match filtering: only the patch directory itself is removed.
    kept_entries = [
        entry
        for entry in os.environ.get("PYTHONPATH", "").split(":")
        if entry and entry != patch_dir
    ]
    if not kept_entries:
        # Nothing left: drop the variable rather than leaving it empty.
        os.environ.pop("PYTHONPATH", None)
    else:
        os.environ["PYTHONPATH"] = ":".join(kept_entries)

    shutil.rmtree(patch_dir, ignore_errors=True)
    _mistral_patch_applied = False
# Legacy transformers cache-location variables. _apply_models_dir_env removes
# them from the environment while --models-dir is active.
_TRANSFORMERS_CACHE_OVERRIDE_KEYS = (
    "TRANSFORMERS_CACHE",
    "PYTORCH_TRANSFORMERS_CACHE",
    "PYTORCH_PRETRAINED_BERT_CACHE",
)

# Keys managed (snapshotted and restored) by _apply_models_dir_env /
# _restore_models_dir_env.
# PYTHONPATH is intentionally excluded: _disable_offline_with_mistral_patch()
# removes its entry by exact-match list filtering (idempotent, needs no snapshot).
_MODELS_DIR_ENV_KEYS = (
    ("HF_HUB_CACHE", "HF_HOME")
    + _TRANSFORMERS_CACHE_OVERRIDE_KEYS
    + ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE", "DYNAMO_MODELS_DIR")
)
def _apply_models_dir_env(models_dir: str) -> dict:
    """Set HF env vars for read-only cache mode and return the original values.

    Layout detection: when ``models_dir`` contains a ``hub/`` subdirectory it is
    exported as HF_HOME (and HF_HUB_CACHE is cleared); otherwise it is exported
    as HF_HUB_CACHE (and HF_HOME is cleared). The legacy transformers cache
    variables are removed, offline mode is switched on, DYNAMO_MODELS_DIR is
    set, and the Mistral tokenizer workaround is enabled.

    Args:
        models_dir: Path to the pre-populated HuggingFace cache.

    Returns:
        Mapping of every key in ``_MODELS_DIR_ENV_KEYS`` to its value before
        this call (``None`` when the variable was unset). Pass it to
        ``_restore_models_dir_env`` to undo the changes.

    Raises:
        Whatever the setup steps raise. In that case the environment is rolled
        back first, so a failed call leaves no partial state behind.
    """
    orig = {k: os.environ.get(k) for k in _MODELS_DIR_ENV_KEYS}
    try:
        if (Path(models_dir) / "hub").is_dir():
            logging.warning(
                "--models-dir: detected HF_HOME layout (hub/ subdirectory found). "
                "If this is wrong (e.g. you have a model named hub/), rename hub/ "
                "or pass a bare HF_HUB_CACHE directory instead."
            )
            os.environ.pop("HF_HUB_CACHE", None)  # clear so HF_HOME takes effect
            os.environ["HF_HOME"] = models_dir
        else:
            logging.info("--models-dir: detected bare HF_HUB_CACHE layout")
            os.environ.pop("HF_HOME", None)  # clear for consistency
            os.environ["HF_HUB_CACHE"] = models_dir

        for key in _TRANSFORMERS_CACHE_OVERRIDE_KEYS:
            os.environ.pop(key, None)

        os.environ["HF_HUB_OFFLINE"] = "1"
        os.environ["TRANSFORMERS_OFFLINE"] = "1"
        os.environ["DYNAMO_MODELS_DIR"] = models_dir
        # Activates sitecustomize for the Mistral tokenizer workaround.
        _enable_offline_with_mistral_patch()
    except BaseException:
        # The caller never receives `orig` when we raise, so it cannot restore
        # anything itself: undo the partial changes here, then propagate.
        _restore_models_dir_env(orig)
        raise
    return orig
def _restore_models_dir_env(orig: dict) -> None:
    """Undo _apply_models_dir_env using the snapshot it returned.

    Intended to be called after the fixture's yield. ``orig`` maps each managed
    variable name to its earlier value, with ``None`` meaning "was unset".
    """
    # Tear down the offline workaround first. It pops HF_HUB_OFFLINE and cleans
    # up sitecustomize; the loop below then puts back any pre-existing
    # HF_HUB_OFFLINE value (or leaves it unset if there was none).
    _disable_offline_with_mistral_patch()

    for name in orig:
        previous = orig[name]
        if previous is not None:
            os.environ[name] = previous
            continue
        os.environ.pop(name, None)
......@@ -19,6 +19,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Optional
import boto3
import pytest
import requests
from botocore.client import Config
from botocore.exceptions import ClientError
......@@ -232,7 +233,16 @@ class MinioService:
raise RuntimeError(f"Failed to check bucket: {e}") from e
def download_lora(self) -> str:
"""Download LoRA from Hugging Face Hub, returns temp directory path."""
"""Download LoRA from Hugging Face Hub, returns temp directory path.
Skips via pytest.skip() when DYNAMO_MODELS_DIR is set (--models-dir active).
"""
if os.environ.get("DYNAMO_MODELS_DIR"):
pytest.skip(
"--models-dir is active (read-only cache mode): LoRA network download suppressed. "
"Pre-stage LoRA adapters into the cache or omit --models-dir to enable downloads."
)
self._temp_download_dir = tempfile.mkdtemp(prefix="lora_download_")
self._logger.info(
f"Downloading LoRA {self.config.lora_repo} to {self._temp_download_dir}"
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import subprocess
import sys
from pathlib import Path
import pytest
import tests.hf_cache as hf_cache
from tests.hf_cache import (
_MODELS_DIR_ENV_KEYS,
_TRANSFORMERS_CACHE_OVERRIDE_KEYS,
_apply_models_dir_env,
_disable_offline_with_mistral_patch,
_enable_offline_with_mistral_patch,
_restore_models_dir_env,
)
from tests.serve.lora_utils import MinioLoraConfig, MinioService
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_apply_bare_cache_layout(tmp_path, monkeypatch):
    """A directory without hub/ is exported as HF_HUB_CACHE with offline mode on."""
    cache_dir = str(tmp_path)

    # Start from an environment where none of the managed variables exist.
    for name in (*_MODELS_DIR_ENV_KEYS, "PYTHONPATH"):
        monkeypatch.delenv(name, raising=False)

    snapshot = _apply_models_dir_env(cache_dir)
    try:
        env = os.environ
        assert env["HF_HUB_CACHE"] == cache_dir
        assert "HF_HOME" not in env
        assert env["HF_HUB_OFFLINE"] == "1"
        assert env["TRANSFORMERS_OFFLINE"] == "1"
        assert env["DYNAMO_MODELS_DIR"] == cache_dir
        leftover = [k for k in _TRANSFORMERS_CACHE_OVERRIDE_KEYS if k in env]
        assert not leftover
    finally:
        _restore_models_dir_env(snapshot)
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_apply_hf_home_layout(tmp_path, monkeypatch):
    """A directory containing hub/ is exported as HF_HOME with offline mode on."""
    home_dir = str(tmp_path)

    # Start from an environment where none of the managed variables exist.
    for name in (*_MODELS_DIR_ENV_KEYS, "PYTHONPATH"):
        monkeypatch.delenv(name, raising=False)

    # The presence of hub/ is what selects the HF_HOME layout.
    (tmp_path / "hub").mkdir()

    snapshot = _apply_models_dir_env(home_dir)
    try:
        env = os.environ
        assert env["HF_HOME"] == home_dir
        assert "HF_HUB_CACHE" not in env
        assert env["HF_HUB_OFFLINE"] == "1"
        assert env["TRANSFORMERS_OFFLINE"] == "1"
        assert env["DYNAMO_MODELS_DIR"] == home_dir
        leftover = [k for k in _TRANSFORMERS_CACHE_OVERRIDE_KEYS if k in env]
        assert not leftover
    finally:
        _restore_models_dir_env(snapshot)
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_restore_clears_vars_that_were_absent(tmp_path, monkeypatch):
    """Variables that were unset before apply are unset again after restore."""
    managed = (*_MODELS_DIR_ENV_KEYS, "PYTHONPATH")
    for name in managed:
        monkeypatch.delenv(name, raising=False)

    # Apply and immediately roll back.
    _restore_models_dir_env(_apply_models_dir_env(str(tmp_path)))

    still_set = [name for name in _MODELS_DIR_ENV_KEYS if name in os.environ]
    assert not still_set
    assert "PYTHONPATH" not in os.environ
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
@pytest.mark.parametrize("use_hf_home", [False, True])
def test_restore_preserves_preexisting_values(tmp_path, monkeypatch, use_hf_home):
    """Values present before apply come back unchanged after restore, in both layouts."""
    if use_hf_home:
        (tmp_path / "hub").mkdir()

    # Give every managed variable a recognisable pre-existing value.
    expected = {}
    for name in _MODELS_DIR_ENV_KEYS:
        expected[name] = f"preexisting_{name}"
        monkeypatch.setenv(name, expected[name])

    snapshot = _apply_models_dir_env(str(tmp_path))
    _restore_models_dir_env(snapshot)

    for name in expected:
        assert os.environ[name] == expected[name]
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
@pytest.mark.timeout(60)
def test_models_dir_nonexistent_exits_with_code_2(tmp_path):
    """A child pytest run given a missing --models-dir exits with return code 2."""
    nonexistent = tmp_path / "no_such_dir"

    # The child is launched from the project root so that conftest.py is
    # discovered and --models-dir is registered before pytest_configure fires.
    # Note: the child pytest process collects from this file itself — keep
    # module-level imports here side-effect-free to avoid spurious child failures.
    repo_root = Path(__file__).parents[1]
    command = [
        sys.executable,
        "-m",
        "pytest",
        f"--models-dir={nonexistent}",
        "--collect-only",
        "tests/test_models_dir_flag.py",
    ]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        cwd=str(repo_root),
        timeout=30,
    )

    assert completed.returncode == 2
    combined_output = completed.stderr + completed.stdout
    assert "does not exist" in combined_output
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_download_lora_skips_in_models_dir_mode(tmp_path, monkeypatch):
    """download_lora() raises pytest's skip exception while DYNAMO_MODELS_DIR is set."""
    monkeypatch.setenv("DYNAMO_MODELS_DIR", str(tmp_path))
    minio = MinioService(MinioLoraConfig())

    skip_exception = pytest.skip.Exception
    with pytest.raises(skip_exception, match="read-only cache mode"):
        minio.download_lora()
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_disable_removes_patch_dir(monkeypatch):
    """_disable_offline_with_mistral_patch cleans up the sitecustomize patch directory."""
    import tempfile

    for name in ("PYTHONPATH", "HF_HUB_OFFLINE"):
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(hf_cache, "_mistral_patch_applied", False)

    # Recreate by hand the per-worker directory that the disable side looks for.
    worker = os.environ.get("PYTEST_XDIST_WORKER", "main")
    patch_dir = os.path.join(tempfile.gettempdir(), f"dynamo_test_hf_patch_{worker}")
    os.makedirs(patch_dir, exist_ok=True)
    stub = Path(patch_dir) / "sitecustomize.py"
    stub.write_text("# stub")
    monkeypatch.setenv("PYTHONPATH", patch_dir)

    _disable_offline_with_mistral_patch()

    # Both the directory and the now-empty PYTHONPATH must be gone.
    assert not Path(patch_dir).exists()
    assert "PYTHONPATH" not in os.environ
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_enable_normalizes_pythonpath_empty_components(monkeypatch):
    """_enable_offline_with_mistral_patch filters empty components from PYTHONPATH."""
    monkeypatch.setenv("PYTHONPATH", ":some:existing:path:")
    monkeypatch.delenv("HF_HUB_OFFLINE", raising=False)
    monkeypatch.setattr(hf_cache, "_mistral_patch_applied", False)

    # Only the import is guarded, so an ImportError is never confused with a
    # failure in the patching step below.
    try:
        from transformers.tokenization_utils_base import PreTrainedTokenizerBase
    except ImportError:
        pytest.skip("transformers not installed")

    # Install a no-op through monkeypatch so the class attribute is restored at
    # teardown no matter what the code under test assigns to it.
    monkeypatch.setattr(
        PreTrainedTokenizerBase,
        "_patch_mistral_regex",
        classmethod(lambda cls, t, *a, **kw: t),
        raising=False,
    )

    try:
        _enable_offline_with_mistral_patch()
        pythonpath = os.environ.get("PYTHONPATH", "")
        components = pythonpath.split(":")
        assert "" not in components, f"Empty component in PYTHONPATH: {pythonpath!r}"
        # The pre-existing entries must survive, in order, after the patch dir.
        assert components[-3:] == ["some", "existing", "path"]
    finally:
        # Always undo HF_HUB_OFFLINE and the patch directory, even when an
        # assertion above fails; monkeypatch does not track HF_HUB_OFFLINE if
        # it was absent at the start of the test.
        _disable_offline_with_mistral_patch()
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_pythonpath_restored_after_apply_restore(tmp_path, monkeypatch):
    """An apply/restore round trip leaves a pre-existing PYTHONPATH unchanged."""
    initial_pythonpath = "some:existing:path"
    monkeypatch.setenv("PYTHONPATH", initial_pythonpath)
    for name in _MODELS_DIR_ENV_KEYS:
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(hf_cache, "_mistral_patch_applied", False)

    snapshot = _apply_models_dir_env(str(tmp_path))
    _restore_models_dir_env(snapshot)

    assert os.environ["PYTHONPATH"] == initial_pythonpath
@pytest.mark.pre_merge
@pytest.mark.unit
@pytest.mark.gpu_0
def test_enable_disable_enable_cycle(monkeypatch):
    """_enable/_disable is safe to call in sequence; PYTHONPATH and HF_HUB_OFFLINE are correct after each call."""
    monkeypatch.delenv("PYTHONPATH", raising=False)
    monkeypatch.delenv("HF_HUB_OFFLINE", raising=False)
    monkeypatch.setattr(hf_cache, "_mistral_patch_applied", False)

    try:
        from transformers.tokenization_utils_base import PreTrainedTokenizerBase
    except ImportError:
        pytest.skip("transformers not installed")

    # Always inject a no-op _patch_mistral_regex through monkeypatch. This makes
    # the test exercise the full patching code path regardless of the installed
    # transformers version, and it guarantees the class attribute is restored
    # at teardown instead of leaving the wrapper on the real class.
    @classmethod  # type: ignore[misc]
    def _noop_patch(cls, tokenizer, *args, **kwargs):
        return tokenizer

    monkeypatch.setattr(
        PreTrainedTokenizerBase,
        "_patch_mistral_regex",
        _noop_patch,
        raising=False,
    )

    try:
        _enable_offline_with_mistral_patch()
        assert os.environ.get("HF_HUB_OFFLINE") == "1"
        assert hf_cache._mistral_patch_applied is True
        pythonpath_after_enable = os.environ.get("PYTHONPATH")

        _disable_offline_with_mistral_patch()
        assert "HF_HUB_OFFLINE" not in os.environ
        assert hf_cache._mistral_patch_applied is False
        assert os.environ.get("PYTHONPATH") is None

        _enable_offline_with_mistral_patch()
        assert os.environ.get("HF_HUB_OFFLINE") == "1"
        assert hf_cache._mistral_patch_applied is True
        assert os.environ.get("PYTHONPATH") == pythonpath_after_enable
    finally:
        # Runs on the success path too, so a failed assertion cannot leave
        # HF_HUB_OFFLINE or the patch directory behind for later tests.
        _disable_offline_with_mistral_patch()

    assert hf_cache._mistral_patch_applied is False
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment