[CI]: reduce HTTP calls inside entrypoints openai tests (#23646)

Signed-off-by: AzizCode92 <azizbenothman76@gmail.com> Signed-off-by: Aziz <azizbenothman76@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[CI]: reduce HTTP calls inside entrypoints openai tests (#23646)
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com> Signed-off-by: Aziz <azizbenothman76@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
ce30dca5 · Aziz · GitHub · 2f0bab3f · ce30dca5 · ce30dca5
Unverified Commit ce30dca5 authored Sep 02, 2025 by Aziz Committed by GitHub Sep 02, 2025
8 changed files
--- a/tests/entrypoints/conftest.py
+++ b/tests/entrypoints/conftest.py
@@ -201,3 +201,32 @@ table: "table_1" | "table_2"
 condition: column "=" number
 number: "1" | "2"
 """)
+@pytest.fixture(scope="session")
+def zephyr_lora_files():
+    """Download zephyr LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id="typeof/zephyr-7b-beta-lora")
+@pytest.fixture(scope="session")
+def zephyr_lora_added_tokens_files(zephyr_lora_files):
+    """Create zephyr LoRA files with added tokens once per test session."""
+    import shutil
+    from tempfile import TemporaryDirectory
+    from transformers import AutoTokenizer
+    tmp_dir = TemporaryDirectory()
+    tmp_model_dir = f"{tmp_dir.name}/zephyr"
+    shutil.copytree(zephyr_lora_files, tmp_model_dir)
+    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+    # Copy tokenizer to adapter and add some unique tokens
+    # 32000, 32001, 32002
+    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
+                                 special_tokens=True)
+    assert added == 3
+    tokenizer.save_pretrained(tmp_model_dir)
+    yield tmp_model_dir
+    tmp_dir.cleanup()
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -15,8 +15,6 @@ import torch
 from openai import BadRequestError, OpenAI
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -3,8 +3,6 @@
 # imports for guided decoding tests
 import json
 import os
-import shutil
-from tempfile import TemporaryDirectory
 from typing import Optional
 import jsonschema
@@ -14,9 +12,7 @@ import pytest_asyncio
 import regex as re
 import requests
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoTokenizer
 from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -26,32 +22,10 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically these adapters use a different base model,
 # but we're not testing generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"]
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
 @pytest.fixture(scope="module")
 def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files):
    return [

--- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
+++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
@@ -3,48 +3,23 @@
 import base64
 import io
-import shutil
-from tempfile import TemporaryDirectory
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
 import torch
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoConfig, AutoTokenizer
+from transformers import AutoConfig
 from ...utils import RemoteOpenAIServer
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
 @pytest.fixture(scope="module")
 def default_server_args(
    zephyr_lora_files,

--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -9,8 +9,6 @@ from contextlib import suppress
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from ...utils import RemoteOpenAIServer
@@ -18,7 +16,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 BADREQUEST_CASES = [
    (
@@ -48,11 +45,6 @@ BADREQUEST_CASES = [
 ]
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
 @pytest.fixture(scope="module")
 def monkeypatch_module():
    from _pytest.monkeypatch import MonkeyPatch

--- a/tests/entrypoints/openai/test_models.py
+++ b/tests/entrypoints/openai/test_models.py
@@ -4,8 +4,6 @@
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from ...utils import RemoteOpenAIServer
@@ -13,12 +11,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
 @pytest.fixture(scope="module")

--- a/tests/entrypoints/openai/test_return_tokens_as_ids.py
+++ b/tests/entrypoints/openai/test_return_tokens_as_ids.py
@@ -11,8 +11,6 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
 from ...utils import RemoteOpenAIServer
 from .test_completion import default_server_args  # noqa: F401
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 from .test_completion import MODEL_NAME

--- a/tests/entrypoints/openai/test_tokenization.py
+++ b/tests/entrypoints/openai/test_tokenization.py
@@ -8,8 +8,6 @@ import requests
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"