Unverified commit ce30dca5, authored by Aziz and committed by GitHub
Browse files

[CI]: reduce HTTP calls inside entrypoints openai tests (#23646)


Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
Signed-off-by: Aziz <azizbenothman76@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 2f0bab3f
...@@ -201,3 +201,32 @@ table: "table_1" | "table_2" ...@@ -201,3 +201,32 @@ table: "table_1" | "table_2"
condition: column "=" number condition: column "=" number
number: "1" | "2" number: "1" | "2"
""") """)
@pytest.fixture(scope="session")
def zephyr_lora_files():
    """Fetch the zephyr LoRA adapter snapshot a single time per test session.

    Returns:
        Whatever ``snapshot_download`` returns for the adapter repository
        (the local snapshot folder, per the Hugging Face Hub documentation).
    """
    from huggingface_hub import snapshot_download

    lora_repo = "typeof/zephyr-7b-beta-lora"
    return snapshot_download(repo_id=lora_repo)
@pytest.fixture(scope="session")
def zephyr_lora_added_tokens_files(zephyr_lora_files):
    """Create zephyr LoRA files with added tokens once per test session.

    Copies the adapter directory given by ``zephyr_lora_files`` into a
    temporary directory, saves the base-model tokenizer extended with three
    extra special tokens into that copy, and yields the copy's path.

    Args:
        zephyr_lora_files: Directory of the downloaded LoRA adapter, as
            provided by the fixture of the same name.

    Yields:
        Path of the temporary adapter directory that also contains the
        saved tokenizer.
    """
    import shutil
    from tempfile import TemporaryDirectory

    from transformers import AutoTokenizer

    # The context manager removes the temporary directory on teardown and
    # also when setup fails before the yield (copy, download or assert
    # error), which a trailing ``cleanup()`` call would not cover.
    with TemporaryDirectory() as tmp_dir:
        tmp_model_dir = f"{tmp_dir}/zephyr"
        shutil.copytree(zephyr_lora_files, tmp_model_dir)
        tokenizer = AutoTokenizer.from_pretrained(
            "HuggingFaceH4/zephyr-7b-beta")
        # Copy tokenizer to adapter and add some unique tokens
        # 32000, 32001, 32002
        added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
                                     special_tokens=True)
        assert added == 3
        tokenizer.save_pretrained(tmp_model_dir)
        yield tmp_model_dir
...@@ -15,8 +15,6 @@ import torch ...@@ -15,8 +15,6 @@ import torch
from openai import BadRequestError, OpenAI from openai import BadRequestError, OpenAI
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
......
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
# imports for guided decoding tests # imports for guided decoding tests
import json import json
import os import os
import shutil
from tempfile import TemporaryDirectory
from typing import Optional from typing import Optional
import jsonschema import jsonschema
...@@ -14,9 +12,7 @@ import pytest_asyncio ...@@ -14,9 +12,7 @@ import pytest_asyncio
import regex as re import regex as re
import requests import requests
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download
from openai import BadRequestError from openai import BadRequestError
from transformers import AutoTokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
...@@ -26,32 +22,10 @@ from ...utils import RemoteOpenAIServer ...@@ -26,32 +22,10 @@ from ...utils import RemoteOpenAIServer
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# technically these adapters use a different base model, # technically these adapters use a different base model,
# but we're not testing generation quality here # but we're not testing generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"] GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"]
@pytest.fixture(scope="module")
def zephyr_lora_files():
    """Download the LoRA adapter named by ``LORA_NAME`` once per module."""
    lora_path = snapshot_download(repo_id=LORA_NAME)
    return lora_path
@pytest.fixture(scope="module")
def zephyr_lora_added_tokens_files(zephyr_lora_files):
    """Yield a temporary copy of the LoRA adapter with extra tokens added.

    Copies the directory given by ``zephyr_lora_files`` into a temporary
    directory and saves the ``MODEL_NAME`` tokenizer, extended with three
    special tokens, into that copy.

    Args:
        zephyr_lora_files: Directory of the downloaded LoRA adapter.

    Yields:
        Path of the temporary adapter directory.
    """
    # Using the context manager guarantees the directory is removed both on
    # teardown and when setup raises before the yield.
    with TemporaryDirectory() as tmp_dir:
        tmp_model_dir = f"{tmp_dir}/zephyr"
        shutil.copytree(zephyr_lora_files, tmp_model_dir)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Copy tokenizer to adapter and add some unique tokens
        # 32000, 32001, 32002
        added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
                                     special_tokens=True)
        assert added == 3
        tokenizer.save_pretrained(tmp_model_dir)
        yield tmp_model_dir
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files): def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files):
return [ return [
......
...@@ -3,48 +3,23 @@ ...@@ -3,48 +3,23 @@
import base64 import base64
import io import io
import shutil
from tempfile import TemporaryDirectory
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import pytest_asyncio import pytest_asyncio
import torch import torch
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download
from openai import BadRequestError from openai import BadRequestError
from transformers import AutoConfig, AutoTokenizer from transformers import AutoConfig
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
LORA_NAME = "typeof/zephyr-7b-beta-lora"
CONFIG = AutoConfig.from_pretrained(MODEL_NAME) CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
@pytest.fixture(scope="module")
def zephyr_lora_files():
    """Provide the snapshot of the ``LORA_NAME`` repository for this module."""
    adapter_dir = snapshot_download(repo_id=LORA_NAME)
    return adapter_dir
@pytest.fixture(scope="module")
def zephyr_lora_added_tokens_files(zephyr_lora_files):
    """Yield a temporary adapter directory whose tokenizer has added tokens.

    The adapter files from ``zephyr_lora_files`` are copied into a temporary
    directory, and the ``MODEL_NAME`` tokenizer is saved there after three
    special tokens have been added to it.

    Args:
        zephyr_lora_files: Directory of the downloaded LoRA adapter.

    Yields:
        Path of the temporary adapter directory.
    """
    # ``with`` ensures cleanup even if copying, loading the tokenizer, or
    # the assertion fails before the fixture reaches its yield.
    with TemporaryDirectory() as tmp_dir:
        tmp_model_dir = f"{tmp_dir}/zephyr"
        shutil.copytree(zephyr_lora_files, tmp_model_dir)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Copy tokenizer to adapter and add some unique tokens
        # 32000, 32001, 32002
        added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
                                     special_tokens=True)
        assert added == 3
        tokenizer.save_pretrained(tmp_model_dir)
        yield tmp_model_dir
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def default_server_args( def default_server_args(
zephyr_lora_files, zephyr_lora_files,
......
...@@ -9,8 +9,6 @@ from contextlib import suppress ...@@ -9,8 +9,6 @@ from contextlib import suppress
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import pytest_asyncio import pytest_asyncio
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
...@@ -18,7 +16,6 @@ from ...utils import RemoteOpenAIServer ...@@ -18,7 +16,6 @@ from ...utils import RemoteOpenAIServer
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing # technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here # generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
BADREQUEST_CASES = [ BADREQUEST_CASES = [
( (
...@@ -48,11 +45,6 @@ BADREQUEST_CASES = [ ...@@ -48,11 +45,6 @@ BADREQUEST_CASES = [
] ]
@pytest.fixture(scope="module")
def zephyr_lora_files():
    """Module-scoped fixture that downloads the ``LORA_NAME`` snapshot."""
    downloaded = snapshot_download(repo_id=LORA_NAME)
    return downloaded
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def monkeypatch_module(): def monkeypatch_module():
from _pytest.monkeypatch import MonkeyPatch from _pytest.monkeypatch import MonkeyPatch
......
...@@ -4,8 +4,6 @@ ...@@ -4,8 +4,6 @@
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import pytest_asyncio import pytest_asyncio
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
...@@ -13,12 +11,6 @@ from ...utils import RemoteOpenAIServer ...@@ -13,12 +11,6 @@ from ...utils import RemoteOpenAIServer
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing # technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here # generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
@pytest.fixture(scope="module")
def zephyr_lora_files():
    """Return the result of downloading the ``LORA_NAME`` repository."""
    snapshot_dir = snapshot_download(repo_id=LORA_NAME)
    return snapshot_dir
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -11,8 +11,6 @@ from vllm.transformers_utils.tokenizer import get_tokenizer ...@@ -11,8 +11,6 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
from .test_completion import default_server_args # noqa: F401 from .test_completion import default_server_args # noqa: F401
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401
from .test_completion import MODEL_NAME from .test_completion import MODEL_NAME
......
...@@ -8,8 +8,6 @@ import requests ...@@ -8,8 +8,6 @@ import requests
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment