Commit 3c9817d2 authored by zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub
parent 49204f68
import weakref
from typing import List
import os
import pytest
......@@ -7,8 +8,9 @@ from vllm import LLM, RequestOutput, SamplingParams
from ...conftest import cleanup
from ..openai.test_vision import TEST_IMAGE_URLS
from ...utils import models_path_prefix
MODEL_NAME = "facebook/opt-125m"
MODEL_NAME = os.path.join(models_path_prefix, "facebook/opt-125m")
PROMPTS = [
"Hello, my name is",
......@@ -145,7 +147,7 @@ def test_multiple_sampling_params(llm: LLM):
def test_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
llm = LLM(model=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"))
prompt1 = "Explain the concept of entropy."
messages = [
......@@ -164,7 +166,7 @@ def test_chat():
def test_multi_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
llm = LLM(model=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"))
prompt1 = "Explain the concept of entropy."
prompt2 = "Explain what among us is."
......@@ -201,7 +203,7 @@ def test_multi_chat():
[[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
def test_chat_multi_image(image_urls: List[str]):
llm = LLM(
model="microsoft/Phi-3.5-vision-instruct",
model=os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct"),
dtype="bfloat16",
max_model_len=4096,
max_num_seqs=5,
......
import weakref
import pytest
import os
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
......@@ -8,8 +9,9 @@ from vllm import LLM
from vllm.lora.request import LoRARequest
from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
PROMPTS = [
"Hello, my name is",
......@@ -18,7 +20,7 @@ PROMPTS = [
"The future of AI is",
]
LORA_NAME = "typeof/zephyr-7b-beta-lora"
LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module")
......
......@@ -4,14 +4,16 @@ import weakref
import jsonschema
import pytest
import os
from vllm.entrypoints.llm import LLM
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module")
......
import sys
import os
from vllm import LLM, SamplingParams
from ...utils import models_path_prefix
def test_lazy_outlines(sample_regex):
......@@ -14,7 +16,7 @@ def test_lazy_outlines(sample_regex):
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM(model="facebook/opt-125m",
llm = LLM(model=os.path.join(models_path_prefix, "facebook/opt-125m"),
enforce_eager=True,
gpu_memory_utilization=0.3)
outputs = llm.generate(prompts, sampling_params)
......@@ -26,7 +28,7 @@ def test_lazy_outlines(sample_regex):
# make sure outlines is not imported
assert 'outlines' not in sys.modules
llm = LLM(model="facebook/opt-125m",
llm = LLM(model=os.path.join(models_path_prefix, "facebook/opt-125m"),
enforce_eager=True,
guided_decoding_backend="lm-format-enforcer",
gpu_memory_utilization=0.3)
......
import pytest
import os
from vllm import LLM
from ...utils import models_path_prefix
def test_empty_prompt():
llm = LLM(model="gpt2")
llm = LLM(model=os.path.join(models_path_prefix, "gpt2"))
with pytest.raises(ValueError, match='Prompt cannot be empty'):
llm.generate([""])
......@@ -2,14 +2,16 @@
import importlib
import sys
import weakref
import os
import pytest
from vllm import LLM
from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "facebook/opt-125m"
MODEL_NAME = os.path.join(models_path_prefix, "facebook/opt-125m")
@pytest.fixture(scope="module")
......
......@@ -9,10 +9,12 @@ AsyncLLMEngine are working correctly.
import lm_eval
import pytest
import os
from ...utils import RemoteOpenAIServer
from ...utils import models_path_prefix
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
MODEL_NAME = os.path.join(models_path_prefix, "Qwen/Qwen2-1.5B-Instruct")
NUM_CONCURRENT = 500
TASK = "gsm8k"
FILTER = "exact_match,strict-match"
......
......@@ -2,14 +2,15 @@ from typing import Dict, List
import openai
import pytest
import os
import pytest_asyncio
from vllm.assets.audio import AudioAsset
from vllm.multimodal.utils import encode_audio_base64, fetch_audio
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "fixie-ai/ultravox-v0_3"
MODEL_NAME = os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_3")
TEST_AUDIO_URLS = [
AudioAsset("winning_call").url,
]
......
......@@ -7,9 +7,9 @@ import requests
from vllm.version import __version__ as VLLM_VERSION
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module")
......
......@@ -6,19 +6,20 @@ from typing import Dict, List, Optional
import jsonschema
import openai # use the official client for correctness check
import pytest
import os
import pytest_asyncio
import torch
from openai import BadRequestError
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module")
......
import pytest
import os
from vllm.entrypoints.chat_utils import (apply_hf_chat_template,
load_chat_template)
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import VLLM_PATH
from ...utils import VLLM_PATH, models_path_prefix
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists()
# Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, """<|im_start|>user
(os.path.join(models_path_prefix, "facebook/opt-125m"), chatml_jinja_path, True, """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
......@@ -20,7 +21,7 @@ Hi there!<|im_end|>
What is the capital of<|im_end|>
<|im_start|>assistant
"""),
("facebook/opt-125m", chatml_jinja_path, False, """<|im_start|>user
(os.path.join(models_path_prefix, "facebook/opt-125m"), chatml_jinja_path, False, """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
......
......@@ -8,22 +8,23 @@ from typing import Dict, List, Optional
import jsonschema
import openai # use the official client for correctness check
import pytest
import os
import pytest_asyncio
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
# from huggingface_hub import snapshot_download
from openai import BadRequestError
from transformers import AutoTokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically these adapters use a different base model,
# but we're not testing generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
PA_NAME = "swapnilbp/llama_tweet_ptune"
LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
PA_NAME = os.path.join(models_path_prefix, "swapnilbp/llama_tweet_ptune")
# if PA_NAME changes, PA_NUM_VIRTUAL_TOKENS might also
# need to change to match the prompt adapter
PA_NUM_VIRTUAL_TOKENS = 8
......@@ -31,7 +32,8 @@ PA_NUM_VIRTUAL_TOKENS = 8
@pytest.fixture(scope="module")
def zephyr_lora_files():
return snapshot_download(repo_id=LORA_NAME)
# return snapshot_download(repo_id=LORA_NAME)
return LORA_NAME
@pytest.fixture(scope="module")
......@@ -52,7 +54,8 @@ def zephyr_lora_added_tokens_files(zephyr_lora_files):
@pytest.fixture(scope="module")
def zephyr_pa_files():
return snapshot_download(repo_id=PA_NAME)
# return snapshot_download(repo_id=PA_NAME)
return PA_NAME
@pytest.fixture(scope="module")
......
......@@ -3,11 +3,12 @@ import base64
import numpy as np
import openai
import pytest
import os
import pytest_asyncio
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
EMBEDDING_MODEL_NAME = os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
@pytest.fixture(scope="module")
......
import openai
import pytest
import os
import pytest_asyncio
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "facebook/bart-base"
MODEL_NAME = os.path.join(models_path_prefix, "facebook/bart-base")
@pytest.fixture(scope="module")
......
# This unit test should be moved to a new
# tests/test_guided_decoding directory.
import pytest
import os
import torch
from transformers import AutoTokenizer
......@@ -9,11 +10,12 @@ from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor)
from vllm.model_executor.guided_decoding.outlines_logits_processors import (
JSONLogitsProcessor, RegexLogitsProcessor)
from ...utils import models_path_prefix
def test_guided_logits_processors(sample_regex, sample_json_schema):
"""Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')
tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, 'HuggingFaceH4/zephyr-7b-beta'))
regex_LP = RegexLogitsProcessor(sample_regex, tokenizer)
json_LP = JSONLogitsProcessor(sample_json_schema,
tokenizer,
......@@ -41,7 +43,7 @@ def test_guided_logits_processors(sample_regex, sample_json_schema):
@pytest.mark.parametrize("backend", ["outlines", "lm-format-enforcer"])
async def test_guided_logits_processor_black_box(backend: str, sample_regex,
sample_json_schema):
tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')
tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, 'HuggingFaceH4/zephyr-7b-beta'))
token_ids = tokenizer.encode(
f"Give an example IPv4 address with this regex: {sample_regex}")
regex_request = CompletionRequest(model='test',
......
......@@ -2,17 +2,18 @@ import json
import openai # use the official client for correctness check
import pytest
import os
import pytest_asyncio
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module")
......
......@@ -6,14 +6,15 @@ from http import HTTPStatus
import openai
import pytest
import os
import pytest_asyncio
import requests
from prometheus_client.parser import text_string_to_metric_families
from transformers import AutoTokenizer
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
MODEL_NAME = os.path.join(models_path_prefix, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
@pytest.fixture(scope="module")
......@@ -211,7 +212,7 @@ def test_metrics_exist_run_batch():
"-o",
output_file.name,
"--model",
"intfloat/e5-mistral-7b-instruct",
os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
"--enable-metrics",
"--url",
base_url,
......
import openai # use the official client for correctness check
import pytest
import os
import pytest_asyncio
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module")
def zephyr_lora_files():
return snapshot_download(repo_id=LORA_NAME)
# return snapshot_download(repo_id=LORA_NAME)
return LORA_NAME
@pytest.fixture(scope="module")
......
from ...utils import VLLM_PATH, RemoteOpenAIServer
import vllm.envs as envs
import os
from ...utils import VLLM_PATH, RemoteOpenAIServer, models_path_prefix, envs
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists()
......@@ -54,5 +54,5 @@ def run_and_test_dummy_opt_api_server(model, tp=1):
def test_oot_registration_for_api_server(dummy_opt_path: str):
dummy_opt_path="facebook/opt-125m"
dummy_opt_path = os.path.join(models_path_prefix, "facebook/opt-125m")
run_and_test_dummy_opt_api_server(dummy_opt_path)
......@@ -4,12 +4,12 @@ import re
import openai
import pytest
from ...utils import RemoteOpenAIServer
from ...utils import RemoteOpenAIServer, models_path_prefix
@pytest.mark.asyncio
async def test_empty_prompt():
model_name = "gpt2"
model_name = os.path.join(models_path_prefix, "gpt2")
server_args = ["--enforce-eager"]
with RemoteOpenAIServer(model_name, server_args) as remote_server:
client = remote_server.get_async_client()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment