Commit 859566e0 authored by guanyu
Browse files

test_audio文件将TEST_AUDIO_URLS改为本地;testchat文件360行路径改为本地;test_metrics文件197行将base_...

test_audio文件将TEST_AUDIO_URLS改为本地;test_chat文件360行路径改为本地;test_metrics文件197行将base_url从0.0.0.0改为localhost;test_run_batch将INPUT_BATCH,INVALID_INPUT_BATCH,INPUT_EMBEDDING_BATCH改为原来的格式;test_tokenizer_group将18行的gpt2路径改为修改后的路径;test_broadcast将model的判断改为if "llava-hf/llava-1.5-7b-hf" in model
parent e06809f9
...@@ -11,11 +11,13 @@ from vllm.multimodal.utils import encode_audio_base64, fetch_audio ...@@ -11,11 +11,13 @@ from vllm.multimodal.utils import encode_audio_base64, fetch_audio
from ...utils import RemoteOpenAIServer, models_path_prefix from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_3") MODEL_NAME = os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_3")
# TEST_AUDIO_URLS = [
# AudioAsset("winning_call").url,
# ]
TEST_AUDIO_URLS = [ TEST_AUDIO_URLS = [
AudioAsset("winning_call").url, "http://localhost:8000/winning_call.ogg"
] ]
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def server(): def server():
args = [ args = [
......
...@@ -355,9 +355,13 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str): ...@@ -355,9 +355,13 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
@pytest.mark.asyncio @pytest.mark.asyncio
# @pytest.mark.parametrize(
# "model_name",
# ["HuggingFaceH4/zephyr-7b-beta", "zephyr-lora"],
# )
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model_name", "model_name",
["HuggingFaceH4/zephyr-7b-beta", "zephyr-lora"], [os.path.join(models_path_prefix,"HuggingFaceH4/zephyr-7b-beta"), "zephyr-lora"],
) )
async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
model_name: str): model_name: str):
......
...@@ -194,7 +194,8 @@ async def test_metrics_exist(client: openai.AsyncOpenAI): ...@@ -194,7 +194,8 @@ async def test_metrics_exist(client: openai.AsyncOpenAI):
def test_metrics_exist_run_batch(): def test_metrics_exist_run_batch():
input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501 input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501
base_url = "0.0.0.0" #base_url = "0.0.0.0"
base_url = "localhost"
port = "8001" port = "8001"
server_url = f"http://{base_url}:{port}" server_url = f"http://{base_url}:{port}"
......
...@@ -7,20 +7,36 @@ from vllm.entrypoints.openai.protocol import BatchRequestOutput ...@@ -7,20 +7,36 @@ from vllm.entrypoints.openai.protocol import BatchRequestOutput
from ...utils import models_path_prefix from ...utils import models_path_prefix
# ruff: noqa: E501 # ruff: noqa: E501
INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} # INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} # {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
# {"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
# {"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
# {"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
# INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
# {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
# INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are a helpful assistant."}}
# {"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are an unhelpful assistant."}}
# {"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "Hello world!"}}
# {"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
#修改↓
INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}""" {"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": os.path.join(models_path_prefix, "NousResearch/Meta-Llama-3-8B-Instruct"), "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}""" {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are a helpful assistant."}} INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "You are an unhelpful assistant."}} {"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are an unhelpful assistant."}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"), "input": "Hello world!"}} {"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "Hello world!"}}
{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}""" {"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
......
...@@ -4,7 +4,8 @@ from transformers import AutoTokenizer, PreTrainedTokenizerBase ...@@ -4,7 +4,8 @@ from transformers import AutoTokenizer, PreTrainedTokenizerBase
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.transformers_utils.tokenizer import get_lora_tokenizer from vllm.transformers_utils.tokenizer import get_lora_tokenizer
from vllm.transformers_utils.tokenizer_group import get_tokenizer_group from vllm.transformers_utils.tokenizer_group import get_tokenizer_group
import os
from ..utils import RemoteOpenAIServer, models_path_prefix
from ..conftest import get_tokenizer_pool_config from ..conftest import get_tokenizer_pool_config
...@@ -14,7 +15,7 @@ async def test_tokenizer_group_lora(sql_lora_files, tokenizer_group_type): ...@@ -14,7 +15,7 @@ async def test_tokenizer_group_lora(sql_lora_files, tokenizer_group_type):
reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files) reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files)
tokenizer_group = get_tokenizer_group( tokenizer_group = get_tokenizer_group(
get_tokenizer_pool_config(tokenizer_group_type), get_tokenizer_pool_config(tokenizer_group_type),
tokenizer_id="gpt2", tokenizer_id=os.path.join(models_path_prefix,"gpt2"),
enable_lora=True, enable_lora=True,
max_num_seqs=1, max_num_seqs=1,
max_input_length=None, max_input_length=None,
......
...@@ -20,11 +20,22 @@ def test_models(hf_runner, vllm_runner, image_assets, ...@@ -20,11 +20,22 @@ def test_models(hf_runner, vllm_runner, image_assets,
num_logprobs = 5 num_logprobs = 5
tensor_parallel_size = 2 tensor_parallel_size = 2
if model.startswith("llava-hf/llava-1.5"): # if model.startswith("llava-hf/llava-1.5"):
# from .test_llava import models, run_test
# elif model.startswith("llava-hf/llava-v1.6"):
# from .test_llava_next import models, run_test # type: ignore[no-redef]
# elif model.startswith("facebook/chameleon"):
# from .test_chameleon import models, run_test # type: ignore[no-redef]
# else:
# raise NotImplementedError(f"Unsupported model: {model}")
if "llava-hf/llava-1.5-7b-hf" in model:
# if model.startswith(os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),):
from .test_llava import models, run_test from .test_llava import models, run_test
elif model.startswith("llava-hf/llava-v1.6"): elif "llava-hf/llava-v1.6-mistral-7b-hf" in model:
#elif model.startswith(os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")):
from .test_llava_next import models, run_test # type: ignore[no-redef] from .test_llava_next import models, run_test # type: ignore[no-redef]
elif model.startswith("facebook/chameleon"): elif "facebook/chameleon-7b" in model:
#elif model.startswith(os.path.join(models_path_prefix, "facebook/chameleon-7b")):
from .test_chameleon import models, run_test # type: ignore[no-redef] from .test_chameleon import models, run_test # type: ignore[no-redef]
else: else:
raise NotImplementedError(f"Unsupported model: {model}") raise NotImplementedError(f"Unsupported model: {model}")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment