"vscode:/vscode.git/clone" did not exist on "78565e554b5d69d9ab027a7ccb2c1761d3a1bd01"
Commit cc7f22a8 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.9.1' into v0.9.1-ori

parents b9ea0c09 b6553be1
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# imports for guided decoding tests # imports for guided decoding tests
import openai import openai
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
import requests import requests
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Separate these tests out from test_completion and test_chat, because they # Separate these tests out from test_completion and test_chat, because they
# require launching a second server with a different flag. Running both servers # require launching a second server with a different flag. Running both servers
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import contextlib import contextlib
import os import os
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json import json
import subprocess import subprocess
import sys
import tempfile import tempfile
import pytest
from vllm.entrypoints.openai.protocol import BatchRequestOutput from vllm.entrypoints.openai.protocol import BatchRequestOutput
# ruff: noqa: E501 # ruff: noqa: E501
...@@ -24,9 +26,13 @@ INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": " ...@@ -24,9 +26,13 @@ INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "
{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}} {"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}}
{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}""" {"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}} INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}""" {"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
INPUT_RERANK_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v2/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
def test_empty_file(): def test_empty_file():
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
...@@ -35,9 +41,8 @@ def test_empty_file(): ...@@ -35,9 +41,8 @@ def test_empty_file():
input_file.write("") input_file.write("")
input_file.flush() input_file.flush()
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", "vllm", "run-batch", "-i", input_file.name, "-o", output_file.name,
input_file.name, "-o", output_file.name, "--model", "--model", "intfloat/multilingual-e5-small"
"intfloat/multilingual-e5-small"
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -54,9 +59,8 @@ def test_completions(): ...@@ -54,9 +59,8 @@ def test_completions():
input_file.write(INPUT_BATCH) input_file.write(INPUT_BATCH)
input_file.flush() input_file.flush()
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", "vllm", "run-batch", "-i", input_file.name, "-o", output_file.name,
input_file.name, "-o", output_file.name, "--model", "--model", "NousResearch/Meta-Llama-3-8B-Instruct"
"NousResearch/Meta-Llama-3-8B-Instruct"
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -79,9 +83,8 @@ def test_completions_invalid_input(): ...@@ -79,9 +83,8 @@ def test_completions_invalid_input():
input_file.write(INVALID_INPUT_BATCH) input_file.write(INVALID_INPUT_BATCH)
input_file.flush() input_file.flush()
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", "vllm", "run-batch", "-i", input_file.name, "-o", output_file.name,
input_file.name, "-o", output_file.name, "--model", "--model", "NousResearch/Meta-Llama-3-8B-Instruct"
"NousResearch/Meta-Llama-3-8B-Instruct"
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -95,9 +98,8 @@ def test_embeddings(): ...@@ -95,9 +98,8 @@ def test_embeddings():
input_file.write(INPUT_EMBEDDING_BATCH) input_file.write(INPUT_EMBEDDING_BATCH)
input_file.flush() input_file.flush()
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", "vllm", "run-batch", "-i", input_file.name, "-o", output_file.name,
input_file.name, "-o", output_file.name, "--model", "--model", "intfloat/multilingual-e5-small"
"intfloat/multilingual-e5-small"
], ) ], )
proc.communicate() proc.communicate()
proc.wait() proc.wait()
...@@ -110,16 +112,17 @@ def test_embeddings(): ...@@ -110,16 +112,17 @@ def test_embeddings():
BatchRequestOutput.model_validate_json(line) BatchRequestOutput.model_validate_json(line)
def test_score(): @pytest.mark.parametrize("input_batch",
[INPUT_SCORE_BATCH, INPUT_RERANK_BATCH])
def test_score(input_batch):
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
"w") as input_file, tempfile.NamedTemporaryFile( "w") as input_file, tempfile.NamedTemporaryFile(
"r") as output_file: "r") as output_file:
input_file.write(INPUT_SCORE_BATCH) input_file.write(input_batch)
input_file.flush() input_file.flush()
proc = subprocess.Popen([ proc = subprocess.Popen([
sys.executable, "vllm",
"-m", "run-batch",
"vllm.entrypoints.openai.run_batch",
"-i", "-i",
input_file.name, input_file.name,
"-o", "-o",
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any from typing import Any
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio import asyncio
from contextlib import suppress from contextlib import suppress
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from http import HTTPStatus from http import HTTPStatus
from unittest.mock import MagicMock from unittest.mock import MagicMock
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import openai import openai
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import requests import requests
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import gc import gc
import json import json
import tempfile import tempfile
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
import pytest_asyncio import pytest_asyncio
...@@ -76,11 +77,11 @@ async def test_tokenize_completions( ...@@ -76,11 +77,11 @@ async def test_tokenize_completions(
}) })
response.raise_for_status() response.raise_for_status()
assert response.json() == { result = response.json()
"tokens": tokens, assert result["tokens"] == tokens
"count": len(tokens), assert result["count"] == len(tokens)
"max_model_len": 8192 assert result["max_model_len"] == 8192
} assert result["token_strs"] is None
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -138,11 +139,11 @@ async def test_tokenize_chat( ...@@ -138,11 +139,11 @@ async def test_tokenize_chat(
}) })
response.raise_for_status() response.raise_for_status()
assert response.json() == { result = response.json()
"tokens": tokens, assert result["tokens"] == tokens
"count": len(tokens), assert result["count"] == len(tokens)
"max_model_len": 8192 assert result["max_model_len"] == 8192
} assert result["token_strs"] is None
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -215,11 +216,46 @@ async def test_tokenize_chat_with_tools( ...@@ -215,11 +216,46 @@ async def test_tokenize_chat_with_tools(
) )
response.raise_for_status() response.raise_for_status()
assert response.json() == { result = response.json()
"tokens": tokens, assert result["tokens"] == tokens
"count": len(tokens), assert result["count"] == len(tokens)
"max_model_len": 8192, assert result["max_model_len"] == 8192
} assert result["token_strs"] is None
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name, tokenizer_name",
    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
    indirect=["tokenizer_name"],
)
async def test_tokenize_with_return_token_strs(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Check that the tokenize endpoint returns token strings on request.

    Posts a prompt with ``return_token_strs`` set to ``True`` and compares
    the response against the ids and token strings produced by a locally
    loaded tokenizer.
    """
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
                                    tokenizer_mode="fast")

    text = "This is a token_strs test prompt! vllm1"
    payload = {
        "prompt": text,
        "model": model_name,
        "return_token_strs": True
    }
    response = requests.post(server.url_for("tokenize"), json=payload)
    response.raise_for_status()

    # Reference values computed locally with the same tokenizer.
    expected_ids = local_tokenizer.encode(text, add_special_tokens=True)
    expected_strs = local_tokenizer.convert_ids_to_tokens(expected_ids)

    body = response.json()
    assert body["tokens"] == expected_ids
    assert body["count"] == len(expected_ids)
    assert body["max_model_len"] == 8192
    assert body["token_strs"] == expected_strs
@pytest.mark.asyncio @pytest.mark.asyncio
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# imports for guided decoding tests # imports for guided decoding tests
import io import io
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any from typing import Any
import openai import openai
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json import json
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json import json
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json import json
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock from unittest.mock import MagicMock, patch
import pytest import pytest
...@@ -191,3 +192,27 @@ def test_streaming_tool_call_with_large_steps(): ...@@ -191,3 +192,27 @@ def test_streaming_tool_call_with_large_steps():
assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
assert reconstructor.tool_calls[1].function == PARAMETERLESS_FUNCTION_CALL assert reconstructor.tool_calls[1].function == PARAMETERLESS_FUNCTION_CALL
assert reconstructor.tool_calls[2].function == EMPTY_LIST_FUNCTION_CALL assert reconstructor.tool_calls[2].function == EMPTY_LIST_FUNCTION_CALL
@pytest.mark.parametrize("streaming", [False])
def test_regex_timeout_handling(streaming: bool):
    """Verify that a regex timeout falls back to plain-text output.

    ``TOOL_CALL_REGEX`` on the parser is swapped for a mock whose ``match``
    raises ``TimeoutError``; the extraction is then expected to return the
    input unchanged as content, with no tool calls.
    """
    parser_cls = ToolParserManager.get_tool_parser("llama4_pythonic")
    tool_parser: ToolParser = parser_cls(MagicMock())

    fake_problematic_input = "hello world[A(A=" + "\t)A(A=,\t" * 2

    # Stand-in regex: every call to ``match`` raises TimeoutError.
    timing_out_regex = MagicMock()
    timing_out_regex.match.side_effect = TimeoutError("Regex timeout")

    with patch.object(tool_parser, 'TOOL_CALL_REGEX', timing_out_regex):
        content, tool_calls = run_tool_extraction(
            tool_parser,
            fake_problematic_input,
            streaming=streaming,
        )

        # On timeout the whole input is returned as ordinary text.
        assert content == fake_problematic_input
        assert len(tool_calls) == 0
        timing_out_regex.match.assert_called_once()
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock from unittest.mock import MagicMock, patch
import pytest import pytest
...@@ -159,3 +160,27 @@ def test_streaming_tool_call_with_large_steps(): ...@@ -159,3 +160,27 @@ def test_streaming_tool_call_with_large_steps():
assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
assert reconstructor.tool_calls[1].function == PARAMETERLESS_FUNCTION_CALL assert reconstructor.tool_calls[1].function == PARAMETERLESS_FUNCTION_CALL
assert reconstructor.tool_calls[2].function == EMPTY_LIST_FUNCTION_CALL assert reconstructor.tool_calls[2].function == EMPTY_LIST_FUNCTION_CALL
@pytest.mark.parametrize("streaming", [False])
def test_regex_timeout_handling(streaming: bool):
    """Verify that a regex timeout falls back to plain-text output.

    ``TOOL_CALL_REGEX`` on the parser is swapped for a mock whose ``match``
    raises ``TimeoutError``; the extraction is then expected to return the
    input unchanged as content, with no tool calls.
    """
    # NOTE(review): the parser name here is "llama4_pythonic"; confirm that
    # is the parser this test file is meant to exercise.
    parser_cls = ToolParserManager.get_tool_parser("llama4_pythonic")
    tool_parser: ToolParser = parser_cls(MagicMock())

    fake_problematic_input = "hello world[A(A=" + "\t)A(A=,\t" * 2

    # Stand-in regex: every call to ``match`` raises TimeoutError.
    timing_out_regex = MagicMock()
    timing_out_regex.match.side_effect = TimeoutError("Regex timeout")

    with patch.object(tool_parser, 'TOOL_CALL_REGEX', timing_out_regex):
        content, tool_calls = run_tool_extraction(
            tool_parser,
            fake_problematic_input,
            streaming=streaming,
        )

        # On timeout the whole input is returned as ordinary text.
        assert content == fake_problematic_input
        assert len(tool_calls) == 0
        timing_out_regex.match.assert_called_once()
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Iterable from collections.abc import Iterable
from typing import Union from typing import Union
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment