Commit 0da93439 authored by zhuwenwen
Browse files

Merge tag 'v0.18.1rc0' into v0.18.1rc0-ori

parents 25f2f756 298e5108
......@@ -6,8 +6,8 @@ import json
import pytest
from ...utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
from .conftest import add_attention_backend
from tests.entrypoints.openai.conftest import add_attention_backend
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
MISTRAL_FORMAT_ARGS = [
"--tokenizer_mode",
......@@ -152,5 +152,5 @@ async def test_basic_audio_foscolo(foscolo, rocm_aiter_fa_attention, model_name)
model_name,
foscolo,
language="it",
expected_text="ove il mio corpo fanciulletto giacque",
expected_text="ove il mio corpo fanciulletto",
)
......@@ -13,7 +13,7 @@ import pytest
import pytest_asyncio
import soundfile as sf
from ...utils import RemoteOpenAIServer
from tests.utils import RemoteOpenAIServer
MODEL_NAME = "openai/whisper-large-v3-turbo"
......
......@@ -14,8 +14,8 @@ import pytest
import pytest_asyncio
import soundfile as sf
from ...utils import RemoteOpenAIServer
from .conftest import add_attention_backend
from tests.entrypoints.openai.conftest import add_attention_backend
from tests.utils import RemoteOpenAIServer
SERVER_ARGS = ["--enforce-eager"]
......
......@@ -291,3 +291,32 @@ def test_served_model_name_parsing(tmp_path, vllm_parser, args, raises):
else:
with pytest.raises(raises):
vllm_parser.parse_args(args=args)
### Tests for LoRA target modules parsing
def test_lora_target_modules_single(serve_parser):
    """A lone name after --lora-target-modules parses to a one-item list."""
    cli_args = ["--enable-lora", "--lora-target-modules", "o_proj"]
    parsed = serve_parser.parse_args(args=cli_args)
    assert parsed.lora_target_modules == ["o_proj"]
def test_lora_target_modules_multiple(serve_parser):
    """Several names after one --lora-target-modules flag keep their order."""
    expected_modules = ["o_proj", "qkv_proj", "down_proj"]
    cli_args = ["--enable-lora", "--lora-target-modules", *expected_modules]
    parsed = serve_parser.parse_args(args=cli_args)
    assert parsed.lora_target_modules == expected_modules
def test_lora_target_modules_default_none(serve_parser):
    """With an empty argument list, lora_target_modules stays None."""
    parsed = serve_parser.parse_args(args=[])
    assert parsed.lora_target_modules is None
......@@ -275,7 +275,7 @@ INPUT_REASONING_BATCH = "\n".join(
]
)
MINIMAL_WAV_BASE64 = "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
MINIMAL_WAV_BASE64 = "UklGRigAAABXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAAZGF0YQQAAAAAAP9/"
INPUT_TRANSCRIPTION_BATCH = (
json.dumps(
{
......
......@@ -5,7 +5,7 @@ import json
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......@@ -13,6 +13,13 @@ from vllm.entrypoints.openai.engine.protocol import FunctionCall
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager
# Separator that the *_GIGACHAT3 expected outputs place before the tool header.
MSG_SEP_TOKEN = "<|message_sep|>\n\n"
# Role separator; it terminates the "function call" header of the GIGACHAT3 form.
ROLE_SEP_TOKEN = "<|role_sep|>\n"
# EOS marker string; appended to the *_with_eos test outputs.
EOS_TOKEN = "</s>"
# Tool-call prefix used by the *_GIGACHAT3 outputs.
TOOL_HEADER_GIGACHAT3 = f"function call{ROLE_SEP_TOKEN}"
# Tool-call prefix used by the *_GIGACHAT31 outputs.
TOOL_HEADER_GIGACHAT31 = "<|function_call|>"
SIMPLE_ARGS_DICT = {
"action": "create",
"id": "preferences",
......@@ -24,7 +31,10 @@ SIMPLE_FUNCTION_JSON = json.dumps(
},
ensure_ascii=False,
)
SIMPLE_FUNCTION_OUTPUT = "function call" + SIMPLE_FUNCTION_JSON
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3 = (
f"{MSG_SEP_TOKEN}{TOOL_HEADER_GIGACHAT3}{SIMPLE_FUNCTION_JSON}"
)
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31 = f"{TOOL_HEADER_GIGACHAT31}{SIMPLE_FUNCTION_JSON}"
SIMPLE_FUNCTION_CALL = FunctionCall(
name="manage_user_memory",
arguments=json.dumps(SIMPLE_ARGS_DICT, ensure_ascii=False),
......@@ -38,7 +48,12 @@ PARAMETERLESS_FUNCTION_JSON = json.dumps(
},
ensure_ascii=False,
)
PARAMETERLESS_FUNCTION_OUTPUT = "function call" + PARAMETERLESS_FUNCTION_JSON
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3 = (
f"{MSG_SEP_TOKEN}{TOOL_HEADER_GIGACHAT3}{PARAMETERLESS_FUNCTION_JSON}"
)
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT31 = (
f"{TOOL_HEADER_GIGACHAT31}{PARAMETERLESS_FUNCTION_JSON}"
)
PARAMETERLESS_FUNCTION_CALL = FunctionCall(
name="manage_user_memory",
arguments=json.dumps({}, ensure_ascii=False),
......@@ -62,17 +77,38 @@ COMPLEX_FUNCTION_JSON = json.dumps(
},
ensure_ascii=False,
)
COMPLEX_FUNCTION_OUTPUT = "function call" + COMPLEX_FUNCTION_JSON
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3 = (
f"{MSG_SEP_TOKEN}{TOOL_HEADER_GIGACHAT3}{COMPLEX_FUNCTION_JSON}"
)
COMPLEX_FUNCTION_OUTPUT_GIGACHAT31 = f"{TOOL_HEADER_GIGACHAT31}{COMPLEX_FUNCTION_JSON}"
COMPLEX_FUNCTION_CALL = FunctionCall(
name="manage_user_memory",
arguments=json.dumps(COMPLEX_ARGS_DICT, ensure_ascii=False),
)
CONTENT_TEXT = "I'll check that for you."
MIXED_OUTPUT_GIGACHAT3 = f"{CONTENT_TEXT}{SIMPLE_FUNCTION_OUTPUT_GIGACHAT3}"
MIXED_OUTPUT_GIGACHAT31 = f"{CONTENT_TEXT}{SIMPLE_FUNCTION_OUTPUT_GIGACHAT31}"
@pytest.fixture(name="gigachat_tokenizer")
def fixture_gigachat_tokenizer(default_tokenizer: TokenizerLike):
    """Register the GigaChat marker strings on ``default_tokenizer``.

    The same tokenizer object that was passed in is returned.
    """
    # NOTE(review): add_tokens presumably mutates the tokenizer in place;
    # confirm the scope of the default_tokenizer fixture if isolation matters.
    marker_tokens = [
        MSG_SEP_TOKEN,
        ROLE_SEP_TOKEN,
        TOOL_HEADER_GIGACHAT31,
        EOS_TOKEN,
    ]
    default_tokenizer.add_tokens(marker_tokens)
    return default_tokenizer
@pytest.mark.parametrize("streaming", [True, False])
def test_no_tool_call(streaming: bool, default_tokenizer: TokenizerLike):
def test_no_tool_call(streaming: bool, gigachat_tokenizer: TokenizerLike):
tool_parser: ToolParser = ToolParserManager.get_tool_parser("gigachat3")(
default_tokenizer
gigachat_tokenizer
)
model_output = "How can I help you today?"
content, tool_calls = run_tool_extraction(
......@@ -85,45 +121,143 @@ def test_no_tool_call(streaming: bool, default_tokenizer: TokenizerLike):
TEST_CASES = [
pytest.param(
True,
SIMPLE_FUNCTION_OUTPUT,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3,
[SIMPLE_FUNCTION_CALL],
None,
id="simple_streaming_gigachat3",
),
pytest.param(
False,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3,
[SIMPLE_FUNCTION_CALL],
None,
id="simple_nonstreaming_gigachat3",
),
pytest.param(
True,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3,
[PARAMETERLESS_FUNCTION_CALL],
None,
id="parameterless_streaming_gigachat3",
),
pytest.param(
False,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3,
[PARAMETERLESS_FUNCTION_CALL],
None,
id="parameterless_nonstreaming_gigachat3",
),
pytest.param(
True,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3,
[COMPLEX_FUNCTION_CALL],
None,
id="complex_streaming_gigachat3",
),
pytest.param(
False,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3,
[COMPLEX_FUNCTION_CALL],
None,
id="complex_nonstreaming_gigachat3",
),
pytest.param(
True,
MIXED_OUTPUT_GIGACHAT3,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_streaming_gigachat3",
),
pytest.param(
False,
MIXED_OUTPUT_GIGACHAT3,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_nonstreaming_gigachat3",
),
pytest.param(
True,
MIXED_OUTPUT_GIGACHAT3 + EOS_TOKEN,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_streaming_with_eos_gigachat3",
),
pytest.param(
False,
MIXED_OUTPUT_GIGACHAT3 + EOS_TOKEN,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_nonstreaming_with_eos_gigachat3",
),
pytest.param(
True,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31,
[SIMPLE_FUNCTION_CALL],
None,
id="simple_streaming",
id="simple_streaming_gigachat31",
),
pytest.param(
False,
SIMPLE_FUNCTION_OUTPUT,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31,
[SIMPLE_FUNCTION_CALL],
None,
id="simple_nonstreaming",
id="simple_nonstreaming_gigachat31",
),
pytest.param(
True,
PARAMETERLESS_FUNCTION_OUTPUT,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT31,
[PARAMETERLESS_FUNCTION_CALL],
None,
id="parameterless_streaming",
id="parameterless_streaming_gigachat31",
),
pytest.param(
False,
PARAMETERLESS_FUNCTION_OUTPUT,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT31,
[PARAMETERLESS_FUNCTION_CALL],
None,
id="parameterless_nonstreaming",
id="parameterless_nonstreaming_gigachat31",
),
pytest.param(
True,
COMPLEX_FUNCTION_OUTPUT,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT31,
[COMPLEX_FUNCTION_CALL],
None,
id="complex_streaming",
id="complex_streaming_gigachat31",
),
pytest.param(
False,
COMPLEX_FUNCTION_OUTPUT,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT31,
[COMPLEX_FUNCTION_CALL],
None,
id="complex_nonstreaming",
id="complex_nonstreaming_gigachat31",
),
pytest.param(
True,
MIXED_OUTPUT_GIGACHAT31,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_streaming_gigachat31",
),
pytest.param(
False,
MIXED_OUTPUT_GIGACHAT31,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_nonstreaming_gigachat31",
),
pytest.param(
True,
MIXED_OUTPUT_GIGACHAT31 + EOS_TOKEN,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_streaming_with_eos_gigachat31",
),
pytest.param(
False,
MIXED_OUTPUT_GIGACHAT31 + EOS_TOKEN,
[SIMPLE_FUNCTION_CALL],
CONTENT_TEXT,
id="mixed_content_nonstreaming_with_eos_gigachat31",
),
]
......@@ -136,14 +270,16 @@ def test_tool_call(
model_output: str,
expected_tool_calls: list[FunctionCall],
expected_content: str | None,
default_tokenizer: TokenizerLike,
gigachat_tokenizer: TokenizerLike,
):
tool_parser: ToolParser = ToolParserManager.get_tool_parser("gigachat3")(
default_tokenizer
gigachat_tokenizer
)
content, tool_calls = run_tool_extraction(
tool_parser, model_output, streaming=streaming
)
if content == "":
content = None
assert content == expected_content
assert len(tool_calls) == len(expected_tool_calls)
for actual, expected in zip(tool_calls, expected_tool_calls):
......@@ -154,15 +290,46 @@ def test_tool_call(
assert actual_args == expected_args
def test_streaming_tool_call_with_large_steps(default_tokenizer: TokenizerLike):
@pytest.mark.parametrize(
"model_output_deltas",
[
pytest.param(
[
CONTENT_TEXT[:3],
CONTENT_TEXT[3:5],
CONTENT_TEXT[5:],
MSG_SEP_TOKEN,
TOOL_HEADER_GIGACHAT3,
COMPLEX_FUNCTION_JSON[:40],
COMPLEX_FUNCTION_JSON[40:-1],
COMPLEX_FUNCTION_JSON[-1],
],
id="gigachat3",
),
pytest.param(
[
CONTENT_TEXT[:3],
CONTENT_TEXT[3:5],
CONTENT_TEXT[5:],
TOOL_HEADER_GIGACHAT31,
COMPLEX_FUNCTION_JSON[:40],
COMPLEX_FUNCTION_JSON[40:-1],
COMPLEX_FUNCTION_JSON[-1],
],
id="gigachat31",
),
],
)
def test_streaming_tool_call_with_large_steps(
model_output_deltas: list[str],
gigachat_tokenizer: TokenizerLike,
):
"""
Test that the closing braces are streamed correctly.
"""
tool_parser: ToolParser = ToolParserManager.get_tool_parser("gigachat3")(
default_tokenizer
gigachat_tokenizer
)
model_output_deltas = [
"function call",
COMPLEX_FUNCTION_JSON[:40],
COMPLEX_FUNCTION_JSON[40:],
]
reconstructor = run_tool_extraction_streaming(
tool_parser,
model_output_deltas,
......
......@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -7,10 +7,10 @@ embedding models, covering text embedding, embedding type conversions,
response structure, batching, normalisation, and semantic similarity.
"""
import base64
import struct
import numpy as np
import pybase64 as base64
import pytest
import requests
......
......@@ -6,11 +6,11 @@ Validates image embedding, batching, normalisation, and embedding type
conversions through the /v2/embed endpoint.
"""
import base64
import struct
import zlib
import numpy as np
import pybase64 as base64
import pytest
import requests
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import json
import numpy as np
import openai
import pybase64 as base64
import pytest
import pytest_asyncio
import requests
......
......@@ -3,10 +3,10 @@
"""Unit tests for Cohere embed protocol: build_typed_embeddings and its
underlying packing helpers, plus Cohere-specific serving helpers."""
import base64
import struct
import numpy as np
import pybase64 as base64
import pytest
from vllm.entrypoints.pooling.embed.protocol import (
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import json
import numpy as np
import pybase64 as base64
import pytest
import requests
import torch
......
......@@ -8,12 +8,11 @@ import pytest
import pytest_asyncio
from transformers import AutoTokenizer
from tests.utils import RemoteOpenAIServer
from vllm.config import ModelConfig
from vllm.config.utils import getattr_iter
from vllm.v1.engine.detokenizer import check_stop_strings
from ...utils import RemoteOpenAIServer
MODEL_NAME = "Qwen/Qwen3-0.6B"
GEN_ENDPOINT = "/inference/v1/generate"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment