Unverified commit fefce498, authored by Chauncey, committed by GitHub
Browse files

[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent a5bbbd2f
...@@ -6,8 +6,8 @@ from unittest.mock import MagicMock ...@@ -6,8 +6,8 @@ from unittest.mock import MagicMock
import pytest import pytest
from vllm.entrypoints.openai.protocol import ErrorResponse from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.serving_engine import GenerationError, OpenAIServing from vllm.entrypoints.openai.engine.serving import GenerationError, OpenAIServing
@pytest.mark.asyncio @pytest.mark.asyncio
......
...@@ -7,7 +7,7 @@ import json ...@@ -7,7 +7,7 @@ import json
import pytest import pytest
from openai.types.responses import ResponseFunctionToolCall from openai.types.responses import ResponseFunctionToolCall
from vllm.entrypoints.openai.protocol import ResponsesRequest from vllm.entrypoints.openai.engine.protocol import ResponsesRequest
def test_function_call_dict_converted_to_object(): def test_function_call_dict_converted_to_object():
...@@ -253,7 +253,7 @@ def test_function_call_validation_failure_logs_debug(caplog): ...@@ -253,7 +253,7 @@ def test_function_call_validation_failure_logs_debug(caplog):
} }
# Mock the logger to verify debug was called # Mock the logger to verify debug was called
with patch("vllm.entrypoints.openai.protocol.logger") as mock_logger: with patch("vllm.entrypoints.openai.engine.protocol.logger") as mock_logger:
with pytest.raises(ValueError): with pytest.raises(ValueError):
ResponsesRequest(**request_data) ResponsesRequest(**request_data)
......
...@@ -9,8 +9,9 @@ from unittest.mock import AsyncMock, MagicMock ...@@ -9,8 +9,9 @@ from unittest.mock import AsyncMock, MagicMock
import pytest import pytest
from vllm.config.multimodal import MultiModalConfig from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ErrorResponse from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.outputs import CompletionOutput, RequestOutput from vllm.outputs import CompletionOutput, RequestOutput
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
......
...@@ -5,7 +5,7 @@ import pytest ...@@ -5,7 +5,7 @@ import pytest
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...models.registry import HF_EXAMPLE_MODELS from ...models.registry import HF_EXAMPLE_MODELS
......
...@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock ...@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest import pytest
from vllm.config.multimodal import MultiModalConfig from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse from vllm.entrypoints.openai.engine.protocol import CompletionRequest, ErrorResponse
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.outputs import CompletionOutput, RequestOutput from vllm.outputs import CompletionOutput, RequestOutput
......
...@@ -8,7 +8,7 @@ from unittest.mock import Mock ...@@ -8,7 +8,7 @@ from unittest.mock import Mock
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
StructuredOutputsParams, StructuredOutputsParams,
) )
from vllm.entrypoints.tool_server import ToolServer from vllm.entrypoints.tool_server import ToolServer
......
...@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock ...@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest import pytest
from vllm.config.multimodal import MultiModalConfig from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse from vllm.entrypoints.openai.engine.protocol import CompletionRequest, ErrorResponse
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
......
...@@ -4,7 +4,10 @@ from openai_harmony import ( ...@@ -4,7 +4,10 @@ from openai_harmony import (
Message, Message,
) )
from vllm.entrypoints.openai.protocol import serialize_message, serialize_messages from vllm.entrypoints.openai.engine.protocol import (
serialize_message,
serialize_messages,
)
def test_serialize_message() -> None: def test_serialize_message() -> None:
......
...@@ -11,14 +11,16 @@ import pytest_asyncio ...@@ -11,14 +11,16 @@ import pytest_asyncio
from openai import OpenAI from openai import OpenAI
from vllm.config.multimodal import MultiModalConfig from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.parser.harmony_utils import get_encoding from vllm.entrypoints.openai.chat_completion.protocol import (
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionResponse, ChatCompletionResponse,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
RequestResponseMetadata, RequestResponseMetadata,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.outputs import CompletionOutput, RequestOutput from vllm.outputs import CompletionOutput, RequestOutput
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
...@@ -1517,12 +1519,12 @@ class TestCreateRemainingArgsDelta: ...@@ -1517,12 +1519,12 @@ class TestCreateRemainingArgsDelta:
def test_preserves_id_type_name(self): def test_preserves_id_type_name(self):
"""Test that id, type, and name are preserved from original delta.""" """Test that id, type, and name are preserved from original delta."""
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall, DeltaFunctionCall,
DeltaMessage, DeltaMessage,
DeltaToolCall, DeltaToolCall,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
original_delta = DeltaMessage( original_delta = DeltaMessage(
tool_calls=[ tool_calls=[
...@@ -1552,12 +1554,12 @@ class TestCreateRemainingArgsDelta: ...@@ -1552,12 +1554,12 @@ class TestCreateRemainingArgsDelta:
def test_matches_by_index(self): def test_matches_by_index(self):
"""Test that the correct tool call is matched by index.""" """Test that the correct tool call is matched by index."""
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall, DeltaFunctionCall,
DeltaMessage, DeltaMessage,
DeltaToolCall, DeltaToolCall,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
original_delta = DeltaMessage( original_delta = DeltaMessage(
tool_calls=[ tool_calls=[
...@@ -1588,12 +1590,12 @@ class TestCreateRemainingArgsDelta: ...@@ -1588,12 +1590,12 @@ class TestCreateRemainingArgsDelta:
def test_no_matching_tool_call(self): def test_no_matching_tool_call(self):
"""Test graceful handling when no matching tool call is found.""" """Test graceful handling when no matching tool call is found."""
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall, DeltaFunctionCall,
DeltaMessage, DeltaMessage,
DeltaToolCall, DeltaToolCall,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
original_delta = DeltaMessage( original_delta = DeltaMessage(
tool_calls=[ tool_calls=[
...@@ -1620,8 +1622,8 @@ class TestCreateRemainingArgsDelta: ...@@ -1620,8 +1622,8 @@ class TestCreateRemainingArgsDelta:
def test_function_is_none(self): def test_function_is_none(self):
"""Test handling when original tool call has no function.""" """Test handling when original tool call has no function."""
from vllm.entrypoints.openai.protocol import DeltaMessage, DeltaToolCall from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.engine.protocol import DeltaMessage, DeltaToolCall
original_delta = DeltaMessage( original_delta = DeltaMessage(
tool_calls=[ tool_calls=[
......
...@@ -9,7 +9,7 @@ from unittest.mock import patch ...@@ -9,7 +9,7 @@ from unittest.mock import patch
import pytest import pytest
from vllm.entrypoints.openai.serving_chat_stream_harmony import ( from vllm.entrypoints.openai.chat_completion.stream_harmony import (
extract_harmony_streaming_delta, extract_harmony_streaming_delta,
) )
...@@ -82,7 +82,7 @@ class TestExtractHarmonyStreamingDelta: ...@@ -82,7 +82,7 @@ class TestExtractHarmonyStreamingDelta:
assert tools_streamed is False assert tools_streamed is False
@pytest.mark.parametrize("channel", ["commentary", "analysis"]) @pytest.mark.parametrize("channel", ["commentary", "analysis"])
@patch("vllm.entrypoints.openai.serving_chat_stream_harmony.make_tool_call_id") @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call(self, mock_make_tool_call_id, channel): def test_new_tool_call(self, mock_make_tool_call_id, channel):
"""Test new tool call creation when recipient changes.""" """Test new tool call creation when recipient changes."""
mock_make_tool_call_id.return_value = "call_test123" mock_make_tool_call_id.return_value = "call_test123"
......
...@@ -8,7 +8,7 @@ from unittest.mock import Mock ...@@ -8,7 +8,7 @@ from unittest.mock import Mock
import pytest import pytest
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.tokenizers.mistral import MistralTokenizer from vllm.tokenizers.mistral import MistralTokenizer
......
...@@ -8,7 +8,7 @@ import pytest ...@@ -8,7 +8,7 @@ import pytest
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
LoadLoRAAdapterRequest, LoadLoRAAdapterRequest,
UnloadLoRAAdapterRequest, UnloadLoRAAdapterRequest,
......
...@@ -14,7 +14,7 @@ from openai.types.responses.tool import ( ...@@ -14,7 +14,7 @@ from openai.types.responses.tool import (
) )
from vllm.entrypoints.context import ConversationContext from vllm.entrypoints.context import ConversationContext
from vllm.entrypoints.openai.protocol import ErrorResponse, ResponsesRequest from vllm.entrypoints.openai.engine.protocol import ErrorResponse, ResponsesRequest
from vllm.entrypoints.openai.serving_responses import ( from vllm.entrypoints.openai.serving_responses import (
OpenAIServingResponses, OpenAIServingResponses,
_extract_allowed_tools_from_mcp_requests, _extract_allowed_tools_from_mcp_requests,
......
...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import ( ...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import (
run_tool_extraction, run_tool_extraction,
run_tool_extraction_streaming, run_tool_extraction_streaming,
) )
from vllm.entrypoints.openai.protocol import FunctionCall from vllm.entrypoints.openai.engine.protocol import FunctionCall
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser, ToolParserManager
......
...@@ -5,7 +5,7 @@ import json ...@@ -5,7 +5,7 @@ import json
import pytest import pytest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
......
...@@ -11,7 +11,7 @@ from tests.entrypoints.openai.tool_parsers.utils import ( ...@@ -11,7 +11,7 @@ from tests.entrypoints.openai.tool_parsers.utils import (
run_tool_extraction, run_tool_extraction,
run_tool_extraction_streaming, run_tool_extraction_streaming,
) )
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser, ToolParserManager
......
...@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch ...@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest import pytest
from vllm.entrypoints.openai.protocol import ExtractedToolCallInformation from vllm.entrypoints.openai.engine.protocol import ExtractedToolCallInformation
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.llama_tool_parser import Llama3JsonToolParser from vllm.tool_parsers.llama_tool_parser import Llama3JsonToolParser
......
...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import ( ...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import (
run_tool_extraction, run_tool_extraction,
run_tool_extraction_streaming, run_tool_extraction_streaming,
) )
from vllm.entrypoints.openai.protocol import FunctionCall from vllm.entrypoints.openai.engine.protocol import FunctionCall
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser, ToolParserManager
......
...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import ( ...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import (
run_tool_extraction, run_tool_extraction,
run_tool_extraction_streaming, run_tool_extraction_streaming,
) )
from vllm.entrypoints.openai.protocol import FunctionCall from vllm.entrypoints.openai.engine.protocol import FunctionCall
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser, ToolParserManager
......
...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import ( ...@@ -9,7 +9,7 @@ from tests.entrypoints.openai.tool_parsers.utils import (
run_tool_extraction, run_tool_extraction,
run_tool_extraction_streaming, run_tool_extraction_streaming,
) )
from vllm.entrypoints.openai.protocol import FunctionCall from vllm.entrypoints.openai.engine.protocol import FunctionCall
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser, ToolParserManager
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment