Unverified Commit fefce498, authored by Chauncey, committed by GitHub
Browse files

[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent a5bbbd2f
......@@ -3,8 +3,8 @@
from collections.abc import Iterable
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
FunctionCall,
......
......@@ -4,11 +4,13 @@ import json
from collections.abc import AsyncGenerator
from typing import Any
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionResponse,
ChatCompletionResponseChoice,
ChatCompletionStreamResponse,
ChatMessage,
)
from vllm.entrypoints.openai.engine.protocol import (
UsageInfo,
)
......
......@@ -5,7 +5,7 @@ import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
......@@ -4,7 +4,8 @@
import pytest
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning import ReasoningParser
from vllm.tokenizers.mistral import MistralTokenizer
......
......@@ -7,8 +7,8 @@ from collections.abc import Generator
import pytest
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
FunctionCall,
ToolCall,
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock
import pytest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.tool_parsers.functiongemma_tool_parser import FunctionGemmaToolParser
......
......@@ -6,7 +6,7 @@ import json
import pytest
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.glm4_moe_tool_parser import (
Glm4MoeModelToolParser,
......
......@@ -8,7 +8,7 @@ import partial_json_parser
import pytest
from partial_json_parser.core.options import Allow
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.entrypoints.openai.engine.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.jamba_tool_parser import JambaToolParser
......
......@@ -6,7 +6,7 @@ import json
import pytest
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
......
......@@ -7,8 +7,10 @@ from typing import Any
import pytest
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
FunctionCall,
ToolCall,
)
......
......@@ -11,7 +11,7 @@ from mistral_common.protocol.instruct.request import InstructRequest
from mistral_common.protocol.instruct.tool_calls import FunctionCall, ToolCall
from partial_json_parser.core.options import Allow
from vllm.entrypoints.openai.protocol import DeltaMessage, DeltaToolCall
from vllm.entrypoints.openai.engine.protocol import DeltaMessage, DeltaToolCall
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tokenizers.mistral import MistralTokenizer
......
......@@ -14,7 +14,7 @@ from openai_harmony import (
load_harmony_encoding,
)
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser
......
......@@ -6,9 +6,11 @@ from collections.abc import Generator
import pytest
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
FunctionCall,
ToolCall,
......
......@@ -7,9 +7,11 @@ from collections.abc import Generator
import pytest
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
FunctionCall,
ToolCall,
......
......@@ -6,8 +6,8 @@ from collections.abc import Generator
import pytest
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
FunctionCall,
ToolCall,
......
......@@ -3,7 +3,7 @@
import pytest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
def test_chat_completion_request_with_no_tools():
......
......@@ -8,10 +8,10 @@ import pytest
import regex as re
from pydantic import TypeAdapter
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.tool_parsers.utils import get_json_schema_from_tools
pytestmark = pytest.mark.cpu_test
......
......@@ -11,12 +11,14 @@ from vllm import SamplingParams
from vllm.assets.image import ImageAsset
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionResponse,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.inputs import PromptType
from vllm.outputs import RequestOutput
......
......@@ -25,16 +25,18 @@ from vllm.entrypoints.anthropic.protocol import (
)
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionNamedToolChoiceParam,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionStreamResponse,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse,
StreamOptions,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
logger = logging.getLogger(__name__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment