Unverified commit fefce498, authored by Chauncey, committed by GitHub
Browse files

[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent a5bbbd2f
......@@ -9,18 +9,20 @@ from fastapi import Request
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse,
)
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.renderer import RenderConfig
from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest,
DetokenizeResponse,
ErrorResponse,
TokenizeChatRequest,
TokenizeRequest,
TokenizeResponse,
TokenizerInfoResponse,
)
from vllm.entrypoints.openai.serving_engine import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.renderer import RenderConfig
from vllm.inputs import TokensPrompt
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
......
......@@ -22,9 +22,11 @@ from vllm.entrypoints.chat_utils import (
resolve_hf_chat_template,
resolve_mistral_chat_template,
)
from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.entrypoints.openai.engine.protocol import (
CompletionRequest,
StreamOptions,
)
......
......@@ -14,8 +14,10 @@ from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path
if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ResponsesRequest,
)
......
......@@ -5,13 +5,15 @@ from abc import abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers import TokenizerLike
if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
ResponsesRequest,
)
else:
......
......@@ -3,7 +3,7 @@
from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
......@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
......
......@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
......@@ -5,8 +5,11 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.entrypoints.openai.parser.harmony_utils import parse_chat_output
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
......
......@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
......
......@@ -3,7 +3,10 @@
from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import (
ReasoningParser,
......
......@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
......
......@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
......
......@@ -3,8 +3,10 @@
from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ResponsesRequest,
)
......
......@@ -3,8 +3,10 @@
from functools import cached_property
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
ResponsesRequest,
)
from vllm.logger import init_logger
......
......@@ -10,9 +10,10 @@ import regex as re
if TYPE_CHECKING:
from vllm.tokenizers import TokenizerLike
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ResponsesRequest,
)
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ResponsesRequest
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import ResponsesRequest
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
......@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
......
......@@ -19,7 +19,7 @@ from mistral_common.tokens.tokenizers.sentencepiece import (
from mistral_common.tokens.tokenizers.tekken import Tekkenizer
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.logger import init_logger
from .protocol import TokenizerLike
......
......@@ -10,8 +10,8 @@ from openai.types.responses.response_format_text_json_schema_config import (
ResponseFormatTextJSONSchemaConfig,
)
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
ResponsesRequest,
......
......@@ -6,8 +6,10 @@ from collections.abc import Sequence
import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment