Unverified commit fefce498, authored by Chauncey and committed by GitHub
Browse files

[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent a5bbbd2f
...@@ -9,18 +9,20 @@ from fastapi import Request ...@@ -9,18 +9,20 @@ from fastapi import Request
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse,
)
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.renderer import RenderConfig
from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest, DetokenizeRequest,
DetokenizeResponse, DetokenizeResponse,
ErrorResponse,
TokenizeChatRequest, TokenizeChatRequest,
TokenizeRequest, TokenizeRequest,
TokenizeResponse, TokenizeResponse,
TokenizerInfoResponse, TokenizerInfoResponse,
) )
from vllm.entrypoints.openai.serving_engine import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.renderer import RenderConfig
from vllm.inputs import TokensPrompt from vllm.inputs import TokensPrompt
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
......
...@@ -22,9 +22,11 @@ from vllm.entrypoints.chat_utils import ( ...@@ -22,9 +22,11 @@ from vllm.entrypoints.chat_utils import (
resolve_hf_chat_template, resolve_hf_chat_template,
resolve_mistral_chat_template, resolve_mistral_chat_template,
) )
from vllm.entrypoints.openai.cli_args import make_arg_parser from vllm.entrypoints.openai.chat_completion.protocol import (
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.entrypoints.openai.engine.protocol import (
CompletionRequest, CompletionRequest,
StreamOptions, StreamOptions,
) )
......
...@@ -14,8 +14,10 @@ from vllm.utils.collection_utils import is_list_of ...@@ -14,8 +14,10 @@ from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path from vllm.utils.import_utils import import_from_path
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
ResponsesRequest, ResponsesRequest,
) )
......
...@@ -5,13 +5,15 @@ from abc import abstractmethod ...@@ -5,13 +5,15 @@ from abc import abstractmethod
from collections.abc import Sequence from collections.abc import Sequence
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
from vllm.entrypoints.openai.protocol import DeltaMessage from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
ResponsesRequest, ResponsesRequest,
) )
else: else:
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
from collections.abc import Sequence from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import DeltaMessage from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
...@@ -5,7 +5,10 @@ from collections.abc import Sequence ...@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
......
...@@ -5,7 +5,10 @@ from collections.abc import Sequence ...@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
...@@ -5,8 +5,11 @@ from collections.abc import Sequence ...@@ -5,8 +5,11 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.entrypoints.openai.parser.harmony_utils import parse_chat_output from vllm.entrypoints.openai.parser.harmony_utils import parse_chat_output
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.tool_server import ToolServer from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
......
...@@ -6,7 +6,10 @@ from collections.abc import Sequence ...@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re import regex as re
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
......
...@@ -3,7 +3,10 @@ ...@@ -3,7 +3,10 @@
from collections.abc import Sequence from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ( from vllm.reasoning import (
ReasoningParser, ReasoningParser,
......
...@@ -6,7 +6,10 @@ from collections.abc import Sequence ...@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re import regex as re
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
......
...@@ -5,7 +5,10 @@ from collections.abc import Sequence ...@@ -5,7 +5,10 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
from collections.abc import Sequence from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
ResponsesRequest, ResponsesRequest,
) )
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
from functools import cached_property from functools import cached_property
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
ResponsesRequest, ResponsesRequest,
) )
from vllm.logger import init_logger from vllm.logger import init_logger
......
...@@ -10,9 +10,10 @@ import regex as re ...@@ -10,9 +10,10 @@ import regex as re
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.entrypoints.openai.chat_completion.protocol import (
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
ResponsesRequest, ResponsesRequest,
) )
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.openai.chat_completion.protocol import (
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ResponsesRequest ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import ResponsesRequest
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
...@@ -6,7 +6,10 @@ from collections.abc import Sequence ...@@ -6,7 +6,10 @@ from collections.abc import Sequence
import regex as re import regex as re
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
......
...@@ -19,7 +19,7 @@ from mistral_common.tokens.tokenizers.sentencepiece import ( ...@@ -19,7 +19,7 @@ from mistral_common.tokens.tokenizers.sentencepiece import (
from mistral_common.tokens.tokenizers.tekken import Tekkenizer from mistral_common.tokens.tokenizers.tekken import Tekkenizer
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.logger import init_logger from vllm.logger import init_logger
from .protocol import TokenizerLike from .protocol import TokenizerLike
......
...@@ -10,8 +10,8 @@ from openai.types.responses.response_format_text_json_schema_config import ( ...@@ -10,8 +10,8 @@ from openai.types.responses.response_format_text_json_schema_config import (
ResponseFormatTextJSONSchemaConfig, ResponseFormatTextJSONSchemaConfig,
) )
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
ChatCompletionRequest, from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
ExtractedToolCallInformation, ExtractedToolCallInformation,
ResponsesRequest, ResponsesRequest,
......
...@@ -6,8 +6,10 @@ from collections.abc import Sequence ...@@ -6,8 +6,10 @@ from collections.abc import Sequence
import regex as re import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall, DeltaFunctionCall,
DeltaMessage, DeltaMessage,
DeltaToolCall, DeltaToolCall,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment