Unverified commit fefce498, authored by Chauncey, committed by GitHub
Browse files

[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent a5bbbd2f
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
from collections.abc import Iterable from collections.abc import Iterable
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
ChatCompletionRequest, from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
ExtractedToolCallInformation, ExtractedToolCallInformation,
FunctionCall, FunctionCall,
......
...@@ -4,11 +4,13 @@ import json ...@@ -4,11 +4,13 @@ import json
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from typing import Any from typing import Any
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionResponse, ChatCompletionResponse,
ChatCompletionResponseChoice, ChatCompletionResponseChoice,
ChatCompletionStreamResponse, ChatCompletionStreamResponse,
ChatMessage, ChatMessage,
)
from vllm.entrypoints.openai.engine.protocol import (
UsageInfo, UsageInfo,
) )
......
...@@ -5,7 +5,7 @@ import pytest ...@@ -5,7 +5,7 @@ import pytest
from transformers import AutoTokenizer from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction from tests.reasoning.utils import run_reasoning_extraction
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
......
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
import pytest import pytest
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser
from vllm.tokenizers.mistral import MistralTokenizer from vllm.tokenizers.mistral import MistralTokenizer
......
...@@ -7,8 +7,8 @@ from collections.abc import Generator ...@@ -7,8 +7,8 @@ from collections.abc import Generator
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
ChatCompletionRequest, from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
FunctionCall, FunctionCall,
ToolCall, ToolCall,
......
...@@ -5,7 +5,7 @@ from unittest.mock import MagicMock ...@@ -5,7 +5,7 @@ from unittest.mock import MagicMock
import pytest import pytest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.tool_parsers.functiongemma_tool_parser import FunctionGemmaToolParser from vllm.tool_parsers.functiongemma_tool_parser import FunctionGemmaToolParser
......
...@@ -6,7 +6,7 @@ import json ...@@ -6,7 +6,7 @@ import json
import pytest import pytest
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.glm4_moe_tool_parser import ( from vllm.tool_parsers.glm4_moe_tool_parser import (
Glm4MoeModelToolParser, Glm4MoeModelToolParser,
......
...@@ -8,7 +8,7 @@ import partial_json_parser ...@@ -8,7 +8,7 @@ import partial_json_parser
import pytest import pytest
from partial_json_parser.core.options import Allow from partial_json_parser.core.options import Allow
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall from vllm.entrypoints.openai.engine.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.tokenizers import TokenizerLike, get_tokenizer from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.jamba_tool_parser import JambaToolParser from vllm.tool_parsers.jamba_tool_parser import JambaToolParser
......
...@@ -6,7 +6,7 @@ import json ...@@ -6,7 +6,7 @@ import json
import pytest import pytest
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
......
...@@ -7,8 +7,10 @@ from typing import Any ...@@ -7,8 +7,10 @@ from typing import Any
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam, ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
FunctionCall, FunctionCall,
ToolCall, ToolCall,
) )
......
...@@ -11,7 +11,7 @@ from mistral_common.protocol.instruct.request import InstructRequest ...@@ -11,7 +11,7 @@ from mistral_common.protocol.instruct.request import InstructRequest
from mistral_common.protocol.instruct.tool_calls import FunctionCall, ToolCall from mistral_common.protocol.instruct.tool_calls import FunctionCall, ToolCall
from partial_json_parser.core.options import Allow from partial_json_parser.core.options import Allow
from vllm.entrypoints.openai.protocol import DeltaMessage, DeltaToolCall from vllm.entrypoints.openai.engine.protocol import DeltaMessage, DeltaToolCall
from vllm.tokenizers import TokenizerLike, get_tokenizer from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tokenizers.mistral import MistralTokenizer from vllm.tokenizers.mistral import MistralTokenizer
......
...@@ -14,7 +14,7 @@ from openai_harmony import ( ...@@ -14,7 +14,7 @@ from openai_harmony import (
load_harmony_encoding, load_harmony_encoding,
) )
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
from vllm.tokenizers import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser
......
...@@ -6,9 +6,11 @@ from collections.abc import Generator ...@@ -6,9 +6,11 @@ from collections.abc import Generator
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionToolsParam, ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
FunctionCall, FunctionCall,
ToolCall, ToolCall,
......
...@@ -7,9 +7,11 @@ from collections.abc import Generator ...@@ -7,9 +7,11 @@ from collections.abc import Generator
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionToolsParam, ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
FunctionCall, FunctionCall,
ToolCall, ToolCall,
......
...@@ -6,8 +6,8 @@ from collections.abc import Generator ...@@ -6,8 +6,8 @@ from collections.abc import Generator
import pytest import pytest
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
ChatCompletionRequest, from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage, DeltaMessage,
FunctionCall, FunctionCall,
ToolCall, ToolCall,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
import pytest import pytest
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
def test_chat_completion_request_with_no_tools(): def test_chat_completion_request_with_no_tools():
......
...@@ -8,10 +8,10 @@ import pytest ...@@ -8,10 +8,10 @@ import pytest
import regex as re import regex as re
from pydantic import TypeAdapter from pydantic import TypeAdapter
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam, ChatCompletionToolsParam,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.tool_parsers.utils import get_json_schema_from_tools from vllm.tool_parsers.utils import get_json_schema_from_tools
pytestmark = pytest.mark.cpu_test pytestmark = pytest.mark.cpu_test
......
...@@ -11,12 +11,14 @@ from vllm import SamplingParams ...@@ -11,12 +11,14 @@ from vllm import SamplingParams
from vllm.assets.image import ImageAsset from vllm.assets.image import ImageAsset
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionResponse, ChatCompletionResponse,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.inputs import PromptType from vllm.inputs import PromptType
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
......
...@@ -25,16 +25,18 @@ from vllm.entrypoints.anthropic.protocol import ( ...@@ -25,16 +25,18 @@ from vllm.entrypoints.anthropic.protocol import (
) )
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionNamedToolChoiceParam, ChatCompletionNamedToolChoiceParam,
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionResponse, ChatCompletionResponse,
ChatCompletionStreamResponse, ChatCompletionStreamResponse,
ChatCompletionToolsParam, ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
StreamOptions, StreamOptions,
) )
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import OpenAIServingModels from vllm.entrypoints.openai.serving_models import OpenAIServingModels
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment