Unverified commit 4c1c501a, authored by Chauncey and committed by GitHub
Browse files

[Refactor] [10/N] to simplify the vLLM openai completion serving architecture (#32369)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent ae1eba6a
...@@ -9,10 +9,10 @@ from fastapi.exceptions import RequestValidationError ...@@ -9,10 +9,10 @@ from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from typing_extensions import assert_never from typing_extensions import assert_never
from vllm.entrypoints.openai.api_server import validate_json_request
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
) )
from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.serve.tokenize.protocol import ( from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest, DetokenizeRequest,
DetokenizeResponse, DetokenizeResponse,
......
...@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.engine.protocol import ( ...@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
) )
from vllm.entrypoints.openai.engine.serving import OpenAIServing from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.renderer import RenderConfig from vllm.entrypoints.renderer import RenderConfig
from vllm.entrypoints.serve.tokenize.protocol import ( from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest, DetokenizeRequest,
......
...@@ -31,11 +31,13 @@ if TYPE_CHECKING: ...@@ -31,11 +31,13 @@ if TYPE_CHECKING:
from vllm.entrypoints.openai.chat_completion.protocol import ( from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
) )
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.completion.protocol import (
CompletionRequest, CompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
StreamOptions, StreamOptions,
) )
from vllm.entrypoints.openai.serving_models import LoRAModulePath from vllm.entrypoints.openai.models.protocol import LoRAModulePath
else: else:
ChatCompletionRequest = object ChatCompletionRequest = object
CompletionRequest = object CompletionRequest = object
...@@ -281,7 +283,7 @@ def should_include_usage( ...@@ -281,7 +283,7 @@ def should_include_usage(
def process_lora_modules( def process_lora_modules(
args_lora_modules: list[LoRAModulePath], default_mm_loras: dict[str, str] | None args_lora_modules: list[LoRAModulePath], default_mm_loras: dict[str, str] | None
) -> list[LoRAModulePath]: ) -> list[LoRAModulePath]:
from vllm.entrypoints.openai.serving_models import LoRAModulePath from vllm.entrypoints.openai.models.serving import LoRAModulePath
lora_modules = args_lora_modules lora_modules = args_lora_modules
if default_mm_loras: if default_mm_loras:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment