Unverified Commit 40c0461f authored by Ning Xie, committed by GitHub
Browse files

[openapi] refactor render related openapi [3/N] (#36749)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 72475968
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import sys
import traceback
from collections.abc import Callable, Sequence from collections.abc import Callable, Sequence
from http import HTTPStatus from http import HTTPStatus
from typing import Any from typing import Any
import jinja2
from openai_harmony import Message as OpenAIMessage from openai_harmony import Message as OpenAIMessage
from vllm.config import ModelConfig from vllm.config import ModelConfig
...@@ -18,7 +15,6 @@ from vllm.entrypoints.logger import RequestLogger ...@@ -18,7 +15,6 @@ from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.completion.protocol import CompletionRequest from vllm.entrypoints.openai.completion.protocol import CompletionRequest
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorInfo,
ErrorResponse, ErrorResponse,
ModelCard, ModelCard,
ModelList, ModelList,
...@@ -30,7 +26,7 @@ from vllm.entrypoints.openai.parser.harmony_utils import ( ...@@ -30,7 +26,7 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
parse_chat_inputs_to_harmony_messages, parse_chat_inputs_to_harmony_messages,
render_for_completion, render_for_completion,
) )
from vllm.entrypoints.utils import sanitize_message from vllm.entrypoints.utils import create_error_response
from vllm.inputs.data import ProcessorInputs, PromptType, SingletonPrompt, TokensPrompt from vllm.inputs.data import ProcessorInputs, PromptType, SingletonPrompt, TokensPrompt
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.parser import ParserManager from vllm.parser import ParserManager
...@@ -102,7 +98,6 @@ class OpenAIServingRender: ...@@ -102,7 +98,6 @@ class OpenAIServingRender:
logger.error("Error with model %s", error_check_ret) logger.error("Error with model %s", error_check_ret)
return error_check_ret return error_check_ret
try:
tokenizer = self.renderer.tokenizer tokenizer = self.renderer.tokenizer
tool_parser = self.tool_parser tool_parser = self.tool_parser
...@@ -142,8 +137,7 @@ class OpenAIServingRender: ...@@ -142,8 +137,7 @@ class OpenAIServingRender:
) )
if request.tools is None or ( if request.tools is None or (
request.tool_choice == "none" request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none
and self.exclude_tools_when_tool_choice_none
): ):
tool_dicts = None tool_dicts = None
else: else:
...@@ -174,9 +168,6 @@ class OpenAIServingRender: ...@@ -174,9 +168,6 @@ class OpenAIServingRender:
conversation, engine_prompts = self._make_request_with_harmony( conversation, engine_prompts = self._make_request_with_harmony(
request, should_include_tools request, should_include_tools
) )
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(e)
return conversation, engine_prompts return conversation, engine_prompts
...@@ -204,15 +195,11 @@ class OpenAIServingRender: ...@@ -204,15 +195,11 @@ class OpenAIServingRender:
"prompt_logprobs is not compatible with prompt embeds." "prompt_logprobs is not compatible with prompt embeds."
) )
try:
engine_prompts = await self._preprocess_completion( engine_prompts = await self._preprocess_completion(
request, request,
prompt_input=request.prompt, prompt_input=request.prompt,
prompt_embeds=request.prompt_embeds, prompt_embeds=request.prompt_embeds,
) )
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(e)
return engine_prompts return engine_prompts
...@@ -284,54 +271,7 @@ class OpenAIServingRender: ...@@ -284,54 +271,7 @@ class OpenAIServingRender:
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST, status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
param: str | None = None, param: str | None = None,
) -> ErrorResponse: ) -> ErrorResponse:
"""Copied from OpenAIServing.create_error_response.""" return create_error_response(message, err_type, status_code, param)
exc: Exception | None = None
if isinstance(message, Exception):
exc = message
from vllm.exceptions import VLLMValidationError
if isinstance(exc, VLLMValidationError):
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = exc.parameter
elif isinstance(exc, (ValueError, TypeError, RuntimeError, OverflowError)):
# Common validation errors from user input
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = None
elif isinstance(exc, NotImplementedError):
err_type = "NotImplementedError"
status_code = HTTPStatus.NOT_IMPLEMENTED
param = None
elif exc.__class__.__name__ == "TemplateError":
# jinja2.TemplateError (avoid importing jinja2)
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = None
else:
err_type = "InternalServerError"
status_code = HTTPStatus.INTERNAL_SERVER_ERROR
param = None
message = str(exc)
if self.log_error_stack:
exc_type, _, _ = sys.exc_info()
if exc_type is not None:
traceback.print_exc()
else:
traceback.print_stack()
return ErrorResponse(
error=ErrorInfo(
message=sanitize_message(message),
type=err_type,
code=status_code.value,
param=param,
)
)
def _is_model_supported(self, model_name: str) -> bool: def _is_model_supported(self, model_name: str) -> bool:
"""Simplified from OpenAIServing._is_model_supported (no LoRA support).""" """Simplified from OpenAIServing._is_model_supported (no LoRA support)."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment