Unverified commit 40c0461f, authored by Ning Xie, committed by GitHub
Browse files

[openapi] refactor render related openapi [3/N] (#36749)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 72475968
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import sys
import traceback
from collections.abc import Callable, Sequence
from http import HTTPStatus
from typing import Any
import jinja2
from openai_harmony import Message as OpenAIMessage
from vllm.config import ModelConfig
......@@ -18,7 +15,6 @@ from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
from vllm.entrypoints.openai.engine.protocol import (
ErrorInfo,
ErrorResponse,
ModelCard,
ModelList,
......@@ -30,7 +26,7 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
parse_chat_inputs_to_harmony_messages,
render_for_completion,
)
from vllm.entrypoints.utils import sanitize_message
from vllm.entrypoints.utils import create_error_response
from vllm.inputs.data import ProcessorInputs, PromptType, SingletonPrompt, TokensPrompt
from vllm.logger import init_logger
from vllm.parser import ParserManager
......@@ -102,7 +98,6 @@ class OpenAIServingRender:
logger.error("Error with model %s", error_check_ret)
return error_check_ret
try:
tokenizer = self.renderer.tokenizer
tool_parser = self.tool_parser
......@@ -142,8 +137,7 @@ class OpenAIServingRender:
)
if request.tools is None or (
request.tool_choice == "none"
and self.exclude_tools_when_tool_choice_none
request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none
):
tool_dicts = None
else:
......@@ -174,9 +168,6 @@ class OpenAIServingRender:
conversation, engine_prompts = self._make_request_with_harmony(
request, should_include_tools
)
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(e)
return conversation, engine_prompts
......@@ -204,15 +195,11 @@ class OpenAIServingRender:
"prompt_logprobs is not compatible with prompt embeds."
)
try:
engine_prompts = await self._preprocess_completion(
request,
prompt_input=request.prompt,
prompt_embeds=request.prompt_embeds,
)
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(e)
return engine_prompts
......@@ -284,54 +271,7 @@ class OpenAIServingRender:
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
param: str | None = None,
) -> ErrorResponse:
"""Copied from OpenAIServing.create_error_response."""
exc: Exception | None = None
if isinstance(message, Exception):
exc = message
from vllm.exceptions import VLLMValidationError
if isinstance(exc, VLLMValidationError):
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = exc.parameter
elif isinstance(exc, (ValueError, TypeError, RuntimeError, OverflowError)):
# Common validation errors from user input
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = None
elif isinstance(exc, NotImplementedError):
err_type = "NotImplementedError"
status_code = HTTPStatus.NOT_IMPLEMENTED
param = None
elif exc.__class__.__name__ == "TemplateError":
# jinja2.TemplateError (avoid importing jinja2)
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = None
else:
err_type = "InternalServerError"
status_code = HTTPStatus.INTERNAL_SERVER_ERROR
param = None
message = str(exc)
if self.log_error_stack:
exc_type, _, _ = sys.exc_info()
if exc_type is not None:
traceback.print_exc()
else:
traceback.print_stack()
return ErrorResponse(
error=ErrorInfo(
message=sanitize_message(message),
type=err_type,
code=status_code.value,
param=param,
)
)
return create_error_response(message, err_type, status_code, param)
def _is_model_supported(self, model_name: str) -> bool:
"""Simplified from OpenAIServing._is_model_supported (no LoRA support)."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment