"docs/vscode:/vscode.git/clone" did not exist on "4fc722eca4f6ad63edf1936989f4d2171aab3ca2"
Unverified commit 32102644, authored by Chen Zhang, committed by GitHub
Browse files

[Frontend] Add --log-error-stack to print stack trace for error response (#22960)


Signed-off-by: Chen Zhang <zhangch99@outlook.com>
parent 644d57d5
...@@ -1749,6 +1749,7 @@ async def init_app_state( ...@@ -1749,6 +1749,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs, enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_chat = OpenAIServingChat( state.openai_serving_chat = OpenAIServingChat(
engine_client, engine_client,
...@@ -1767,6 +1768,7 @@ async def init_app_state( ...@@ -1767,6 +1768,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs, enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_completion = OpenAIServingCompletion( state.openai_serving_completion = OpenAIServingCompletion(
engine_client, engine_client,
...@@ -1776,6 +1778,7 @@ async def init_app_state( ...@@ -1776,6 +1778,7 @@ async def init_app_state(
return_tokens_as_token_ids=args.return_tokens_as_token_ids, return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
log_error_stack=args.log_error_stack,
) if "generate" in supported_tasks else None ) if "generate" in supported_tasks else None
state.openai_serving_pooling = OpenAIServingPooling( state.openai_serving_pooling = OpenAIServingPooling(
engine_client, engine_client,
...@@ -1784,6 +1787,7 @@ async def init_app_state( ...@@ -1784,6 +1787,7 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "encode" in supported_tasks else None ) if "encode" in supported_tasks else None
state.openai_serving_embedding = OpenAIServingEmbedding( state.openai_serving_embedding = OpenAIServingEmbedding(
engine_client, engine_client,
...@@ -1792,12 +1796,14 @@ async def init_app_state( ...@@ -1792,12 +1796,14 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) if "embed" in supported_tasks else None ) if "embed" in supported_tasks else None
state.openai_serving_classification = ServingClassification( state.openai_serving_classification = ServingClassification(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "classify" in supported_tasks else None ) if "classify" in supported_tasks else None
enable_serving_reranking = ("classify" in supported_tasks and getattr( enable_serving_reranking = ("classify" in supported_tasks and getattr(
...@@ -1807,6 +1813,7 @@ async def init_app_state( ...@@ -1807,6 +1813,7 @@ async def init_app_state(
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if ("embed" in supported_tasks or enable_serving_reranking) else None ) if ("embed" in supported_tasks or enable_serving_reranking) else None
state.openai_serving_tokenization = OpenAIServingTokenization( state.openai_serving_tokenization = OpenAIServingTokenization(
...@@ -1816,18 +1823,21 @@ async def init_app_state( ...@@ -1816,18 +1823,21 @@ async def init_app_state(
request_logger=request_logger, request_logger=request_logger,
chat_template=resolved_chat_template, chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format, chat_template_content_format=args.chat_template_content_format,
log_error_stack=args.log_error_stack,
) )
state.openai_serving_transcription = OpenAIServingTranscription( state.openai_serving_transcription = OpenAIServingTranscription(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None ) if "transcription" in supported_tasks else None
state.openai_serving_translation = OpenAIServingTranslation( state.openai_serving_translation = OpenAIServingTranslation(
engine_client, engine_client,
model_config, model_config,
state.openai_serving_models, state.openai_serving_models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "transcription" in supported_tasks else None ) if "transcription" in supported_tasks else None
state.enable_server_load_tracking = args.enable_server_load_tracking state.enable_server_load_tracking = args.enable_server_load_tracking
......
...@@ -180,6 +180,8 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`""" ...@@ -180,6 +180,8 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
"""Maximum number of HTTP headers allowed in a request for h11 parser. """Maximum number of HTTP headers allowed in a request for h11 parser.
Helps mitigate header abuse. Default: 256.""" Helps mitigate header abuse. Default: 256."""
log_error_stack: bool = envs.VLLM_SERVER_DEV_MODE
"""If set to True, log the stack trace of error responses"""
@staticmethod @staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
......
...@@ -76,13 +76,15 @@ class OpenAIServingChat(OpenAIServing): ...@@ -76,13 +76,15 @@ class OpenAIServingChat(OpenAIServing):
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
enable_log_outputs: bool = False, enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage) enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack)
self.response_role = response_role self.response_role = response_role
self.chat_template = chat_template self.chat_template = chat_template
......
...@@ -129,12 +129,14 @@ class ServingClassification(ClassificationMixin): ...@@ -129,12 +129,14 @@ class ServingClassification(ClassificationMixin):
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=log_error_stack,
) )
async def create_classify( async def create_classify(
......
...@@ -59,6 +59,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -59,6 +59,7 @@ class OpenAIServingCompletion(OpenAIServing):
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
log_error_stack: bool = False,
): ):
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
...@@ -67,6 +68,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -67,6 +68,7 @@ class OpenAIServingCompletion(OpenAIServing):
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage, enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
) )
self.enable_prompt_tokens_details = enable_prompt_tokens_details self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.default_sampling_params = ( self.default_sampling_params = (
......
...@@ -593,11 +593,13 @@ class OpenAIServingEmbedding(EmbeddingMixin): ...@@ -593,11 +593,13 @@ class OpenAIServingEmbedding(EmbeddingMixin):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format
......
...@@ -5,6 +5,7 @@ import io ...@@ -5,6 +5,7 @@ import io
import json import json
import sys import sys
import time import time
import traceback
from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from http import HTTPStatus from http import HTTPStatus
...@@ -205,6 +206,7 @@ class OpenAIServing: ...@@ -205,6 +206,7 @@ class OpenAIServing:
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
log_error_stack: bool = False,
): ):
super().__init__() super().__init__()
...@@ -222,6 +224,7 @@ class OpenAIServing: ...@@ -222,6 +224,7 @@ class OpenAIServing:
self._async_tokenizer_pool: dict[AnyTokenizer, self._async_tokenizer_pool: dict[AnyTokenizer,
AsyncMicrobatchTokenizer] = {} AsyncMicrobatchTokenizer] = {}
self.log_error_stack = log_error_stack
def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer: def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
""" """
...@@ -412,6 +415,12 @@ class OpenAIServing: ...@@ -412,6 +415,12 @@ class OpenAIServing:
message: str, message: str,
err_type: str = "BadRequestError", err_type: str = "BadRequestError",
status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse: status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
if self.log_error_stack:
exc_type, _, _ = sys.exc_info()
if exc_type is not None:
traceback.print_exc()
else:
traceback.print_stack()
return ErrorResponse(error=ErrorInfo( return ErrorResponse(error=ErrorInfo(
message=message, type=err_type, code=status_code.value)) message=message, type=err_type, code=status_code.value))
......
...@@ -58,11 +58,13 @@ class OpenAIServingPooling(OpenAIServing): ...@@ -58,11 +58,13 @@ class OpenAIServingPooling(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format
......
...@@ -88,6 +88,7 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -88,6 +88,7 @@ class OpenAIServingResponses(OpenAIServing):
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
enable_log_outputs: bool = False, enable_log_outputs: bool = False,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
...@@ -96,6 +97,7 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -96,6 +97,7 @@ class OpenAIServingResponses(OpenAIServing):
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
enable_force_include_usage=enable_force_include_usage, enable_force_include_usage=enable_force_include_usage,
log_error_stack=log_error_stack,
) )
self.chat_template = chat_template self.chat_template = chat_template
......
...@@ -47,11 +47,13 @@ class ServingScores(OpenAIServing): ...@@ -47,11 +47,13 @@ class ServingScores(OpenAIServing):
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
async def _embedding_score( async def _embedding_score(
self, self,
......
...@@ -39,11 +39,13 @@ class OpenAIServingTokenization(OpenAIServing): ...@@ -39,11 +39,13 @@ class OpenAIServingTokenization(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
chat_template: Optional[str], chat_template: Optional[str],
chat_template_content_format: ChatTemplateContentFormatOption, chat_template_content_format: ChatTemplateContentFormatOption,
log_error_stack: bool = False,
) -> None: ) -> None:
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger) request_logger=request_logger,
log_error_stack=log_error_stack)
self.chat_template = chat_template self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format
......
...@@ -32,13 +32,15 @@ class OpenAIServingTranscription(OpenAISpeechToText): ...@@ -32,13 +32,15 @@ class OpenAIServingTranscription(OpenAISpeechToText):
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="transcribe") task_type="transcribe",
log_error_stack=log_error_stack)
async def create_transcription( async def create_transcription(
self, audio_data: bytes, request: TranscriptionRequest, self, audio_data: bytes, request: TranscriptionRequest,
...@@ -88,13 +90,15 @@ class OpenAIServingTranslation(OpenAISpeechToText): ...@@ -88,13 +90,15 @@ class OpenAIServingTranslation(OpenAISpeechToText):
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
task_type="translate") task_type="translate",
log_error_stack=log_error_stack)
async def create_translation( async def create_translation(
self, audio_data: bytes, request: TranslationRequest, self, audio_data: bytes, request: TranslationRequest,
......
...@@ -53,12 +53,14 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -53,12 +53,14 @@ class OpenAISpeechToText(OpenAIServing):
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
return_tokens_as_token_ids: bool = False, return_tokens_as_token_ids: bool = False,
task_type: Literal["transcribe", "translate"] = "transcribe", task_type: Literal["transcribe", "translate"] = "transcribe",
log_error_stack: bool = False,
): ):
super().__init__(engine_client=engine_client, super().__init__(engine_client=engine_client,
model_config=model_config, model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids) return_tokens_as_token_ids=return_tokens_as_token_ids,
log_error_stack=log_error_stack)
self.default_sampling_params = ( self.default_sampling_params = (
self.model_config.get_diff_sampling_param()) self.model_config.get_diff_sampling_param())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment