Unverified commit 79ed460d, authored by Kevin Šuc and committed by GitHub
Browse files

[Frontend] [Doc] Exclude log deltas feature (#30322)


Signed-off-by: Catacomba <kevinsuc16@gmail.com>
Signed-off-by: Kevin Šuc <kevinsuc16@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent 6aa5b18e
...@@ -1091,6 +1091,7 @@ async def init_app_state( ...@@ -1091,6 +1091,7 @@ async def init_app_state(
enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage, enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs, enable_log_outputs=args.enable_log_outputs,
exclude_log_deltas=args.exclude_log_deltas,
log_error_stack=args.log_error_stack, log_error_stack=args.log_error_stack,
) )
if "generate" in supported_tasks if "generate" in supported_tasks
......
...@@ -187,6 +187,9 @@ class FrontendArgs: ...@@ -187,6 +187,9 @@ class FrontendArgs:
enable_log_outputs: bool = False enable_log_outputs: bool = False
"""If True, log model outputs (generations). """If True, log model outputs (generations).
Requires --enable-log-requests.""" Requires --enable-log-requests."""
exclude_log_deltas: bool = False
"""If True, model outputs will be logged once streaming is complete. Deltas
will not be logged. Requires --enable-log-outputs."""
h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
"""Maximum size (bytes) of an incomplete HTTP event (header or body) for """Maximum size (bytes) of an incomplete HTTP event (header or body) for
h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB).""" h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
...@@ -305,6 +308,8 @@ def validate_parsed_serve_args(args: argparse.Namespace): ...@@ -305,6 +308,8 @@ def validate_parsed_serve_args(args: argparse.Namespace):
# Enable auto tool needs a tool call parser to be valid # Enable auto tool needs a tool call parser to be valid
if args.enable_auto_tool_choice and not args.tool_call_parser: if args.enable_auto_tool_choice and not args.tool_call_parser:
raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser") raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser")
if args.exclude_log_deltas and not args.enable_log_outputs:
raise TypeError("Error: --exclude-log-deltas requires --enable-log-outputs")
if args.enable_log_outputs and not args.enable_log_requests: if args.enable_log_outputs and not args.enable_log_requests:
raise TypeError("Error: --enable-log-outputs requires --enable-log-requests") raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
......
...@@ -101,6 +101,7 @@ class OpenAIServingChat(OpenAIServing): ...@@ -101,6 +101,7 @@ class OpenAIServingChat(OpenAIServing):
enable_prompt_tokens_details: bool = False, enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False, enable_force_include_usage: bool = False,
enable_log_outputs: bool = False, enable_log_outputs: bool = False,
exclude_log_deltas: bool = False,
log_error_stack: bool = False, log_error_stack: bool = False,
default_chat_template_kwargs: dict[str, Any] | None = None, default_chat_template_kwargs: dict[str, Any] | None = None,
) -> None: ) -> None:
...@@ -118,6 +119,7 @@ class OpenAIServingChat(OpenAIServing): ...@@ -118,6 +119,7 @@ class OpenAIServingChat(OpenAIServing):
self.trust_request_chat_template = trust_request_chat_template self.trust_request_chat_template = trust_request_chat_template
self.default_chat_template_kwargs = default_chat_template_kwargs or {} self.default_chat_template_kwargs = default_chat_template_kwargs or {}
self.enable_log_outputs = enable_log_outputs self.enable_log_outputs = enable_log_outputs
self.exclude_log_deltas = exclude_log_deltas
# set up logits processors # set up logits processors
self.logits_processors = self.model_config.logits_processors self.logits_processors = self.model_config.logits_processors
...@@ -1135,7 +1137,7 @@ class OpenAIServingChat(OpenAIServing): ...@@ -1135,7 +1137,7 @@ class OpenAIServingChat(OpenAIServing):
if tc.function and tc.function.arguments if tc.function and tc.function.arguments
) )
if delta_content: if delta_content and not self.exclude_log_deltas:
self.request_logger.log_outputs( self.request_logger.log_outputs(
request_id=request_id, request_id=request_id,
outputs=delta_content, outputs=delta_content,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment