Unverified Commit 9fe98d42 authored by Kourosh Hakhamaneshi, committed by GitHub
Browse files

[Frontend] Add request_id to the Request object so they can be controlled...


[Frontend] Add request_id to the Request object so they can be controlled better via external load balancers (#21009)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
parent 29c6fbe5
...@@ -1007,6 +1007,13 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -1007,6 +1007,13 @@ class CompletionRequest(OpenAIBaseModel):
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
"if the served model does not use priority scheduling."), "if the served model does not use priority scheduling."),
) )
request_id: str = Field(
default_factory=lambda: f"{random_uuid()}",
description=(
"The request_id related to this request. If the caller does "
"not set it, a random_uuid will be generated. This id is used "
"through out the inference process and return in response."),
)
logits_processors: Optional[LogitsProcessors] = Field( logits_processors: Optional[LogitsProcessors] = Field(
default=None, default=None,
description=( description=(
...@@ -1251,6 +1258,13 @@ class EmbeddingCompletionRequest(OpenAIBaseModel): ...@@ -1251,6 +1258,13 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
"if the served model does not use priority scheduling."), "if the served model does not use priority scheduling."),
) )
request_id: str = Field(
default_factory=lambda: f"{random_uuid()}",
description=(
"The request_id related to this request. If the caller does "
"not set it, a random_uuid will be generated. This id is used "
"through out the inference process and return in response."),
)
# --8<-- [end:embedding-extra-params] # --8<-- [end:embedding-extra-params]
...@@ -1302,6 +1316,13 @@ class EmbeddingChatRequest(OpenAIBaseModel): ...@@ -1302,6 +1316,13 @@ class EmbeddingChatRequest(OpenAIBaseModel):
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
"if the served model does not use priority scheduling."), "if the served model does not use priority scheduling."),
) )
request_id: str = Field(
default_factory=lambda: f"{random_uuid()}",
description=(
"The request_id related to this request. If the caller does "
"not set it, a random_uuid will be generated. This id is used "
"through out the inference process and return in response."),
)
# --8<-- [end:chat-embedding-extra-params] # --8<-- [end:chat-embedding-extra-params]
@model_validator(mode="before") @model_validator(mode="before")
......
...@@ -113,7 +113,9 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -113,7 +113,9 @@ class OpenAIServingCompletion(OpenAIServing):
return self.create_error_response( return self.create_error_response(
"Echo is unsupported with prompt embeds.") "Echo is unsupported with prompt embeds.")
request_id = f"cmpl-{self._base_request_id(raw_request)}" request_id = (
f"cmpl-"
f"{self._base_request_id(raw_request, request.request_id)}")
created_time = int(time.time()) created_time = int(time.time())
request_metadata = RequestResponseMetadata(request_id=request_id) request_metadata = RequestResponseMetadata(request_id=request_id)
......
...@@ -163,8 +163,9 @@ class OpenAIServingEmbedding(EmbeddingMixin): ...@@ -163,8 +163,9 @@ class OpenAIServingEmbedding(EmbeddingMixin):
for the API specification. This API mimics the OpenAI Embedding API. for the API specification. This API mimics the OpenAI Embedding API.
""" """
model_name = self._get_model_name(request.model) model_name = self._get_model_name(request.model)
request_id = (f"{self.request_id_prefix}-" request_id = (
f"{self._base_request_id(raw_request)}") f"{self.request_id_prefix}-"
f"{self._base_request_id(raw_request, request.request_id)}")
ctx = EmbeddingServeContext( ctx = EmbeddingServeContext(
request=request, request=request,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment