"src/vscode:/vscode.git/clone" did not exist on "52b460feb98740d68b44aaef4d68470170b3c4a6"
Unverified commit 066cf445, authored by Chang Su, committed by GitHub
Browse files

[OAI] Add rid tracing for v1/embeddings and fix rid type in Chat (#6397)

parent 6dc6b306
...@@ -918,8 +918,8 @@ class FlashAttentionBackend(AttentionBackend): ...@@ -918,8 +918,8 @@ class FlashAttentionBackend(AttentionBackend):
and local_attn_metadata is not None and local_attn_metadata is not None
and (hasattr(layer, "use_irope") and layer.use_irope) and (hasattr(layer, "use_irope") and layer.use_irope)
) )
# When Spec Decode enabled, forward_decode would be called with two mode: # When Spec Decode enabled, forward_decode would be called with two mode:
# 1. DRAFT_DECODE: we enable cascade attention when top_k > 1 # 1. DRAFT_DECODE: we enable cascade attention when top_k > 1
# 2. IDLE: we don’t need cascade attention, spec_info will be none in this case # 2. IDLE: we don’t need cascade attention, spec_info will be none in this case
use_cascade_attn = forward_batch.spec_info is not None and self.topk > 1 use_cascade_attn = forward_batch.spec_info is not None and self.topk > 1
......
...@@ -1827,8 +1827,10 @@ def v1_embedding_request(all_requests, tokenizer_manager): ...@@ -1827,8 +1827,10 @@ def v1_embedding_request(all_requests, tokenizer_manager):
) )
else: else:
prompt_kwargs = {"input_ids": prompts} prompt_kwargs = {"input_ids": prompts}
request_ids = [req.request_id for req in all_requests]
adapted_request = EmbeddingReqInput( adapted_request = EmbeddingReqInput(
rid=request_ids,
**prompt_kwargs, **prompt_kwargs,
) )
......
...@@ -393,7 +393,7 @@ class ChatCompletionRequest(BaseModel): ...@@ -393,7 +393,7 @@ class ChatCompletionRequest(BaseModel):
chat_template_kwargs: Optional[Dict] = None chat_template_kwargs: Optional[Dict] = None
# The request id. # The request id.
rid: Optional[Union[List[str], str]] = None rid: Optional[str] = None
# For PD disaggregation # For PD disaggregation
bootstrap_host: Optional[str] = None bootstrap_host: Optional[str] = None
...@@ -469,6 +469,9 @@ class EmbeddingRequest(BaseModel): ...@@ -469,6 +469,9 @@ class EmbeddingRequest(BaseModel):
dimensions: int = None dimensions: int = None
user: Optional[str] = None user: Optional[str] = None
# The request id.
rid: Optional[str] = None
class EmbeddingObject(BaseModel): class EmbeddingObject(BaseModel):
embedding: List[float] embedding: List[float]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment