Unverified commit 2f9f946b, authored by Chauncey and committed by GitHub
Browse files

[P/D] AnthropicMessages add kv_transfer_params for PD disaggregation (#37535)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 2890aecc
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import time import time
from typing import Any, Literal from typing import Any, Literal
from pydantic import BaseModel, field_validator, model_validator from pydantic import BaseModel, Field, field_validator, model_validator
class AnthropicError(BaseModel): class AnthropicError(BaseModel):
...@@ -112,6 +112,12 @@ class AnthropicMessagesRequest(BaseModel): ...@@ -112,6 +112,12 @@ class AnthropicMessagesRequest(BaseModel):
top_k: int | None = None top_k: int | None = None
top_p: float | None = None top_p: float | None = None
# vLLM-specific fields that are not in Anthropic spec
kv_transfer_params: dict[str, Any] | None = Field(
default=None,
description="KVTransfer parameters used for disaggregated serving.",
)
@field_validator("model") @field_validator("model")
@classmethod @classmethod
def validate_model(cls, v): def validate_model(cls, v):
...@@ -181,6 +187,11 @@ class AnthropicMessagesResponse(BaseModel): ...@@ -181,6 +187,11 @@ class AnthropicMessagesResponse(BaseModel):
stop_sequence: str | None = None stop_sequence: str | None = None
usage: AnthropicUsage | None = None usage: AnthropicUsage | None = None
# vLLM-specific fields that are not in Anthropic spec
kv_transfer_params: dict[str, Any] | None = Field(
default=None, description="KVTransfer parameters."
)
def model_post_init(self, __context): def model_post_init(self, __context):
if not self.id: if not self.id:
self.id = f"msg_{int(time.time() * 1000)}" self.id = f"msg_{int(time.time() * 1000)}"
......
...@@ -331,6 +331,7 @@ class AnthropicServingMessages(OpenAIServingChat): ...@@ -331,6 +331,7 @@ class AnthropicServingMessages(OpenAIServingChat):
temperature=anthropic_request.temperature, temperature=anthropic_request.temperature,
top_p=anthropic_request.top_p, top_p=anthropic_request.top_p,
top_k=anthropic_request.top_k, top_k=anthropic_request.top_k,
kv_transfer_params=anthropic_request.kv_transfer_params,
) )
@classmethod @classmethod
...@@ -441,6 +442,7 @@ class AnthropicServingMessages(OpenAIServingChat): ...@@ -441,6 +442,7 @@ class AnthropicServingMessages(OpenAIServingChat):
input_tokens=generator.usage.prompt_tokens, input_tokens=generator.usage.prompt_tokens,
output_tokens=generator.usage.completion_tokens, output_tokens=generator.usage.completion_tokens,
), ),
kv_transfer_params=generator.kv_transfer_params,
) )
choice = generator.choices[0] choice = generator.choices[0]
if choice.finish_reason == "stop": if choice.finish_reason == "stop":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment