Unverified commit 82eb5ea8, authored by Chendi.Xue, committed by GitHub
Browse files

Benchmark serving structured output (#10880)


Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Co-authored-by: Michael Goin <michael@neuralmagic.com>
parent 10398b47
...@@ -24,6 +24,7 @@ class RequestFuncInput: ...@@ -24,6 +24,7 @@ class RequestFuncInput:
model: str model: str
best_of: int = 1 best_of: int = 1
logprobs: Optional[int] = None logprobs: Optional[int] = None
extra_body: Optional[dict] = None
multi_modal_content: Optional[dict] = None multi_modal_content: Optional[dict] = None
ignore_eos: bool = False ignore_eos: bool = False
...@@ -36,6 +37,7 @@ class RequestFuncOutput: ...@@ -36,6 +37,7 @@ class RequestFuncOutput:
ttft: float = 0.0 # Time to first token ttft: float = 0.0 # Time to first token
itl: List[float] = field( itl: List[float] = field(
default_factory=list) # List of inter-token latencies default_factory=list) # List of inter-token latencies
tpot: float = 0.0 # avg next-token latencies
prompt_len: int = 0 prompt_len: int = 0
error: str = "" error: str = ""
...@@ -242,6 +244,8 @@ async def async_request_openai_completions( ...@@ -242,6 +244,8 @@ async def async_request_openai_completions(
"stream": True, "stream": True,
"ignore_eos": request_func_input.ignore_eos, "ignore_eos": request_func_input.ignore_eos,
} }
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = { headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}" "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
} }
...@@ -336,6 +340,8 @@ async def async_request_openai_chat_completions( ...@@ -336,6 +340,8 @@ async def async_request_openai_chat_completions(
"stream": True, "stream": True,
"ignore_eos": request_func_input.ignore_eos, "ignore_eos": request_func_input.ignore_eos,
} }
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}", "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment