Unverified commit 82eb5ea8, authored by Chendi.Xue, committed by GitHub
Browse files

Benchmark serving structured output (#10880)


Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Co-authored-by: Michael Goin <michael@neuralmagic.com>
parent 10398b47
......@@ -24,6 +24,7 @@ class RequestFuncInput:
model: str
best_of: int = 1
logprobs: Optional[int] = None
extra_body: Optional[dict] = None
multi_modal_content: Optional[dict] = None
ignore_eos: bool = False
......@@ -36,6 +37,7 @@ class RequestFuncOutput:
ttft: float = 0.0 # Time to first token
itl: List[float] = field(
default_factory=list) # List of inter-token latencies
tpot: float = 0.0 # avg next-token latencies
prompt_len: int = 0
error: str = ""
......@@ -242,6 +244,8 @@ async def async_request_openai_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}
......@@ -336,6 +340,8 @@ async def async_request_openai_chat_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment