Unverified commit b7030d96, authored by Cyrus Leung, committed by GitHub
Browse files

[Benchmark] Enable benchmark to run with `encoding_format="bytes"` (#27467)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 35678169
...@@ -498,10 +498,17 @@ async def _run_pooling_request( ...@@ -498,10 +498,17 @@ async def _run_pooling_request(
async with session.post(url=api_url, headers=headers, json=payload) as response: async with session.post(url=api_url, headers=headers, json=payload) as response:
if response.status == 200: if response.status == 200:
output.ttft = output.latency = time.perf_counter() - st output.ttft = output.latency = time.perf_counter() - st
data = await response.json()
if payload.get("encoding_format", "float") == "bytes":
metadata = json.loads(response.headers["metadata"])
usage = metadata.get("usage", {})
else:
data = await response.json()
usage = data.get("usage", {})
output.success = True output.success = True
output.generated_text = "" output.generated_text = ""
output.prompt_len = data.get("usage", {}).get("prompt_tokens", 0) output.prompt_len = usage.get("prompt_tokens", 0)
else: else:
output.success = False output.success = False
output.error = response.reason or "" output.error = response.reason or ""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment