Unverified Commit c4bca740 authored by mcalman's avatar mcalman Committed by GitHub
Browse files

[Bugfix] fix missing last itl in openai completions benchmark (#5926)

parent 7f83f40d
...@@ -265,6 +265,9 @@ async def async_request_openai_completions( ...@@ -265,6 +265,9 @@ async def async_request_openai_completions(
else: else:
data = json.loads(chunk) data = json.loads(chunk)
# NOTE: Some completion API might have a last
# usage summary response without a token so we
# want to check a token was generated
if data["choices"][0]["text"]: if data["choices"][0]["text"]:
timestamp = time.perf_counter() timestamp = time.perf_counter()
# First token # First token
...@@ -273,12 +276,8 @@ async def async_request_openai_completions( ...@@ -273,12 +276,8 @@ async def async_request_openai_completions(
output.ttft = ttft output.ttft = ttft
# Decoding phase # Decoding phase
# NOTE: Some completion API might have a last output.itl.append(timestamp -
# usage summary response without a token so we most_recent_timestamp)
# do not want to include as inter-token-latency
elif data.get("usage", None) is None:
output.itl.append(timestamp -
most_recent_timestamp)
most_recent_timestamp = timestamp most_recent_timestamp = timestamp
generated_text += data["choices"][0]["text"] generated_text += data["choices"][0]["text"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment