[Bugfix] fix missing last itl in openai completions benchmark (#5926)

c4bca740 · mcalman · GitHub · 7f83f40d · c4bca740
Unverified Commit c4bca740 authored Jun 28, 2024 by mcalman Committed by GitHub Jun 29, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 6 deletions

benchmarks/backend_request_func.py benchmarks/backend_request_func.py +5 -6

No files found.
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -265,6 +265,9 @@ async def async_request_openai_completions(
                        else:
                            data = json.loads(chunk)
+                            # NOTE: Some completion API might have a last
+                            # usage summary response without a token so we
+                            # want to check a token was generated
                            if data["choices"][0]["text"]:
                                timestamp = time.perf_counter()
                                # First token
@@ -273,12 +276,8 @@ async def async_request_openai_completions(
                                    output.ttft = ttft
                                # Decoding phase
-                                # NOTE: Some completion API might have a last
-                                # usage summary response without a token so we
-                                # do not want to include as inter-token-latency
-                                elif data.get("usage", None) is None:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
+                                output.itl.append(timestamp -
+                                                  most_recent_timestamp)
                                most_recent_timestamp = timestamp
                                generated_text += data["choices"][0]["text"]