fix "Total generated tokens:" is 0 if using --backend tgi and --endpoint /generate_stream (#14673)

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

fix "Total generated tokens:" is 0 if using --backend tgi and --endpoint /generate_stream (#14673)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
40828ce5 · Wang, Yi · GitHub · ffa443af · 40828ce5
Unverified Commit 40828ce5 authored Mar 20, 2025 by Wang, Yi Committed by GitHub Mar 19, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

benchmarks/backend_request_func.py benchmarks/backend_request_func.py +5 -1

No files found.
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -63,7 +63,7 @@ async def async_request_tgi(
            "temperature": 0.01,  # TGI does not accept 0.0 temperature.
            "top_p": 0.99,  # TGI does not accept 1.0 top_p.
            "truncate": request_func_input.prompt_len,
-            # TGI does not accept ignore_eos flag.
+            "ignore_eos_token": request_func_input.ignore_eos,
        }
        payload = {
            "inputs": request_func_input.prompt,
@@ -71,6 +71,10 @@ async def async_request_tgi(
        }
        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len
+        if request_func_input.ignore_eos:
+            output.output_tokens = request_func_input.output_len
+        else:
+            output.output_tokens = None
        ttft = 0.0
        st = time.perf_counter()