Unverified commit 40828ce5, authored by Wang, Yi and committed by GitHub
Browse files

fix "Total generated tokens:" is 0 if using --backend tgi and --endpo… (#14673)


Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent ffa443af
...@@ -63,7 +63,7 @@ async def async_request_tgi( ...@@ -63,7 +63,7 @@ async def async_request_tgi(
"temperature": 0.01, # TGI does not accept 0.0 temperature. "temperature": 0.01, # TGI does not accept 0.0 temperature.
"top_p": 0.99, # TGI does not accept 1.0 top_p. "top_p": 0.99, # TGI does not accept 1.0 top_p.
"truncate": request_func_input.prompt_len, "truncate": request_func_input.prompt_len,
# TGI does not accept ignore_eos flag. "ignore_eos_token": request_func_input.ignore_eos,
} }
payload = { payload = {
"inputs": request_func_input.prompt, "inputs": request_func_input.prompt,
...@@ -71,6 +71,10 @@ async def async_request_tgi( ...@@ -71,6 +71,10 @@ async def async_request_tgi(
} }
output = RequestFuncOutput() output = RequestFuncOutput()
output.prompt_len = request_func_input.prompt_len output.prompt_len = request_func_input.prompt_len
if request_func_input.ignore_eos:
output.output_tokens = request_func_input.output_len
else:
output.output_tokens = None
ttft = 0.0 ttft = 0.0
st = time.perf_counter() st = time.perf_counter()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment