Unverified commit 0967110e, authored by Woosuk Kwon, committed by GitHub
Browse files

[Minor] Update the tqdm bar for parallel sampling (#14571)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent fb0acb6c
......@@ -1384,8 +1384,9 @@ class LLM:
if use_tqdm:
if isinstance(output, RequestOutput):
# Calculate tokens only for RequestOutput
n = len(output.outputs)
assert output.prompt_token_ids is not None
total_in_toks += len(output.prompt_token_ids)
total_in_toks += len(output.prompt_token_ids) * n
in_spd = total_in_toks / pbar.format_dict["elapsed"]
total_out_toks += sum(
len(stp.token_ids) for stp in output.outputs)
......@@ -1394,7 +1395,7 @@ class LLM:
pbar.postfix = (
f"est. speed input: {in_spd:.2f} toks/s, "
f"output: {out_spd:.2f} toks/s")
pbar.update(len(output.outputs))
pbar.update(n)
else:
pbar.update(1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment