Unverified commit 4d086719, authored by HAI and committed by GitHub
Browse files

[Bug] Fix decode stats error on output_len 1 (#1585)

parent 9244f27f
...@@ -340,6 +340,9 @@ def latency_test_run_once( ...@@ -340,6 +340,9 @@ def latency_test_run_once(
rank_print( rank_print(
f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s" f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s"
) )
# record decode timing from 2nd output
if output_len > 1:
med_decode_latency = np.median(decode_latencies) med_decode_latency = np.median(decode_latencies)
med_decode_throughput = batch_size / med_decode_latency med_decode_throughput = batch_size / med_decode_latency
rank_print( rank_print(
...@@ -382,7 +385,7 @@ def latency_test( ...@@ -382,7 +385,7 @@ def latency_test(
reqs, reqs,
bench_args.batch_size[0], bench_args.batch_size[0],
bench_args.input_len[0], bench_args.input_len[0],
4, # shorter decoding to speed up the warmup 8, # shorter decoding to speed up the warmup
) )
rank_print("Benchmark ...") rank_print("Benchmark ...")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment