Unverified commit 4d086719, authored by HAI, committed by GitHub
Browse files

[Bug] Fix decode stats error on output_len 1 (#1585)

parent 9244f27f
...@@ -340,13 +340,16 @@ def latency_test_run_once( ...@@ -340,13 +340,16 @@ def latency_test_run_once(
rank_print( rank_print(
f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s" f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s"
) )
med_decode_latency = np.median(decode_latencies)
med_decode_throughput = batch_size / med_decode_latency # record decode timing from 2nd output
rank_print( if output_len > 1:
f"Decode. median latency: {med_decode_latency:6.5f} s, median throughput: {med_decode_throughput:9.2f} token/s" med_decode_latency = np.median(decode_latencies)
) med_decode_throughput = batch_size / med_decode_latency
measurement_results["median_decode_latency"] = med_decode_latency rank_print(
measurement_results["median_decode_throughput"] = med_decode_throughput f"Decode. median latency: {med_decode_latency:6.5f} s, median throughput: {med_decode_throughput:9.2f} token/s"
)
measurement_results["median_decode_latency"] = med_decode_latency
measurement_results["median_decode_throughput"] = med_decode_throughput
throughput = (input_len + output_len) * batch_size / tot_latency throughput = (input_len + output_len) * batch_size / tot_latency
rank_print( rank_print(
...@@ -382,7 +385,7 @@ def latency_test( ...@@ -382,7 +385,7 @@ def latency_test(
reqs, reqs,
bench_args.batch_size[0], bench_args.batch_size[0],
bench_args.input_len[0], bench_args.input_len[0],
4, # shorter decoding to speed up the warmup 8, # shorter decoding to speed up the warmup
) )
rank_print("Benchmark ...") rank_print("Benchmark ...")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment