Commit aa3b5797 authored by jerrrrry
Browse files

Update benchmark_throughput.py

parent 7e7f98ea
...@@ -294,9 +294,9 @@ def run_vllm( ...@@ -294,9 +294,9 @@ def run_vllm(
info["ttft_median"] = np.around(np.median(total_ttfts or 0),5) info["ttft_median"] = np.around(np.median(total_ttfts or 0),5)
info["ttft_p99"] = np.around(np.percentile(total_ttfts or 0, 99),5) info["ttft_p99"] = np.around(np.percentile(total_ttfts or 0, 99),5)
info["tpop_mean"] = np.around(np.mean(total_tpops),4) info["tpot_mean"] = np.around(np.mean(total_tpops),4)
info["tpop_median"] = np.around(np.median(total_tpops or 0),5) info["tpot_median"] = np.around(np.median(total_tpops or 0),5)
info["tpop_p99"] = np.around(np.percentile(total_tpops or 0, 99),5) info["tpot_p99"] = np.around(np.percentile(total_tpops or 0, 99),5)
info["output_token_throughput_mean"] = np.around(np.mean(total_output_token_throughput),2) info["output_token_throughput_mean"] = np.around(np.mean(total_output_token_throughput),2)
info["output_token_throughput_median"] = np.around(np.median(total_output_token_throughput or 0),2) info["output_token_throughput_median"] = np.around(np.median(total_output_token_throughput or 0),2)
...@@ -319,9 +319,9 @@ def run_vllm( ...@@ -319,9 +319,9 @@ def run_vllm(
print(f"TTFT_mean: {info['ttft_mean']*1000: .2f} ms") # 首字延时 print(f"TTFT_mean: {info['ttft_mean']*1000: .2f} ms") # 首字延时
print(f"ttft_p99: {info['ttft_p99']*1000: .2f} ms") print(f"ttft_p99: {info['ttft_p99']*1000: .2f} ms")
print(f"ttft_median: {info['ttft_median']*1000: .2f} ms") print(f"ttft_median: {info['ttft_median']*1000: .2f} ms")
print(f"TPOP_mean: {info['tpop_mean']*1000: .2f} ms") # 单字decode时间 print(f"TPOT_mean: {info['tpop_mean']*1000: .2f} ms") # 单字decode时间
print(f"tpop_median: {info['tpop_median']*1000: .2f} ms") print(f"tpot_median: {info['tpop_median']*1000: .2f} ms")
print(f"tpop_p99: {info['tpop_p99']*1000: .2f} ms") print(f"tpot_p99: {info['tpop_p99']*1000: .2f} ms")
print(f"output_token_throughput_mean: {info['output_token_throughput_mean']:.2f} tokens/s") # 单路生成吞吐 print(f"output_token_throughput_mean: {info['output_token_throughput_mean']:.2f} tokens/s") # 单路生成吞吐
print(f"output_token_throughput_median: {info['output_token_throughput_median']:.2f} tokens/s") print(f"output_token_throughput_median: {info['output_token_throughput_median']:.2f} tokens/s")
print(f"output_token_throughput_p99: {info['output_token_throughput_p99']:.2f} tokens/s") print(f"output_token_throughput_p99: {info['output_token_throughput_p99']:.2f} tokens/s")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment