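Update benchmark_throughput_0.8.5.py: rename TPOP metrics to TPOT, print TTFT/TPOT latencies in ms, and fix the "tinout_token_throughput_median" label typo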

f3ce07a4 · jerrrrry · e8491994 · f3ce07a4
Commit f3ce07a4 authored Jul 02, 2025 by jerrrrry
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 14 deletions

085-offline/benchmark_throughput_0.8.5.py 085-offline/benchmark_throughput_0.8.5.py +14 -14

No files found.
--- a/085-offline/benchmark_throughput_0.8.5.py
+++ b/085-offline/benchmark_throughput_0.8.5.py
@@ -248,18 +248,18 @@ def run_vllm(
                    ))
                end = time.perf_counter()
            total_ttfts = []
-            total_tpops = []
+            total_tpots = []
            total_output_token_throughput = []
            total_inout_token_throughput = []

            for output in real_output:

                ttft_ = output.metrics.first_token_time - output.metrics.arrival_time
-                tpop_ = (output.metrics.finished_time - output.metrics.arrival_time - ttft_) / (ELEoutput-1)
+                tpot_ = (output.metrics.finished_time - output.metrics.arrival_time - ttft_) / (ELEoutput-1)
                output_token_throughput = (ELEoutput) / (output.metrics.finished_time - output.metrics.arrival_time)
                inout_token_throughput = (ELEoutput + ELEinput) / (output.metrics.finished_time - output.metrics.arrival_time)
                total_ttfts.append(ttft_)
-                total_tpops.append(tpop_)
+                total_tpots.append(tpot_)
                total_output_token_throughput.append(output_token_throughput)
                total_inout_token_throughput.append(inout_token_throughput)

@@ -294,9 +294,9 @@ def run_vllm(
            info["ttft_median"] = np.around(np.median(total_ttfts or 0),5)
            info["ttft_p99"] = np.around(np.percentile(total_ttfts or 0, 99),5)

-            info["tpop_mean"] = np.around(np.mean(total_tpops),4)
-            info["tpop_median"] = np.around(np.median(total_tpops or 0),5)
-            info["tpop_p99"] = np.around(np.percentile(total_tpops or 0, 99),5)
+            info["tpot_mean"] = np.around(np.mean(total_tpots),4)
+            info["tpot_median"] = np.around(np.median(total_tpots or 0),5)
+            info["tpot_p99"] = np.around(np.percentile(total_tpots or 0, 99),5)

            info["output_token_throughput_mean"]  = np.around(np.mean(total_output_token_throughput),2)
            info["output_token_throughput_median"]  = np.around(np.median(total_output_token_throughput or 0),2)
@@ -316,17 +316,17 @@ def run_vllm(
            print("==============================================")
            print(f"total_out_tokens: {total_output_tokens: .2f} tokens")
            print(f"elapsed_time: {info['elapsed_time']: .2f} s")      # 总耗时
-            print(f"TTFT_mean: {info['ttft_mean']: .5f} s")           # 首字延时
-            print(f"ttft_p99: {info['ttft_p99']: .5f} s")
-            print(f"ttft_median: {info['ttft_median']: .5f} s")
-            print(f"TPOP_mean: {info['tpop_mean']: .5f} s")           # 单字decode时间
-            print(f"tpop_median: {info['tpop_median']: .5f} s")
-            print(f"tpop_p99: {info['tpop_p99']: .5f} s")
+            print(f"TTFT_mean: {info['ttft_mean']*1000: .2f} ms")       # 首字延时
+            print(f"ttft_p99: {info['ttft_p99']*1000: .2f} ms")
+            print(f"ttft_median: {info['ttft_median']*1000: .2f} ms")
+            print(f"TPOT_mean: {info['tpot_mean']*1000: .2f} ms")              # 单字decode时间
+            print(f"tpot_median: {info['tpot_median']*1000: .2f} ms")
+            print(f"tpot_p99: {info['tpot_p99']*1000: .2f} ms")
            print(f"output_token_throughput_mean: {info['output_token_throughput_mean']:.2f} tokens/s")           # 单路生成吞吐
            print(f"output_token_throughput_median: {info['output_token_throughput_median']:.2f} tokens/s")
            print(f"output_token_throughput_p99: {info['output_token_throughput_p99']:.2f} tokens/s")
            print(f"inout_token_throughput_mean: {info['inout_token_throughput_mean']:.2f} tokens/s")           # 单路总吞吐
-            print(f"tinout_token_throughput_median: {info['inout_token_throughput_median']:.2f} tokens/s")
+            print(f"inout_token_throughput_median: {info['inout_token_throughput_median']:.2f} tokens/s")
            print(f"inout_token_throughput_p99: {info['inout_token_throughput_p99']:.2f} tokens/s")
            print("==============================================")
            print("\n")