Commit f3ce07a4 authored by jerrrrry
Browse files

Update benchmark_throughput_0.8.5.py

parent e8491994
...@@ -248,18 +248,18 @@ def run_vllm( ...@@ -248,18 +248,18 @@ def run_vllm(
)) ))
end = time.perf_counter() end = time.perf_counter()
total_ttfts = [] total_ttfts = []
total_tpops = [] total_tpots = []
total_output_token_throughput = [] total_output_token_throughput = []
total_inout_token_throughput = [] total_inout_token_throughput = []
for output in real_output: for output in real_output:
ttft_ = output.metrics.first_token_time - output.metrics.arrival_time ttft_ = output.metrics.first_token_time - output.metrics.arrival_time
tpop_ = (output.metrics.finished_time - output.metrics.arrival_time - ttft_) / (ELEoutput-1) tpot_ = (output.metrics.finished_time - output.metrics.arrival_time - ttft_) / (ELEoutput-1)
output_token_throughput = (ELEoutput) / (output.metrics.finished_time - output.metrics.arrival_time) output_token_throughput = (ELEoutput) / (output.metrics.finished_time - output.metrics.arrival_time)
inout_token_throughput = (ELEoutput + ELEinput) / (output.metrics.finished_time - output.metrics.arrival_time) inout_token_throughput = (ELEoutput + ELEinput) / (output.metrics.finished_time - output.metrics.arrival_time)
total_ttfts.append(ttft_) total_ttfts.append(ttft_)
total_tpops.append(tpop_) total_tpots.append(tpot_)
total_output_token_throughput.append(output_token_throughput) total_output_token_throughput.append(output_token_throughput)
total_inout_token_throughput.append(inout_token_throughput) total_inout_token_throughput.append(inout_token_throughput)
...@@ -294,9 +294,9 @@ def run_vllm( ...@@ -294,9 +294,9 @@ def run_vllm(
info["ttft_median"] = np.around(np.median(total_ttfts or 0),5) info["ttft_median"] = np.around(np.median(total_ttfts or 0),5)
info["ttft_p99"] = np.around(np.percentile(total_ttfts or 0, 99),5) info["ttft_p99"] = np.around(np.percentile(total_ttfts or 0, 99),5)
info["tpop_mean"] = np.around(np.mean(total_tpops),4) info["tpot_mean"] = np.around(np.mean(total_tpots),4)
info["tpop_median"] = np.around(np.median(total_tpops or 0),5) info["tpot_median"] = np.around(np.median(total_tpots or 0),5)
info["tpop_p99"] = np.around(np.percentile(total_tpops or 0, 99),5) info["tpot_p99"] = np.around(np.percentile(total_tpots or 0, 99),5)
info["output_token_throughput_mean"] = np.around(np.mean(total_output_token_throughput),2) info["output_token_throughput_mean"] = np.around(np.mean(total_output_token_throughput),2)
info["output_token_throughput_median"] = np.around(np.median(total_output_token_throughput or 0),2) info["output_token_throughput_median"] = np.around(np.median(total_output_token_throughput or 0),2)
...@@ -316,17 +316,17 @@ def run_vllm( ...@@ -316,17 +316,17 @@ def run_vllm(
print("==============================================") print("==============================================")
print(f"total_out_tokens: {total_output_tokens: .2f} tokens") print(f"total_out_tokens: {total_output_tokens: .2f} tokens")
print(f"elapsed_time: {info['elapsed_time']: .2f} s") # 总耗时 print(f"elapsed_time: {info['elapsed_time']: .2f} s") # 总耗时
print(f"TTFT_mean: {info['ttft_mean']: .5f} s") # 首字延时 print(f"TTFT_mean: {info['ttft_mean']*1000: .2f} ms") # 首字延时
print(f"ttft_p99: {info['ttft_p99']: .5f} s") print(f"ttft_p99: {info['ttft_p99']*1000: .2f} ms")
print(f"ttft_median: {info['ttft_median']: .5f} s") print(f"ttft_median: {info['ttft_median']*1000: .2f} ms")
print(f"TPOP_mean: {info['tpop_mean']: .5f} s") # 单字decode时间 print(f"TPOT_mean: {info['tpot_mean']*1000: .2f} ms") # 单字decode时间
print(f"tpop_median: {info['tpop_median']: .5f} s") print(f"tpot_median: {info['tpot_median']*1000: .2f} ms")
print(f"tpop_p99: {info['tpop_p99']: .5f} s") print(f"tpot_p99: {info['tpot_p99']*1000: .2f} ms")
print(f"output_token_throughput_mean: {info['output_token_throughput_mean']:.2f} tokens/s") # 单路生成吞吐 print(f"output_token_throughput_mean: {info['output_token_throughput_mean']:.2f} tokens/s") # 单路生成吞吐
print(f"output_token_throughput_median: {info['output_token_throughput_median']:.2f} tokens/s") print(f"output_token_throughput_median: {info['output_token_throughput_median']:.2f} tokens/s")
print(f"output_token_throughput_p99: {info['output_token_throughput_p99']:.2f} tokens/s") print(f"output_token_throughput_p99: {info['output_token_throughput_p99']:.2f} tokens/s")
print(f"inout_token_throughput_mean: {info['inout_token_throughput_mean']:.2f} tokens/s") # 单路总吞吐 print(f"inout_token_throughput_mean: {info['inout_token_throughput_mean']:.2f} tokens/s") # 单路总吞吐
print(f"tinout_token_throughput_median: {info['inout_token_throughput_median']:.2f} tokens/s") print(f"inout_token_throughput_median: {info['inout_token_throughput_median']:.2f} tokens/s")
print(f"inout_token_throughput_p99: {info['inout_token_throughput_p99']:.2f} tokens/s") print(f"inout_token_throughput_p99: {info['inout_token_throughput_p99']:.2f} tokens/s")
print("==============================================") print("==============================================")
print("\n") print("\n")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment