"vscode:/vscode.git/clone" did not exist on "90c9d0be6e910d4a41821537d4002b3e7d35619f"
Commit bae9541e authored by Ying Sheng
Browse files

Update benchmark script (#621)

parent a56858ba
...@@ -97,7 +97,10 @@ def run_one_batch_size(bs): ...@@ -97,7 +97,10 @@ def run_one_batch_size(bs):
print(ret) print(ret)
output_throughput = bs * max_new_tokens / latency output_throughput = bs * max_new_tokens / latency
print(f"latency: {latency:.2f} s, speed: {output_throughput:.2f} token/s") overall_throughput = bs * (args.input_len + max_new_tokens) / latency
print(f"latency: {latency:.2f} s")
print(f"decode throughput: {output_throughput:.2f} token/s")
print(f"overall throughput: {overall_throughput:.2f} token/s")
with open("results.jsonl", "a") as fout: with open("results.jsonl", "a") as fout:
res = { res = {
...@@ -107,6 +110,7 @@ def run_one_batch_size(bs): ...@@ -107,6 +110,7 @@ def run_one_batch_size(bs):
"batch_size": bs, "batch_size": bs,
"latency": latency, "latency": latency,
"output_throughput": output_throughput, "output_throughput": output_throughput,
"overall_throughput": overall_throughput,
} }
fout.write(json.dumps(res) + "\n") fout.write(json.dumps(res) + "\n")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment