"src/vscode:/vscode.git/clone" did not exist on "71ad16b463275ce91e9279ecc8233868f709cadf"
Unverified Commit f55933e1 authored by JieXin Liang's avatar JieXin Liang Committed by GitHub
Browse files

[misc] more decode step log for batch_one_batch (#5565)

parent 408ba022
......@@ -85,6 +85,7 @@ class BenchArgs:
correctness_test: bool = False
# This is only used for correctness test
cut_len: int = 4
log_decode_step: int = 0
profile: bool = False
profile_filename_prefix: str = "profile"
......@@ -105,6 +106,12 @@ class BenchArgs:
)
parser.add_argument("--correctness-test", action="store_true")
parser.add_argument("--cut-len", type=int, default=BenchArgs.cut_len)
parser.add_argument(
"--log-decode-step",
type=int,
default=BenchArgs.log_decode_step,
help="Log decode latency by step, default is set to zero to disable.",
)
parser.add_argument(
"--profile", action="store_true", help="Use Torch Profiler."
)
......@@ -335,6 +342,7 @@ def latency_test_run_once(
input_len,
output_len,
device,
log_decode_step,
profile,
profile_filename_prefix,
):
......@@ -394,9 +402,9 @@ def latency_test_run_once(
tot_latency += latency
throughput = batch_size / latency
decode_latencies.append(latency)
if i < 5:
if i < 5 or (log_decode_step > 0 and i % log_decode_step == 0):
rank_print(
f"Decode. Batch size: {batch_size}, latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s"
f"Decode {i}. Batch size: {batch_size}, latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s"
)
if profile:
......@@ -457,8 +465,9 @@ def latency_test(
reqs,
bench_args.batch_size[0],
bench_args.input_len[0],
8, # shorter decoding to speed up the warmup
min(32, bench_args.output_len[0]), # shorter decoding to speed up the warmup
server_args.device,
log_decode_step=0,
profile=False,
profile_filename_prefix="", # not used
)
......@@ -480,6 +489,7 @@ def latency_test(
il,
ol,
server_args.device,
bench_args.log_decode_step,
bench_args.profile if tp_rank == 0 else None,
bench_args.profile_filename_prefix,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment