Fix warmup in bench_offline_throughput.py (#2449)

f8548295 · Lianmin Zheng · GitHub · 959735fc · f8548295
Unverified commit f8548295, authored Dec 11, 2024 by Lianmin Zheng and committed by GitHub on Dec 11, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 5 deletions

python/sglang/bench_offline_throughput.py (+5 -5)

No files found.
--- a/python/sglang/bench_offline_throughput.py
+++ b/python/sglang/bench_offline_throughput.py
@@ -201,18 +201,17 @@ def throughput_test_once(
        for r in reqs
    ]
-    st = time.perf_counter()
    if profile:
        backend.start_profile()
+    st = time.perf_counter()
    gen_out = backend.generate(prompt=prompt, sampling_params=sampling_params)
+    latency = time.perf_counter() - st
    if profile:
        backend.stop_profile()
        monitor_trace_file(os.getenv("SGLANG_TORCH_PROFILER_DIR"))
-    latency = time.perf_counter() - st
    if backend_name == "runtime":
        gen_out = json.loads(gen_out)
@@ -304,8 +303,8 @@ def throughput_test(
    warmup_requests = sample_random_requests(
        input_len=256,
        output_len=16,
-        num_prompts=16,
+        num_prompts=min(bench_args.num_prompts, 16),
-        range_ratio=0.8,
+        range_ratio=1.0,
        tokenizer=tokenizer,
        dataset_path=bench_args.dataset_path,
    )
@@ -321,6 +320,7 @@ def throughput_test(
            extra_request_body=extra_request_body,
            profile=False,
        )
+        time.sleep(0.5)
    logging.info("\nBenchmark...")
    result = throughput_test_once(