Update bench speculative script (#4235)

f1d09a65 · Ke Bao · GitHub · df84ab2a · f1d09a65
Unverified Commit f1d09a65 authored Mar 10, 2025 by Ke Bao Committed by GitHub Mar 09, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 0 deletions

scripts/playground/bench_speculative.py scripts/playground/bench_speculative.py +17 -0

No files found.
--- a/scripts/playground/bench_speculative.py
+++ b/scripts/playground/bench_speculative.py
@@ -2,6 +2,9 @@
 Usage:
 # single GPU
 python3 bench_speculative.py --model-path meta-llama/Llama-2-7b-chat-hf --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B
+# multiple GPUs
+python3 bench_speculative.py --model-path deepseek-ai/DeepSeek-V3 --speculative-draft-model-path lmsys/DeepSeek-V3-NextN --tp-size 8 --trust-remote-code --batch-size 1 4 8 16 32 --steps 0 1 2 --topk 0 1 2 4 --num_draft_tokens 0 2 4 8
 """
 import argparse
@@ -166,6 +169,20 @@ def main(args, server_args):
            ]
        )
+        if server_args.trust_remote_code:
+            other_args.extend(
+                [
+                    "--trust-remote-code",
+                ]
+            )
+        if server_args.enable_flashinfer_mla:
+            other_args.extend(
+                [
+                    "--enable-flashinfer-mla",
+                ]
+            )
        if server_args.quantization:
            other_args.extend(
                [