Unverified commit f1d09a65, authored by Ke Bao and committed by GitHub
Browse files

Update bench speculative script (#4235)

parent df84ab2a
...@@ -2,6 +2,9 @@ ...@@ -2,6 +2,9 @@
Usage: Usage:
# single GPU # single GPU
python3 bench_speculative.py --model-path meta-llama/Llama-2-7b-chat-hf --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B python3 bench_speculative.py --model-path meta-llama/Llama-2-7b-chat-hf --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B
# multiple GPU
python3 bench_speculative.py --model-path deepseek-ai/DeepSeek-V3 --speculative-draft-model-path lmsys/DeepSeek-V3-NextN --tp-size 8 --trust-remote-code --batch-size 1 4 8 16 32 --steps 0 1 2 --topk 0 1 2 4 --num_draft_tokens 0 2 4 8
""" """
import argparse import argparse
...@@ -166,6 +169,20 @@ def main(args, server_args): ...@@ -166,6 +169,20 @@ def main(args, server_args):
] ]
) )
if server_args.trust_remote_code:
other_args.extend(
[
"--trust-remote-code",
]
)
if server_args.enable_flashinfer_mla:
other_args.extend(
[
"--enable-flashinfer-mla",
]
)
if server_args.quantization: if server_args.quantization:
other_args.extend( other_args.extend(
[ [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment