Unverified Commit 65bd1338 authored by zhyncs's avatar zhyncs Committed by GitHub
Browse files

misc: recommend to use chat model for benchmark (#690)

parent eedc12e1
......@@ -630,6 +630,7 @@ async def benchmark(
"random_input_len": args.random_input_len,
"random_output_len": args.random_output_len,
"random_range_ratio": args.random_range_ratio,
"benchmark_duration": benchmark_duration,
}
else:
print(f"Error running benchmark for request rate: {request_rate}")
......@@ -687,6 +688,15 @@ def parse_request_rate_range(request_rate_range):
return list(map(int, request_rate_range.split(",")))
def check_chat_template(model_path):
    """Return True if the tokenizer at *model_path* ships a chat template.

    Used to warn when benchmarking a base (non-chat) model, whose gibberish
    output can make output-token counting unreliable.

    Args:
        model_path: Hugging Face model name or local path accepted by
            ``AutoTokenizer.from_pretrained``.

    Returns:
        bool: True if a chat template is present, False otherwise (including
        when the tokenizer cannot be loaded — this is a best-effort check).
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        # Newer transformers expose the template as the `chat_template`
        # attribute; older configs surface it via init_kwargs. Accept either.
        if getattr(tokenizer, "chat_template", None) is not None:
            return True
        return "chat_template" in tokenizer.init_kwargs
    except Exception as e:
        # Best-effort: a load failure should not abort the benchmark run.
        print(f"Fail to load tokenizer config with error={e}")
        return False
def fire(args: argparse.Namespace):
random.seed(args.seed)
np.random.seed(args.seed)
......@@ -736,6 +746,12 @@ def fire(args: argparse.Namespace):
print("No model specified or found. Please provide a model using `--model`.")
sys.exit(1)
if not check_chat_template(args.model):
print(
"\nWARNING It is recommended to use the `Chat` or `Instruct` model for benchmarking.\n"
"Because when the tokenizer counts the output tokens, if there is gibberish, it might count incorrectly.\n"
)
print(f"{args}\n")
backend = args.backend
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment