Unverified Commit 50b6842b authored by Zaili Wang, committed by GitHub

fix: Add default value for backend in sample_mmmu_requests (#12256)

parent 2d5605e8
@@ -1014,7 +1014,7 @@ async def get_mooncake_request_over_time(
 def sample_mmmu_requests(
     num_requests: int,
     processor: AutoProcessor | AutoTokenizer,
-    backend: str,
+    backend: str = "sglang",
     fixed_output_len: Optional[int] = None,
     random_sample: bool = True,
 ) -> List[DatasetRow]:
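
This signature change is the core of the fix: with a default, callers that omit `backend` keep working, while callers that care can still override it. A minimal sketch of the before/after behavior, using toy names rather than the real benchmark function:

    # Before: `backend` is required, so omitting it raises a TypeError.
    def sample_old(num_requests: int, backend: str):
        return [(backend, i) for i in range(num_requests)]

    # After: the default mirrors the fix above.
    def sample_new(num_requests: int, backend: str = "sglang"):
        return [(backend, i) for i in range(num_requests)]

    sample_new(4)                             # OK: backend falls back to "sglang"
    sample_new(4, backend="sglang-oai-chat")  # explicit override still works
    # sample_old(4)  # TypeError: missing 1 required positional argument: 'backend'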
@@ -1369,7 +1369,10 @@ def create_mm_data_row(
         )["input_ids"].numel()
     except Exception:
         # Fallback: just tokenize the text prompt directly
-        text_prompt_len = len(processor.tokenizer.encode(text_prompt))
+        tokenizer_to_use = (
+            processor.tokenizer if hasattr(processor, "tokenizer") else processor
+        )
+        text_prompt_len = len(tokenizer_to_use.encode(text_prompt))
     # Vision tokens = total tokens - text tokens
     vision_prompt_len = prompt_len - text_prompt_len
...
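
The fallback change accounts for `processor` being either an `AutoProcessor`, which wraps a tokenizer and exposes it as `.tokenizer`, or a bare `AutoTokenizer`, which has no such attribute, so the old `processor.tokenizer.encode(...)` raised AttributeError on the fallback path. A hedged sketch of the guarded lookup in isolation:

    # Sketch of the guarded lookup, assuming `processor` is either a Hugging
    # Face processor exposing `.tokenizer` or a bare tokenizer itself.
    def count_text_tokens(processor, text_prompt: str) -> int:
        tokenizer = processor.tokenizer if hasattr(processor, "tokenizer") else processor
        return len(tokenizer.encode(text_prompt))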
@@ -57,14 +57,14 @@ class FakeTokenizer:
 def send_one_batch(base_url, num_prompts, batch_size, tokenizer, is_multimodal):
     # format: (prompt, input_len, output len). We set input_len as a dummy value 0.
     if is_multimodal:
+        backend = "sglang-oai-chat"
+        api_url = f"{base_url}/v1/chat/completions"
         input_requests = sample_mmmu_requests(
             num_prompts,
             tokenizer,
-            512,
-            apply_chat_template=False,
+            backend=backend,
+            fixed_output_len=512,
         )
-        backend = "sglang-oai-chat"
-        api_url = f"{base_url}/v1/chat/completions"
     else:
         padded_prompts = (prompts * ((num_prompts + len(prompts) - 1) // len(prompts)))[
            :num_prompts
...
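
In the old test code, `512` was passed positionally and so bound to the `backend` parameter rather than `fixed_output_len`, and `apply_chat_template` does not appear in the signature shown in the first hunk at all. The fix passes both values by keyword, which keeps arguments bound to the intended parameters even as the signature evolves. A toy illustration with hypothetical names:

    # Why keyword arguments are safer at call sites whose target
    # signature may change:
    def sample(num: int, backend: str = "sglang", fixed_output_len=None):
        return num, backend, fixed_output_len

    sample(8, 512)                              # 512 silently binds to `backend`
    sample(8, backend="sglang-oai-chat", fixed_output_len=512)  # intent explicit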