[router][grpc] Fix warm-up random token ids for small models (#11887)

9c0b1eb5 · Chang Su · GitHub · 01f14a7a · 9c0b1eb5
Unverified Commit 9c0b1eb5 authored Oct 20, 2025 by Chang Su Committed by GitHub Oct 20, 2025
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 10 deletions

python/sglang/srt/entrypoints/grpc_server.py python/sglang/srt/entrypoints/grpc_server.py +9 -10

No files found.
--- a/python/sglang/srt/entrypoints/grpc_server.py
+++ b/python/sglang/srt/entrypoints/grpc_server.py
@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
        max_new_tokens = 8 if is_generation else 1
        if is_generation:
-            # Create tokenized input for warmup
            warmup_request_kwargs = {
                "request_id": f"WARMUP_{time.time()}",
                "tokenized": sglang_scheduler_pb2.TokenizedInput(
                    input_ids=[
-                        954,
+                        123,
-                        15541,
+                        456,
-                        2181,
+                        789,
-                        23496,
+                        234,
-                        1476,
+                        567,
-                        64710,
+                        890,
-                        280,
+                        345,
-                    ],  # Simple token sequence
+                    ],  # Random-looking but safe token IDs
-                    original_text="The capital city of France is",
+                    original_text="warmup request",
                ),
                "sampling_params": sglang_scheduler_pb2.SamplingParams(
                    temperature=0.0,