Unverified Commit 9c0b1eb5 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

[router][grpc] Fix warm-up random token ids for small models (#11887)

parent 01f14a7a
...@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup( ...@@ -998,20 +998,19 @@ def _execute_grpc_server_warmup(
max_new_tokens = 8 if is_generation else 1 max_new_tokens = 8 if is_generation else 1
if is_generation: if is_generation:
# Create tokenized input for warmup
warmup_request_kwargs = { warmup_request_kwargs = {
"request_id": f"WARMUP_{time.time()}", "request_id": f"WARMUP_{time.time()}",
"tokenized": sglang_scheduler_pb2.TokenizedInput( "tokenized": sglang_scheduler_pb2.TokenizedInput(
input_ids=[ input_ids=[
954, 123,
15541, 456,
2181, 789,
23496, 234,
1476, 567,
64710, 890,
280, 345,
], # Simple token sequence ], # Random-looking but safe token IDs
original_text="The capital city of France is", original_text="warmup request",
), ),
"sampling_params": sglang_scheduler_pb2.SamplingParams( "sampling_params": sglang_scheduler_pb2.SamplingParams(
temperature=0.0, temperature=0.0,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment