Unverified Commit 4f24ab17 authored by Simo Lin's avatar Simo Lin Committed by GitHub
Browse files

[router][grpc] add dissag info to warm up in grpc server (#11727)

parent 64affab4
......@@ -532,9 +532,20 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
bootstrap_port = None
bootstrap_room = None
if grpc_req.HasField("disaggregated_params"):
bootstrap_host = grpc_req.disaggregated_params.bootstrap_host or None
bootstrap_port = grpc_req.disaggregated_params.bootstrap_port or None
bootstrap_room = grpc_req.disaggregated_params.bootstrap_room or None
# Don't use 'or None' as it treats 0 as falsy
bootstrap_host = (
grpc_req.disaggregated_params.bootstrap_host
if grpc_req.disaggregated_params.bootstrap_host
else None
)
bootstrap_port = (
grpc_req.disaggregated_params.bootstrap_port
if grpc_req.disaggregated_params.bootstrap_port
else None
)
bootstrap_room = (
grpc_req.disaggregated_params.bootstrap_room
) # Can be 0, don't use 'or None'
# Create request
return TokenizedGenerateReqInput(
......@@ -988,9 +999,9 @@ def _execute_grpc_server_warmup(
if is_generation:
# Create tokenized input for warmup
warmup_request = sglang_scheduler_pb2.GenerateRequest(
request_id=f"WARMUP_{time.time()}",
tokenized=sglang_scheduler_pb2.TokenizedInput(
warmup_request_kwargs = {
"request_id": f"WARMUP_{time.time()}",
"tokenized": sglang_scheduler_pb2.TokenizedInput(
input_ids=[
954,
15541,
......@@ -1002,11 +1013,24 @@ def _execute_grpc_server_warmup(
], # Simple token sequence
original_text="The capital city of France is",
),
sampling_params=sglang_scheduler_pb2.SamplingParams(
"sampling_params": sglang_scheduler_pb2.SamplingParams(
temperature=0.0,
max_new_tokens=max_new_tokens,
),
stream=False,
"stream": False,
}
# Set disaggregation params if needed
if server_args.disaggregation_mode != DisaggregationMode.NULL:
warmup_request_kwargs["disaggregated_params"] = (
sglang_scheduler_pb2.DisaggregatedParams(
bootstrap_host=FAKE_BOOTSTRAP_HOST,
bootstrap_room=0,
)
)
warmup_request = sglang_scheduler_pb2.GenerateRequest(
**warmup_request_kwargs
)
# Send the warmup request
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment