[bugfix] Add 'disaggregation_mode' parameter to warmup function when compile...

[bugfix] Add 'disaggregation_mode' parameter to warmup function when compiling deep_gemm manually (#8618)

[bugfix] Add 'disaggregation_mode' parameter to warmup function when compile...
[bugfix] Add 'disaggregation_mode' parameter to warmup function when compiling deep_gemm manually (#8618)
b89d37cb · Baron Liu · GitHub · 5deab128 · b89d37cb
Unverified Commit b89d37cb authored Aug 02, 2025 by Baron Liu Committed by GitHub Aug 01, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 1 deletion

python/sglang/compile_deep_gemm.py python/sglang/compile_deep_gemm.py +8 -1

No files found.
--- a/python/sglang/compile_deep_gemm.py
+++ b/python/sglang/compile_deep_gemm.py
@@ -17,6 +17,7 @@ import time

 import requests

+from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST
 from sglang.srt.entrypoints.http_server import launch_server
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
@@ -52,7 +53,9 @@ class CompileArgs:


 @warmup("compile-deep-gemm")
-async def warm_up_compile(tokenizer_manager: TokenizerManager):
+async def warm_up_compile(
+    disaggregation_mode: str, tokenizer_manager: TokenizerManager
+):
    print("\nGenerate warm up request for compiling DeepGEMM...\n")
    generate_req_input = GenerateReqInput(
        input_ids=[0, 1, 2, 3],
@@ -62,6 +65,10 @@ async def warm_up_compile(tokenizer_manager: TokenizerManager):
            "ignore_eos": True,
        },
    )
+    if disaggregation_mode != "null":
+        generate_req_input.bootstrap_room = 0
+        generate_req_input.bootstrap_host = FAKE_BOOTSTRAP_HOST
+
    await tokenizer_manager.generate_request(generate_req_input, None).__anext__()