Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f87a6ab3
Unverified
Commit
f87a6ab3
authored
Apr 27, 2025
by
Yuhong Guo
Committed by
GitHub
Apr 26, 2025
Browse files
Resolves the `404 Not Found` error when running `compile_deep_gemm.py` in multi-node setups (#5720)
parent
eebfdb94
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
3 deletions
+40
-3
python/sglang/compile_deep_gemm.py
python/sglang/compile_deep_gemm.py
+40
-3
No files found.
python/sglang/compile_deep_gemm.py
View file @
f87a6ab3
...
@@ -88,8 +88,36 @@ def launch_server_process_and_send_one_request(
...
@@ -88,8 +88,36 @@ def launch_server_process_and_send_one_request(
headers
=
{
headers
=
{
"Content-Type"
:
"application/json; charset=utf-8"
,
"Content-Type"
:
"application/json; charset=utf-8"
,
}
}
if
server_args
.
node_rank
==
0
:
response
=
requests
.
get
(
f
"
{
base_url
}
/v1/models"
,
headers
=
headers
)
response
=
requests
.
get
(
f
"
{
base_url
}
/v1/models"
,
headers
=
headers
)
else
:
# This HTTP API is created by launch_dummy_health_check_server for non-rank-0 nodes.
response
=
requests
.
get
(
f
"
{
base_url
}
/health"
,
headers
=
headers
)
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
# The rank-0 node sends a request to sync with the other nodes and then returns.
if
server_args
.
node_rank
==
0
:
response
=
requests
.
post
(
f
"
{
base_url
}
/generate"
,
json
=
{
"input_ids"
:
[
0
,
1
,
2
,
3
],
"sampling_params"
:
{
"max_new_tokens"
:
8
,
"temperature"
:
0
,
},
},
timeout
=
600
,
)
if
response
.
status_code
!=
200
:
error
=
response
.
json
()
raise
RuntimeError
(
f
"Sync request failed:
{
error
}
"
)
# Other nodes should wait for the exit signal from the rank-0 node.
else
:
start_time_waiting
=
time
.
time
()
while
proc
.
is_alive
():
if
time
.
time
()
-
start_time_waiting
<
timeout
:
time
.
sleep
(
10
)
else
:
raise
TimeoutError
(
"Waiting for main node timeout!"
)
return
proc
return
proc
except
requests
.
RequestException
:
except
requests
.
RequestException
:
pass
pass
...
@@ -122,10 +150,19 @@ def run_compile(server_args: ServerArgs, compile_args: CompileArgs):
...
@@ -122,10 +150,19 @@ def run_compile(server_args: ServerArgs, compile_args: CompileArgs):
proc
=
launch_server_process_and_send_one_request
(
server_args
,
compile_args
)
proc
=
launch_server_process_and_send_one_request
(
server_args
,
compile_args
)
kill_process_tree
(
proc
.
pid
)
print
(
"
\n
DeepGEMM Kernels compilation finished successfully."
)
print
(
"
\n
DeepGEMM Kernels compilation finished successfully."
)
# Sleep for safety
time
.
sleep
(
10
)
if
proc
.
is_alive
():
# This is the rank0 node.
kill_process_tree
(
proc
.
pid
)
else
:
try
:
kill_process_tree
(
proc
.
pid
)
except
Exception
:
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment