Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
28b8a406
Unverified
Commit
28b8a406
authored
Oct 23, 2025
by
Chang Su
Committed by
GitHub
Oct 23, 2025
Browse files
[router][CI] Clean up imports and prints statements in sgl-router/py_test (#12024)
parent
8bd26dd4
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
114 additions
and
131 deletions
+114
-131
.pre-commit-config.yaml
.pre-commit-config.yaml
+1
-1
sgl-router/py_test/e2e/test_e2e_embeddings.py
sgl-router/py_test/e2e/test_e2e_embeddings.py
+0
-1
sgl-router/py_test/e2e/test_pd_router.py
sgl-router/py_test/e2e/test_pd_router.py
+0
-1
sgl-router/py_test/e2e_grpc/basic/test_openai_server.py
sgl-router/py_test/e2e_grpc/basic/test_openai_server.py
+0
-4
sgl-router/py_test/e2e_grpc/conftest.py
sgl-router/py_test/e2e_grpc/conftest.py
+1
-1
sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py
sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py
+0
-10
sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py
...outer/py_test/e2e_grpc/features/test_reasoning_content.py
+0
-4
sgl-router/py_test/e2e_grpc/fixtures.py
sgl-router/py_test/e2e_grpc/fixtures.py
+40
-31
sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py
...st/e2e_grpc/function_call/test_openai_function_calling.py
+0
-3
sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py
...router/py_test/e2e_grpc/function_call/test_tool_choice.py
+0
-2
sgl-router/py_test/e2e_grpc/util.py
sgl-router/py_test/e2e_grpc/util.py
+6
-7
sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py
.../py_test/e2e_grpc/validation/test_large_max_new_tokens.py
+0
-3
sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py
...test/e2e_grpc/validation/test_openai_server_ignore_eos.py
+0
-1
sgl-router/py_test/e2e_response_api/conftest.py
sgl-router/py_test/e2e_response_api/conftest.py
+1
-1
sgl-router/py_test/e2e_response_api/mcp.py
sgl-router/py_test/e2e_response_api/mcp.py
+0
-1
sgl-router/py_test/e2e_response_api/router_fixtures.py
sgl-router/py_test/e2e_response_api/router_fixtures.py
+59
-48
sgl-router/py_test/e2e_response_api/state_management.py
sgl-router/py_test/e2e_response_api/state_management.py
+0
-5
sgl-router/py_test/e2e_response_api/util.py
sgl-router/py_test/e2e_response_api/util.py
+4
-5
sgl-router/py_test/integration/conftest.py
sgl-router/py_test/integration/conftest.py
+1
-2
sgl-router/py_test/integration/load_balancing/test_power_of_two.py
...r/py_test/integration/load_balancing/test_power_of_two.py
+1
-0
No files found.
.pre-commit-config.yaml
View file @
28b8a406
...
@@ -30,7 +30,7 @@ repos:
...
@@ -30,7 +30,7 @@ repos:
args
:
args
:
-
--select=F401,F821
-
--select=F401,F821
-
--fix
-
--fix
files
:
^(benchmark/|docs/|examples/|python/sglang/)
files
:
^(benchmark/|docs/|examples/|python/sglang/
|sgl-router/py_*
)
exclude
:
__init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
exclude
:
__init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
-
repo
:
https://github.com/psf/black
-
repo
:
https://github.com/psf/black
rev
:
24.10.0
rev
:
24.10.0
...
...
sgl-router/py_test/e2e/test_e2e_embeddings.py
View file @
28b8a406
import
time
import
time
from
types
import
SimpleNamespace
import
pytest
import
pytest
import
requests
import
requests
...
...
sgl-router/py_test/e2e/test_pd_router.py
View file @
28b8a406
import
logging
import
logging
import
os
import
socket
import
socket
import
subprocess
import
subprocess
import
time
import
time
...
...
sgl-router/py_test/e2e_grpc/basic/test_openai_server.py
View file @
28b8a406
...
@@ -13,14 +13,11 @@ Run with:
...
@@ -13,14 +13,11 @@ Run with:
"""
"""
import
json
import
json
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
import
unittest
import
unittest
from
pathlib
import
Path
from
pathlib
import
Path
import
openai
import
openai
import
requests
_TEST_DIR
=
Path
(
__file__
).
parent
_TEST_DIR
=
Path
(
__file__
).
parent
sys
.
path
.
insert
(
0
,
str
(
_TEST_DIR
.
parent
))
sys
.
path
.
insert
(
0
,
str
(
_TEST_DIR
.
parent
))
...
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
...
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
try
:
try
:
js_obj
=
json
.
loads
(
text
)
js_obj
=
json
.
loads
(
text
)
except
(
TypeError
,
json
.
decoder
.
JSONDecodeError
):
except
(
TypeError
,
json
.
decoder
.
JSONDecodeError
):
print
(
"JSONDecodeError"
,
text
)
raise
raise
assert
isinstance
(
js_obj
[
"name"
],
str
)
assert
isinstance
(
js_obj
[
"name"
],
str
)
assert
isinstance
(
js_obj
[
"population"
],
int
)
assert
isinstance
(
js_obj
[
"population"
],
int
)
...
...
sgl-router/py_test/e2e_grpc/conftest.py
View file @
28b8a406
...
@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
...
@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
import
sys
import
sys
from
pathlib
import
Path
from
pathlib
import
Path
import
pytest
import
pytest
# noqa: F401
# Ensure router py_src is importable
# Ensure router py_src is importable
_ROUTER_ROOT
=
Path
(
__file__
).
resolve
().
parents
[
2
]
_ROUTER_ROOT
=
Path
(
__file__
).
resolve
().
parents
[
2
]
...
...
sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py
View file @
28b8a406
...
@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
...
@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
"""
import
asyncio
import
json
import
json
import
os
import
sys
import
sys
import
time
import
unittest
import
unittest
# CHANGE: Import router launcher instead of server launcher
from
pathlib
import
Path
from
pathlib
import
Path
import
openai
import
requests
import
requests
_TEST_DIR
=
Path
(
__file__
).
parent
_TEST_DIR
=
Path
(
__file__
).
parent
...
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
...
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
from
fixtures
import
popen_launch_workers_and_router
from
fixtures
import
popen_launch_workers_and_router
from
util
import
(
from
util
import
(
DEFAULT_ENABLE_THINKING_MODEL_PATH
,
DEFAULT_ENABLE_THINKING_MODEL_PATH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
CustomTestCase
,
get_tokenizer
,
kill_process_tree
,
kill_process_tree
,
)
)
...
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
...
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning
=
False
has_reasoning
=
False
has_content
=
False
has_content
=
False
print
(
"
\n
=== Stream With Reasoning ==="
)
for
line
in
response
.
iter_lines
():
for
line
in
response
.
iter_lines
():
if
line
:
if
line
:
line
=
line
.
decode
(
"utf-8"
)
line
=
line
.
decode
(
"utf-8"
)
...
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
...
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning
=
False
has_reasoning
=
False
has_content
=
False
has_content
=
False
print
(
"
\n
=== Stream Without Reasoning ==="
)
for
line
in
response
.
iter_lines
():
for
line
in
response
.
iter_lines
():
if
line
:
if
line
:
line
=
line
.
decode
(
"utf-8"
)
line
=
line
.
decode
(
"utf-8"
)
...
...
sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py
View file @
28b8a406
...
@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
...
@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
"""
import
json
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
import
unittest
import
unittest
from
pathlib
import
Path
from
pathlib
import
Path
import
openai
import
openai
import
requests
_TEST_DIR
=
Path
(
__file__
).
parent
_TEST_DIR
=
Path
(
__file__
).
parent
sys
.
path
.
insert
(
0
,
str
(
_TEST_DIR
.
parent
))
sys
.
path
.
insert
(
0
,
str
(
_TEST_DIR
.
parent
))
...
...
sgl-router/py_test/e2e_grpc/fixtures.py
View file @
28b8a406
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
This approach gives more control and matches production deployment patterns.
This approach gives more control and matches production deployment patterns.
"""
"""
import
logging
import
socket
import
socket
import
subprocess
import
subprocess
import
time
import
time
...
@@ -15,6 +16,8 @@ from typing import Optional
...
@@ -15,6 +16,8 @@ from typing import Optional
import
requests
import
requests
logger
=
logging
.
getLogger
(
__name__
)
def
find_free_port
()
->
int
:
def
find_free_port
()
->
int
:
"""Find an available port on localhost."""
"""Find an available port on localhost."""
...
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
...
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
attempt
+=
1
attempt
+=
1
elapsed
=
int
(
time
.
time
()
-
start_time
)
elapsed
=
int
(
time
.
time
()
-
start_time
)
#
Print
progress every 10 seconds
#
Log
progress every 10 seconds
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
print
(
f
" Still waiting for workers... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
logger
.
info
(
f
" Still waiting for workers... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
try
:
try
:
response
=
session
.
get
(
response
=
session
.
get
(
...
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
...
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
total_workers
=
data
.
get
(
"total"
,
0
)
total_workers
=
data
.
get
(
"total"
,
0
)
if
total_workers
==
expected_workers
:
if
total_workers
==
expected_workers
:
print
(
logger
.
info
(
f
" All
{
expected_workers
}
workers connected after
{
elapsed
}
s"
f
" All
{
expected_workers
}
workers connected after
{
elapsed
}
s"
)
)
return
return
...
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
...
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
else
:
else
:
router_port
=
find_free_port
()
router_port
=
find_free_port
()
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"Launching gRPC cluster (separate workers + router)"
)
logger
.
info
(
f
"Launching gRPC cluster (separate workers + router)"
)
print
(
f
"
{
'='
*
70
}
"
)
logger
.
info
(
f
"
{
'='
*
70
}
"
)
print
(
f
" Model:
{
model
}
"
)
logger
.
info
(
f
" Model:
{
model
}
"
)
print
(
f
" Router port:
{
router_port
}
"
)
logger
.
info
(
f
" Router port:
{
router_port
}
"
)
print
(
f
" Workers:
{
num_workers
}
"
)
logger
.
info
(
f
" Workers:
{
num_workers
}
"
)
print
(
f
" TP size:
{
tp_size
}
"
)
logger
.
info
(
f
" TP size:
{
tp_size
}
"
)
print
(
f
" Policy:
{
policy
}
"
)
logger
.
info
(
f
" Policy:
{
policy
}
"
)
# Step 1: Launch workers with gRPC enabled
# Step 1: Launch workers with gRPC enabled
workers
=
[]
workers
=
[]
...
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
...
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
worker_url
=
f
"grpc://127.0.0.1:
{
worker_port
}
"
worker_url
=
f
"grpc://127.0.0.1:
{
worker_port
}
"
worker_urls
.
append
(
worker_url
)
worker_urls
.
append
(
worker_url
)
print
(
f
"
\n
[Worker
{
i
+
1
}
/
{
num_workers
}
]"
)
logger
.
info
(
f
"
\n
[Worker
{
i
+
1
}
/
{
num_workers
}
]"
)
print
(
f
" Port:
{
worker_port
}
"
)
logger
.
info
(
f
" Port:
{
worker_port
}
"
)
print
(
f
" URL:
{
worker_url
}
"
)
logger
.
info
(
f
" URL:
{
worker_url
}
"
)
# Build worker command
# Build worker command
worker_cmd
=
[
worker_cmd
=
[
...
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
...
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
)
)
workers
.
append
(
worker_proc
)
workers
.
append
(
worker_proc
)
print
(
f
" PID:
{
worker_proc
.
pid
}
"
)
logger
.
info
(
f
" PID:
{
worker_proc
.
pid
}
"
)
# Give workers a moment to start binding to ports
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
# The router will check worker health when it starts
print
(
f
"
\n
Waiting for
{
num_workers
}
workers to initialize (20s)..."
)
logger
.
info
(
f
"
\n
Waiting for
{
num_workers
}
workers to initialize (20s)..."
)
time
.
sleep
(
20
)
time
.
sleep
(
20
)
# Quick check: make sure worker processes are still alive
# Quick check: make sure worker processes are still alive
for
i
,
worker
in
enumerate
(
workers
):
for
i
,
worker
in
enumerate
(
workers
):
if
worker
.
poll
()
is
not
None
:
if
worker
.
poll
()
is
not
None
:
print
(
f
" ✗ Worker
{
i
+
1
}
died during startup (exit code:
{
worker
.
poll
()
}
)"
)
logger
.
error
(
f
" ✗ Worker
{
i
+
1
}
died during startup (exit code:
{
worker
.
poll
()
}
)"
)
# Cleanup: kill all workers
# Cleanup: kill all workers
for
w
in
workers
:
for
w
in
workers
:
try
:
try
:
...
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
...
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
pass
pass
raise
RuntimeError
(
f
"Worker
{
i
+
1
}
failed to start"
)
raise
RuntimeError
(
f
"Worker
{
i
+
1
}
failed to start"
)
print
(
f
"✓ All
{
num_workers
}
workers started (router will verify connectivity)"
)
logger
.
info
(
f
"✓ All
{
num_workers
}
workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
# Step 2: Launch router pointing to workers
print
(
f
"
\n
[Router]"
)
logger
.
info
(
f
"
\n
[Router]"
)
print
(
f
" Port:
{
router_port
}
"
)
logger
.
info
(
f
" Port:
{
router_port
}
"
)
print
(
f
" Worker URLs:
{
', '
.
join
(
worker_urls
)
}
"
)
logger
.
info
(
f
" Worker URLs:
{
', '
.
join
(
worker_urls
)
}
"
)
# Build router command
# Build router command
router_cmd
=
[
router_cmd
=
[
...
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
...
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
router_cmd
.
extend
(
router_args
)
router_cmd
.
extend
(
router_args
)
if
show_output
:
if
show_output
:
print
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
logger
.
info
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
# Launch router
# Launch router
if
show_output
:
if
show_output
:
...
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
...
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
stderr
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
)
)
print
(
f
" PID:
{
router_proc
.
pid
}
"
)
logger
.
info
(
f
" PID:
{
router_proc
.
pid
}
"
)
# Wait for router to be ready
# Wait for router to be ready
router_url
=
f
"http://127.0.0.1:
{
router_port
}
"
router_url
=
f
"http://127.0.0.1:
{
router_port
}
"
print
(
f
"
\n
Waiting for router to start at
{
router_url
}
..."
)
logger
.
info
(
f
"
\n
Waiting for router to start at
{
router_url
}
..."
)
try
:
try
:
wait_for_workers_ready
(
wait_for_workers_ready
(
router_url
,
expected_workers
=
num_workers
,
timeout
=
180
,
api_key
=
api_key
router_url
,
expected_workers
=
num_workers
,
timeout
=
180
,
api_key
=
api_key
)
)
print
(
f
"✓ Router ready at
{
router_url
}
"
)
logger
.
info
(
f
"✓ Router ready at
{
router_url
}
"
)
except
TimeoutError
:
except
TimeoutError
:
print
(
f
"✗ Router failed to start"
)
logger
.
error
(
f
"✗ Router failed to start"
)
# Cleanup: kill router and all workers
# Cleanup: kill router and all workers
try
:
try
:
router_proc
.
kill
()
router_proc
.
kill
()
...
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
...
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
pass
pass
raise
raise
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"✓ gRPC cluster ready!"
)
logger
.
info
(
f
"✓ gRPC cluster ready!"
)
print
(
f
" Router:
{
router_url
}
"
)
logger
.
info
(
f
" Router:
{
router_url
}
"
)
print
(
f
" Workers:
{
len
(
workers
)
}
"
)
logger
.
info
(
f
" Workers:
{
len
(
workers
)
}
"
)
print
(
f
"
{
'='
*
70
}
\n
"
)
logger
.
info
(
f
"
{
'='
*
70
}
\n
"
)
return
{
return
{
"workers"
:
workers
,
"workers"
:
workers
,
...
...
sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py
View file @
28b8a406
...
@@ -13,10 +13,7 @@ Run with:
...
@@ -13,10 +13,7 @@ Run with:
"""
"""
import
json
import
json
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
import
time
import
unittest
import
unittest
from
pathlib
import
Path
from
pathlib
import
Path
...
...
sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py
View file @
28b8a406
...
@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
...
@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
"""
"""
import
json
import
json
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
import
unittest
import
unittest
from
pathlib
import
Path
from
pathlib
import
Path
...
...
sgl-router/py_test/e2e_grpc/util.py
View file @
28b8a406
...
@@ -8,6 +8,7 @@ Extracted and adapted from:
...
@@ -8,6 +8,7 @@ Extracted and adapted from:
- sglang.test.test_utils (constants and CustomTestCase)
- sglang.test.test_utils (constants and CustomTestCase)
"""
"""
import
logging
import
os
import
os
import
signal
import
signal
import
threading
import
threading
...
@@ -17,6 +18,8 @@ from typing import Optional, Union
...
@@ -17,6 +18,8 @@ from typing import Optional, Union
import
psutil
import
psutil
logger
=
logging
.
getLogger
(
__name__
)
try
:
try
:
from
transformers
import
(
from
transformers
import
(
AutoTokenizer
,
AutoTokenizer
,
...
@@ -204,8 +207,8 @@ def get_tokenizer(
...
@@ -204,8 +207,8 @@ def get_tokenizer(
raise
RuntimeError
(
err_msg
)
from
e
raise
RuntimeError
(
err_msg
)
from
e
if
not
isinstance
(
tokenizer
,
PreTrainedTokenizerFast
):
if
not
isinstance
(
tokenizer
,
PreTrainedTokenizerFast
):
pr
in
t
(
logger
.
warn
in
g
(
f
"
Warning:
Using a slow tokenizer. This might cause a performance "
f
"Using a slow tokenizer. This might cause a performance "
f
"degradation. Consider using a fast tokenizer instead."
f
"degradation. Consider using a fast tokenizer instead."
)
)
...
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
...
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
return
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
)
return
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
)
except
Exception
as
e
:
except
Exception
as
e
:
if
attempt
<
max_retry
:
if
attempt
<
max_retry
:
print
(
logger
.
info
(
f
"Test failed on attempt
{
attempt
+
1
}
/
{
max_retry
+
1
}
, retrying..."
f
"Test failed on attempt
{
attempt
+
1
}
/
{
max_retry
+
1
}
, retrying..."
)
)
continue
continue
else
:
else
:
# Last attempt, re-raise the exception
# Last attempt, re-raise the exception
raise
raise
def
setUp
(
self
):
"""Print test method name at the start of each test."""
print
(
f
"[Test Method]
{
self
.
_testMethodName
}
"
,
flush
=
True
)
sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py
View file @
28b8a406
...
@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
...
@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
"""
"""
import
os
import
os
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
import
time
import
time
import
unittest
import
unittest
...
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
...
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
self
.
stderr
.
flush
()
self
.
stderr
.
flush
()
lines
=
open
(
STDERR_FILENAME
).
readlines
()
lines
=
open
(
STDERR_FILENAME
).
readlines
()
for
line
in
lines
[
pt
:]:
for
line
in
lines
[
pt
:]:
print
(
line
,
end
=
""
,
flush
=
True
)
if
f
"#running-req:
{
num_requests
}
"
in
line
:
if
f
"#running-req:
{
num_requests
}
"
in
line
:
all_requests_running
=
True
all_requests_running
=
True
pt
=
-
1
pt
=
-
1
...
...
sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py
View file @
28b8a406
...
@@ -12,7 +12,6 @@ Run with:
...
@@ -12,7 +12,6 @@ Run with:
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
"""
"""
# CHANGE: Import router launcher instead of server launcher
import
sys
import
sys
from
pathlib
import
Path
from
pathlib
import
Path
...
...
sgl-router/py_test/e2e_response_api/conftest.py
View file @
28b8a406
...
@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
...
@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
This configures pytest to not collect base test classes that are meant to be inherited.
This configures pytest to not collect base test classes that are meant to be inherited.
"""
"""
import
pytest
import
pytest
# noqa: F401
def
pytest_collection_modifyitems
(
config
,
items
):
def
pytest_collection_modifyitems
(
config
,
items
):
...
...
sgl-router/py_test/e2e_response_api/mcp.py
View file @
28b8a406
...
@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
...
@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
self
.
assertEqual
(
resp
.
status_code
,
200
)
self
.
assertEqual
(
resp
.
status_code
,
200
)
data
=
resp
.
json
()
data
=
resp
.
json
()
print
(
f
"MCP response:
{
data
}
"
)
# Basic response structure
# Basic response structure
self
.
assertIn
(
"id"
,
data
)
self
.
assertIn
(
"id"
,
data
)
...
...
sgl-router/py_test/e2e_response_api/router_fixtures.py
View file @
28b8a406
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
This supports testing the Response API against real cloud providers.
This supports testing the Response API against real cloud providers.
"""
"""
import
logging
import
os
import
os
import
socket
import
socket
import
subprocess
import
subprocess
...
@@ -16,6 +17,8 @@ from typing import Optional
...
@@ -16,6 +17,8 @@ from typing import Optional
import
requests
import
requests
logger
=
logging
.
getLogger
(
__name__
)
def
wait_for_workers_ready
(
def
wait_for_workers_ready
(
router_url
:
str
,
router_url
:
str
,
...
@@ -50,9 +53,11 @@ def wait_for_workers_ready(
...
@@ -50,9 +53,11 @@ def wait_for_workers_ready(
attempt
+=
1
attempt
+=
1
elapsed
=
int
(
time
.
time
()
-
start_time
)
elapsed
=
int
(
time
.
time
()
-
start_time
)
#
Print
progress every 10 seconds
#
Log
progress every 10 seconds
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
print
(
f
" Still waiting for workers... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
logger
.
info
(
f
" Still waiting for workers... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
try
:
try
:
response
=
session
.
get
(
response
=
session
.
get
(
...
@@ -63,7 +68,7 @@ def wait_for_workers_ready(
...
@@ -63,7 +68,7 @@ def wait_for_workers_ready(
total_workers
=
data
.
get
(
"total"
,
0
)
total_workers
=
data
.
get
(
"total"
,
0
)
if
total_workers
==
expected_workers
:
if
total_workers
==
expected_workers
:
print
(
logger
.
info
(
f
" All
{
expected_workers
}
workers connected after
{
elapsed
}
s"
f
" All
{
expected_workers
}
workers connected after
{
elapsed
}
s"
)
)
return
return
...
@@ -124,16 +129,18 @@ def wait_for_router_ready(
...
@@ -124,16 +129,18 @@ def wait_for_router_ready(
attempt
+=
1
attempt
+=
1
elapsed
=
int
(
time
.
time
()
-
start_time
)
elapsed
=
int
(
time
.
time
()
-
start_time
)
#
Print
progress every 10 seconds
#
Log
progress every 10 seconds
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
if
elapsed
>
0
and
elapsed
%
10
==
0
and
attempt
%
10
==
0
:
print
(
f
" Still waiting for router... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
logger
.
info
(
f
" Still waiting for router... (
{
elapsed
}
/
{
timeout
}
s elapsed)"
)
try
:
try
:
response
=
session
.
get
(
response
=
session
.
get
(
f
"
{
router_url
}
/health"
,
headers
=
headers
,
timeout
=
5
f
"
{
router_url
}
/health"
,
headers
=
headers
,
timeout
=
5
)
)
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
print
(
f
" Router ready after
{
elapsed
}
s"
)
logger
.
info
(
f
" Router ready after
{
elapsed
}
s"
)
return
return
else
:
else
:
last_error
=
f
"HTTP
{
response
.
status_code
}
"
last_error
=
f
"HTTP
{
response
.
status_code
}
"
...
@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
...
@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
else
:
else
:
router_port
=
find_free_port
()
router_port
=
find_free_port
()
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"Launching
{
backend
.
upper
()
}
router"
)
logger
.
info
(
f
"Launching
{
backend
.
upper
()
}
router"
)
print
(
f
"
{
'='
*
70
}
"
)
logger
.
info
(
f
"
{
'='
*
70
}
"
)
print
(
f
" Backend:
{
backend
}
"
)
logger
.
info
(
f
" Backend:
{
backend
}
"
)
print
(
f
" Router port:
{
router_port
}
"
)
logger
.
info
(
f
" Router port:
{
router_port
}
"
)
print
(
f
" History backend:
{
history_backend
}
"
)
logger
.
info
(
f
" History backend:
{
history_backend
}
"
)
# Determine worker URL based on backend
# Determine worker URL based on backend
if
backend
==
"openai"
:
if
backend
==
"openai"
:
...
@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
...
@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
else
:
else
:
raise
ValueError
(
f
"Unsupported backend:
{
backend
}
"
)
raise
ValueError
(
f
"Unsupported backend:
{
backend
}
"
)
print
(
f
" Worker URL:
{
worker_url
}
"
)
logger
.
info
(
f
" Worker URL:
{
worker_url
}
"
)
# Build router command
# Build router command
router_cmd
=
[
router_cmd
=
[
...
@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
...
@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
router_cmd
.
extend
(
router_args
)
router_cmd
.
extend
(
router_args
)
if
show_output
:
if
show_output
:
print
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
logger
.
info
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
# Set up environment with backend API key
# Set up environment with backend API key
env
=
os
.
environ
.
copy
()
env
=
os
.
environ
.
copy
()
...
@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
...
@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
try
:
try
:
wait_for_router_ready
(
router_url
,
timeout
=
timeout
,
api_key
=
None
)
wait_for_router_ready
(
router_url
,
timeout
=
timeout
,
api_key
=
None
)
print
(
f
"✓ Router ready at
{
router_url
}
"
)
logger
.
info
(
f
"✓ Router ready at
{
router_url
}
"
)
except
TimeoutError
:
except
TimeoutError
:
print
(
f
"✗ Router failed to start"
)
logger
.
error
(
f
"✗ Router failed to start"
)
# Cleanup: kill router
# Cleanup: kill router
try
:
try
:
router_proc
.
kill
()
router_proc
.
kill
()
...
@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
...
@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
pass
pass
raise
raise
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"✓
{
backend
.
upper
()
}
router ready!"
)
logger
.
info
(
f
"✓
{
backend
.
upper
()
}
router ready!"
)
print
(
f
" Router:
{
router_url
}
"
)
logger
.
info
(
f
" Router:
{
router_url
}
"
)
print
(
f
"
{
'='
*
70
}
\n
"
)
logger
.
info
(
f
"
{
'='
*
70
}
\n
"
)
return
{
return
{
"router"
:
router_proc
,
"router"
:
router_proc
,
...
@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
...
@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
else
:
else
:
router_port
=
find_free_port
()
router_port
=
find_free_port
()
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"Launching gRPC cluster (separate workers + router)"
)
logger
.
info
(
f
"Launching gRPC cluster (separate workers + router)"
)
print
(
f
"
{
'='
*
70
}
"
)
logger
.
info
(
f
"
{
'='
*
70
}
"
)
print
(
f
" Model:
{
model
}
"
)
logger
.
info
(
f
" Model:
{
model
}
"
)
print
(
f
" Router port:
{
router_port
}
"
)
logger
.
info
(
f
" Router port:
{
router_port
}
"
)
print
(
f
" Workers:
{
num_workers
}
"
)
logger
.
info
(
f
" Workers:
{
num_workers
}
"
)
print
(
f
" TP size:
{
tp_size
}
"
)
logger
.
info
(
f
" TP size:
{
tp_size
}
"
)
print
(
f
" Policy:
{
policy
}
"
)
logger
.
info
(
f
" Policy:
{
policy
}
"
)
# Step 1: Launch workers with gRPC enabled
# Step 1: Launch workers with gRPC enabled
workers
=
[]
workers
=
[]
...
@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
...
@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
worker_url
=
f
"grpc://127.0.0.1:
{
worker_port
}
"
worker_url
=
f
"grpc://127.0.0.1:
{
worker_port
}
"
worker_urls
.
append
(
worker_url
)
worker_urls
.
append
(
worker_url
)
print
(
f
"
\n
[Worker
{
i
+
1
}
/
{
num_workers
}
]"
)
logger
.
info
(
f
"
\n
[Worker
{
i
+
1
}
/
{
num_workers
}
]"
)
print
(
f
" Port:
{
worker_port
}
"
)
logger
.
info
(
f
" Port:
{
worker_port
}
"
)
print
(
f
" URL:
{
worker_url
}
"
)
logger
.
info
(
f
" URL:
{
worker_url
}
"
)
# Build worker command
# Build worker command
worker_cmd
=
[
worker_cmd
=
[
...
@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
...
@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
)
)
workers
.
append
(
worker_proc
)
workers
.
append
(
worker_proc
)
print
(
f
" PID:
{
worker_proc
.
pid
}
"
)
logger
.
info
(
f
" PID:
{
worker_proc
.
pid
}
"
)
# Give workers a moment to start binding to ports
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
# The router will check worker health when it starts
print
(
f
"
\n
Waiting for
{
num_workers
}
workers to initialize (20s)..."
)
logger
.
info
(
f
"
\n
Waiting for
{
num_workers
}
workers to initialize (20s)..."
)
time
.
sleep
(
20
)
time
.
sleep
(
20
)
# Quick check: make sure worker processes are still alive
# Quick check: make sure worker processes are still alive
for
i
,
worker
in
enumerate
(
workers
):
for
i
,
worker
in
enumerate
(
workers
):
if
worker
.
poll
()
is
not
None
:
if
worker
.
poll
()
is
not
None
:
print
(
f
" ✗ Worker
{
i
+
1
}
died during startup (exit code:
{
worker
.
poll
()
}
)"
)
logger
.
error
(
f
" ✗ Worker
{
i
+
1
}
died during startup (exit code:
{
worker
.
poll
()
}
)"
)
# Cleanup: kill all workers
# Cleanup: kill all workers
for
w
in
workers
:
for
w
in
workers
:
try
:
try
:
...
@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
...
@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
pass
pass
raise
RuntimeError
(
f
"Worker
{
i
+
1
}
failed to start"
)
raise
RuntimeError
(
f
"Worker
{
i
+
1
}
failed to start"
)
print
(
f
"✓ All
{
num_workers
}
workers started (router will verify connectivity)"
)
logger
.
info
(
f
"✓ All
{
num_workers
}
workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
# Step 2: Launch router pointing to workers
print
(
f
"
\n
[Router]"
)
logger
.
info
(
f
"
\n
[Router]"
)
print
(
f
" Port:
{
router_port
}
"
)
logger
.
info
(
f
" Port:
{
router_port
}
"
)
print
(
f
" Worker URLs:
{
', '
.
join
(
worker_urls
)
}
"
)
logger
.
info
(
f
" Worker URLs:
{
', '
.
join
(
worker_urls
)
}
"
)
# Build router command
# Build router command
router_cmd
=
[
router_cmd
=
[
...
@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
...
@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
router_cmd
.
extend
(
router_args
)
router_cmd
.
extend
(
router_args
)
if
show_output
:
if
show_output
:
print
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
logger
.
info
(
f
" Command:
{
' '
.
join
(
router_cmd
)
}
"
)
# Launch router
# Launch router
if
show_output
:
if
show_output
:
...
@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
...
@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
stderr
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
)
)
print
(
f
" PID:
{
router_proc
.
pid
}
"
)
logger
.
info
(
f
" PID:
{
router_proc
.
pid
}
"
)
# Wait for router to be ready
# Wait for router to be ready
router_url
=
f
"http://127.0.0.1:
{
router_port
}
"
router_url
=
f
"http://127.0.0.1:
{
router_port
}
"
print
(
f
"
\n
Waiting for router to start at
{
router_url
}
..."
)
logger
.
info
(
f
"
\n
Waiting for router to start at
{
router_url
}
..."
)
try
:
try
:
wait_for_workers_ready
(
wait_for_workers_ready
(
router_url
,
expected_workers
=
num_workers
,
timeout
=
180
,
api_key
=
api_key
router_url
,
expected_workers
=
num_workers
,
timeout
=
180
,
api_key
=
api_key
)
)
print
(
f
"✓ Router ready at
{
router_url
}
"
)
logger
.
info
(
f
"✓ Router ready at
{
router_url
}
"
)
except
TimeoutError
:
except
TimeoutError
:
print
(
f
"✗ Router failed to start"
)
logger
.
error
(
f
"✗ Router failed to start"
)
# Cleanup: kill router and all workers
# Cleanup: kill router and all workers
try
:
try
:
router_proc
.
kill
()
router_proc
.
kill
()
...
@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
...
@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
pass
pass
raise
raise
print
(
f
"
\n
{
'='
*
70
}
"
)
logger
.
info
(
f
"
\n
{
'='
*
70
}
"
)
print
(
f
"✓ gRPC cluster ready!"
)
logger
.
info
(
f
"✓ gRPC cluster ready!"
)
print
(
f
" Router:
{
router_url
}
"
)
logger
.
info
(
f
" Router:
{
router_url
}
"
)
print
(
f
" Workers:
{
len
(
workers
)
}
"
)
logger
.
info
(
f
" Workers:
{
len
(
workers
)
}
"
)
print
(
f
"
{
'='
*
70
}
\n
"
)
logger
.
info
(
f
"
{
'='
*
70
}
\n
"
)
return
{
return
{
"workers"
:
workers
,
"workers"
:
workers
,
...
...
sgl-router/py_test/e2e_response_api/state_management.py
View file @
28b8a406
...
@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
...
@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
resp
=
self
.
create_response
(
resp
=
self
.
create_response
(
"Test"
,
previous_response_id
=
"resp_invalid123"
,
max_output_tokens
=
50
"Test"
,
previous_response_id
=
"resp_invalid123"
,
max_output_tokens
=
50
)
)
# Should return 404 or 400 for invalid response ID
if
resp
.
status_code
!=
200
:
print
(
f
"
\n
❌ Response creation failed!"
)
print
(
f
"Status:
{
resp
.
status_code
}
"
)
print
(
f
"Response:
{
resp
.
text
}
"
)
self
.
assertIn
(
resp
.
status_code
,
[
400
,
404
])
self
.
assertIn
(
resp
.
status_code
,
[
400
,
404
])
def
test_conversation_with_multiple_turns
(
self
):
def
test_conversation_with_multiple_turns
(
self
):
...
...
sgl-router/py_test/e2e_response_api/util.py
View file @
28b8a406
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
Utility functions for Response API e2e tests.
Utility functions for Response API e2e tests.
"""
"""
import
logging
import
os
import
os
import
signal
import
signal
import
threading
import
threading
...
@@ -9,6 +10,8 @@ import unittest
...
@@ -9,6 +10,8 @@ import unittest
import
psutil
import
psutil
logger
=
logging
.
getLogger
(
__name__
)
def
kill_process_tree
(
parent_pid
,
include_parent
:
bool
=
True
,
skip_pid
:
int
=
None
):
def
kill_process_tree
(
parent_pid
,
include_parent
:
bool
=
True
,
skip_pid
:
int
=
None
):
"""
"""
...
@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
...
@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
return
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
)
return
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
)
except
Exception
as
e
:
except
Exception
as
e
:
if
attempt
<
max_retry
:
if
attempt
<
max_retry
:
print
(
logger
.
info
(
f
"Test failed on attempt
{
attempt
+
1
}
/
{
max_retry
+
1
}
, retrying..."
f
"Test failed on attempt
{
attempt
+
1
}
/
{
max_retry
+
1
}
, retrying..."
)
)
continue
continue
else
:
else
:
# Last attempt, re-raise the exception
# Last attempt, re-raise the exception
raise
raise
def
setUp
(
self
):
"""Print test method name at the start of each test."""
print
(
f
"[Test Method]
{
self
.
_testMethodName
}
"
,
flush
=
True
)
sgl-router/py_test/integration/conftest.py
View file @
28b8a406
import
os
import
subprocess
import
subprocess
import
time
import
time
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Dict
,
Iterable
,
List
,
Optional
,
Tuple
from
typing
import
Iterable
,
List
,
Optional
,
Tuple
import
pytest
import
pytest
import
requests
import
requests
...
...
sgl-router/py_test/integration/load_balancing/test_power_of_two.py
View file @
28b8a406
...
@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
...
@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
urls
=
urls_slow
+
urls_fast
urls
=
urls_slow
+
urls_fast
ids
=
ids_slow
+
ids_fast
ids
=
ids_slow
+
ids_fast
slow_id
=
ids_slow
[
0
]
slow_id
=
ids_slow
[
0
]
slow_url
=
urls_slow
[
0
]
rh
=
router_manager
.
start_router
(
rh
=
router_manager
.
start_router
(
worker_urls
=
urls
,
worker_urls
=
urls
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment