sglang / Commits

Commit 28b8a406 (Unverified)
Authored Oct 23, 2025 by Chang Su; committed by GitHub on Oct 23, 2025
[router][CI] Clean up imports and prints statements in sgl-router/py_test (#12024)
Parent: 8bd26dd4
Changes: 31 files
Showing 20 changed files with 114 additions and 131 deletions (+114, -131)
.pre-commit-config.yaml  +1 -1
sgl-router/py_test/e2e/test_e2e_embeddings.py  +0 -1
sgl-router/py_test/e2e/test_pd_router.py  +0 -1
sgl-router/py_test/e2e_grpc/basic/test_openai_server.py  +0 -4
sgl-router/py_test/e2e_grpc/conftest.py  +1 -1
sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py  +0 -10
sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py  +0 -4
sgl-router/py_test/e2e_grpc/fixtures.py  +40 -31
sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py  +0 -3
sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py  +0 -2
sgl-router/py_test/e2e_grpc/util.py  +6 -7
sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py  +0 -3
sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py  +0 -1
sgl-router/py_test/e2e_response_api/conftest.py  +1 -1
sgl-router/py_test/e2e_response_api/mcp.py  +0 -1
sgl-router/py_test/e2e_response_api/router_fixtures.py  +59 -48
sgl-router/py_test/e2e_response_api/state_management.py  +0 -5
sgl-router/py_test/e2e_response_api/util.py  +4 -5
sgl-router/py_test/integration/conftest.py  +1 -2
sgl-router/py_test/integration/load_balancing/test_power_of_two.py  +1 -0
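Two patterns recur throughout the diffs below: unused imports are deleted, and bare print calls in the shared test fixtures are routed through a module-level logger instead. As a minimal sketch of the logging pattern (illustrative names, not code copied from the repository), keeping in mind that logger.info output stays hidden under pytest's defaults unless log capture or CLI logging is enabled:

import logging

# One logger per test-helper module, named after the module itself.
logger = logging.getLogger(__name__)


def report_progress(elapsed: int, timeout: int) -> None:
    # Previously: print(f"  Still waiting... ({elapsed}/{timeout}s elapsed)")
    # The same message now goes through logging, so it can be filtered by level.
    logger.info(f"  Still waiting... ({elapsed}/{timeout}s elapsed)")


if __name__ == "__main__":
    # Hypothetical local run: make INFO-level messages visible on stderr.
    logging.basicConfig(level=logging.INFO)
    report_progress(10, 180)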
.pre-commit-config.yaml

@@ -30,7 +30,7 @@ repos:
        args:
          - --select=F401,F821
          - --fix
-       files: ^(benchmark/|docs/|examples/|python/sglang/)
+       files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
        exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
  - repo: https://github.com/psf/black
    rev: 24.10.0
...
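For context, pre-commit treats the files: and exclude: values as Python regular expressions matched against each candidate path, so the one-line change above pulls the sgl-router/py_test tree into the ruff hook's scope (in regex terms, py_* means "py" followed by zero or more underscores, so effectively any path beginning with sgl-router/py). A small illustrative check of the new patterns; the sample paths below are made up:

import re

# Patterns copied from the updated hook configuration above.
FILES = re.compile(r"^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)")
EXCLUDE = re.compile(
    r"__init__\.py$|\.ipynb$"
    r"|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$"
    r"|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$"
)

for path in (
    "sgl-router/py_test/e2e_grpc/fixtures.py",  # newly in scope for the ruff hook
    "python/sglang/srt/grpc/foo_pb2.py",        # in scope but excluded (generated code)
    "sgl-router/src/main.rs",                   # not matched by files:, so skipped
):
    selected = bool(FILES.search(path)) and not EXCLUDE.search(path)
    print(f"{path}: {'linted' if selected else 'skipped'}")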
sgl-router/py_test/e2e/test_e2e_embeddings.py

import time
from types import SimpleNamespace

import pytest
import requests
...
sgl-router/py_test/e2e/test_pd_router.py

import logging
import os
import socket
import subprocess
import time
...
sgl-router/py_test/e2e_grpc/basic/test_openai_server.py

@@ -13,14 +13,11 @@ Run with:
"""

import json

# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path

import openai
import requests

_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
...

@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
        try:
            js_obj = json.loads(text)
        except (TypeError, json.decoder.JSONDecodeError):
-           print("JSONDecodeError", text)
            raise
        assert isinstance(js_obj["name"], str)
        assert isinstance(js_obj["population"], int)
...
sgl-router/py_test/e2e_grpc/conftest.py

@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tests
import sys
from pathlib import Path

-import pytest
+import pytest  # noqa: F401

# Ensure router py_src is importable
_ROUTER_ROOT = Path(__file__).resolve().parents[2]
...
sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py

@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking...
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""

import asyncio
import json
import os
import sys
import time
import unittest

# CHANGE: Import router launcher instead of server launcher
from pathlib import Path

import openai
import requests

_TEST_DIR = Path(__file__).parent
...

@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
from fixtures import popen_launch_workers_and_router
from util import (
    DEFAULT_ENABLE_THINKING_MODEL_PATH,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
    get_tokenizer,
    kill_process_tree,
)
...

@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
        has_reasoning = False
        has_content = False
-       print("\n=== Stream With Reasoning ===")
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
...

@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
        has_reasoning = False
        has_content = False
-       print("\n=== Stream Without Reasoning ===")
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
...
sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py

@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC...
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""

import json

# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path

import openai
import requests

_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
...
sgl-router/py_test/e2e_grpc/fixtures.py

@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ...
This approach gives more control and matches production deployment patterns.
"""

+import logging
import socket
import subprocess
import time
...

@@ -15,6 +16,8 @@ from typing import Optional
import requests

+logger = logging.getLogger(__name__)
+

def find_free_port() -> int:
    """Find an available port on localhost."""
...

@@ -56,9 +59,11 @@ def wait_for_workers_ready(
        attempt += 1
        elapsed = int(time.time() - start_time)

-       # Print progress every 10 seconds
+       # Log progress every 10 seconds
        if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-           print(f"  Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
+           logger.info(f"  Still waiting for workers... ({elapsed}/{timeout}s elapsed)")

        try:
            response = session.get(
...

@@ -69,7 +74,7 @@ def wait_for_workers_ready(
                total_workers = data.get("total", 0)

                if total_workers == expected_workers:
-                   print(
+                   logger.info(
                        f"  All {expected_workers} workers connected after {elapsed}s"
                    )
                    return
...

@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
    else:
        router_port = find_free_port()

-   print(f"\n{'=' * 70}")
-   print(f"Launching gRPC cluster (separate workers + router)")
-   print(f"{'=' * 70}")
-   print(f"  Model: {model}")
-   print(f"  Router port: {router_port}")
-   print(f"  Workers: {num_workers}")
-   print(f"  TP size: {tp_size}")
-   print(f"  Policy: {policy}")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"Launching gRPC cluster (separate workers + router)")
+   logger.info(f"{'=' * 70}")
+   logger.info(f"  Model: {model}")
+   logger.info(f"  Router port: {router_port}")
+   logger.info(f"  Workers: {num_workers}")
+   logger.info(f"  TP size: {tp_size}")
+   logger.info(f"  Policy: {policy}")

    # Step 1: Launch workers with gRPC enabled
    workers = []
...

@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
        worker_url = f"grpc://127.0.0.1:{worker_port}"
        worker_urls.append(worker_url)

-       print(f"\n[Worker {i + 1}/{num_workers}]")
-       print(f"  Port: {worker_port}")
-       print(f"  URL: {worker_url}")
+       logger.info(f"\n[Worker {i + 1}/{num_workers}]")
+       logger.info(f"  Port: {worker_port}")
+       logger.info(f"  URL: {worker_url}")

        # Build worker command
        worker_cmd = [
...

@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
        )
        workers.append(worker_proc)
-       print(f"  PID: {worker_proc.pid}")
+       logger.info(f"  PID: {worker_proc.pid}")

    # Give workers a moment to start binding to ports
    # The router will check worker health when it starts
-   print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
+   logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
    time.sleep(20)

    # Quick check: make sure worker processes are still alive
    for i, worker in enumerate(workers):
        if worker.poll() is not None:
-           print(f"  ✗ Worker {i + 1} died during startup (exit code: {worker.poll()})")
+           logger.error(f"  ✗ Worker {i + 1} died during startup (exit code: {worker.poll()})")
            # Cleanup: kill all workers
            for w in workers:
                try:
...

@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
                    pass
            raise RuntimeError(f"Worker {i + 1} failed to start")

-   print(f"✓ All {num_workers} workers started (router will verify connectivity)")
+   logger.info(f"✓ All {num_workers} workers started (router will verify connectivity)")

    # Step 2: Launch router pointing to workers
-   print(f"\n[Router]")
-   print(f"  Port: {router_port}")
-   print(f"  Worker URLs: {', '.join(worker_urls)}")
+   logger.info(f"\n[Router]")
+   logger.info(f"  Port: {router_port}")
+   logger.info(f"  Worker URLs: {', '.join(worker_urls)}")

    # Build router command
    router_cmd = [
...

@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
        router_cmd.extend(router_args)

    if show_output:
-       print(f"  Command: {' '.join(router_cmd)}")
+       logger.info(f"  Command: {' '.join(router_cmd)}")

    # Launch router
    if show_output:
...

@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
            stderr=subprocess.PIPE,
        )

-   print(f"  PID: {router_proc.pid}")
+   logger.info(f"  PID: {router_proc.pid}")

    # Wait for router to be ready
    router_url = f"http://127.0.0.1:{router_port}"
-   print(f"\nWaiting for router to start at {router_url}...")
+   logger.info(f"\nWaiting for router to start at {router_url}...")

    try:
        wait_for_workers_ready(router_url, expected_workers=num_workers, timeout=180, api_key=api_key)
-       print(f"✓ Router ready at {router_url}")
+       logger.info(f"✓ Router ready at {router_url}")
    except TimeoutError:
-       print(f"✗ Router failed to start")
+       logger.error(f"✗ Router failed to start")
        # Cleanup: kill router and all workers
        try:
            router_proc.kill()
...

@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
            pass
        raise

-   print(f"\n{'=' * 70}")
-   print(f"✓ gRPC cluster ready!")
-   print(f"  Router: {router_url}")
-   print(f"  Workers: {len(workers)}")
-   print(f"{'=' * 70}\n")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"✓ gRPC cluster ready!")
+   logger.info(f"  Router: {router_url}")
+   logger.info(f"  Workers: {len(workers)}")
+   logger.info(f"{'=' * 70}\n")

    return {
        "workers": workers,
...
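One practical consequence of the conversion above: the old print calls always showed up in captured stdout, while logger.info and logger.error are only visible once logging capture is configured (for example pytest's log_cli options or the built-in caplog fixture). A small, assumed example of asserting on the new log output with caplog; the test name and logger name are illustrative, and only the message text is taken from the hunk above:

import logging


def test_cluster_ready_message_is_logged(caplog):
    # caplog is pytest's built-in fixture for capturing log records.
    fixture_logger = logging.getLogger("fixtures")  # assumed logger/module name
    with caplog.at_level(logging.INFO, logger="fixtures"):
        fixture_logger.info("✓ gRPC cluster ready!")
    assert "gRPC cluster ready" in caplog.text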
sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py

@@ -13,10 +13,7 @@ Run with:
"""

import json

# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
from pathlib import Path
...
sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py

@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s...
"""

import json

# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
...
sgl-router/py_test/e2e_grpc/util.py

@@ -8,6 +8,7 @@ Extracted and adapted from:
- sglang.test.test_utils (constants and CustomTestCase)
"""

+import logging
import os
import signal
import threading
...

@@ -17,6 +18,8 @@ from typing import Optional, Union
import psutil

+logger = logging.getLogger(__name__)
+
try:
    from transformers import (
        AutoTokenizer,
...

@@ -204,8 +207,8 @@ def get_tokenizer(
            raise RuntimeError(err_msg) from e

    if not isinstance(tokenizer, PreTrainedTokenizerFast):
-       print(
-           f"Warning: Using a slow tokenizer. This might cause a performance "
+       logger.warning(
+           f"Using a slow tokenizer. This might cause a performance "
            f"degradation. Consider using a fast tokenizer instead."
        )
...

@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
                return super(CustomTestCase, self)._callTestMethod(method)
            except Exception as e:
                if attempt < max_retry:
-                   print(
+                   logger.info(
                        f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
                    )
                    continue
                else:
                    # Last attempt, re-raise the exception
                    raise

-   def setUp(self):
-       """Print test method name at the start of each test."""
-       print(f"[Test Method] {self._testMethodName}", flush=True)
sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py

@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge...
"""

import os

# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
...

@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
            self.stderr.flush()
            lines = open(STDERR_FILENAME).readlines()
            for line in lines[pt:]:
-               print(line, end="", flush=True)
                if f"#running-req: {num_requests}" in line:
                    all_requests_running = True
                    pt = -1
...
sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py

@@ -12,7 +12,6 @@ Run with:
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
"""

# CHANGE: Import router launcher instead of server launcher
import sys
from pathlib import Path
...
sgl-router/py_test/e2e_response_api/conftest.py

@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
This configures pytest to not collect base test classes that are meant to be inherited.
"""

-import pytest
+import pytest  # noqa: F401


def pytest_collection_modifyitems(config, items):
...
sgl-router/py_test/e2e_response_api/mcp.py

@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
        self.assertEqual(resp.status_code, 200)
        data = resp.json()
-       print(f"MCP response: {data}")

        # Basic response structure
        self.assertIn("id", data)
...
sgl-router/py_test/e2e_response_api/router_fixtures.py

@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac...
This supports testing the Response API against real cloud providers.
"""

+import logging
import os
import socket
import subprocess
...

@@ -16,6 +17,8 @@ from typing import Optional
import requests

+logger = logging.getLogger(__name__)
+

def wait_for_workers_ready(
    router_url: str,
...

@@ -50,9 +53,11 @@ def wait_for_workers_ready(
        attempt += 1
        elapsed = int(time.time() - start_time)

-       # Print progress every 10 seconds
+       # Log progress every 10 seconds
        if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-           print(f"  Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
+           logger.info(f"  Still waiting for workers... ({elapsed}/{timeout}s elapsed)")

        try:
            response = session.get(
...

@@ -63,7 +68,7 @@ def wait_for_workers_ready(
                total_workers = data.get("total", 0)

                if total_workers == expected_workers:
-                   print(
+                   logger.info(
                        f"  All {expected_workers} workers connected after {elapsed}s"
                    )
                    return
...

@@ -124,16 +129,18 @@ def wait_for_router_ready(
        attempt += 1
        elapsed = int(time.time() - start_time)

-       # Print progress every 10 seconds
+       # Log progress every 10 seconds
        if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-           print(f"  Still waiting for router... ({elapsed}/{timeout}s elapsed)")
+           logger.info(f"  Still waiting for router... ({elapsed}/{timeout}s elapsed)")

        try:
            response = session.get(f"{router_url}/health", headers=headers, timeout=5)
            if response.status_code == 200:
-               print(f"  Router ready after {elapsed}s")
+               logger.info(f"  Router ready after {elapsed}s")
                return
            else:
                last_error = f"HTTP {response.status_code}"
...

@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
    else:
        router_port = find_free_port()

-   print(f"\n{'=' * 70}")
-   print(f"Launching {backend.upper()} router")
-   print(f"{'=' * 70}")
-   print(f"  Backend: {backend}")
-   print(f"  Router port: {router_port}")
-   print(f"  History backend: {history_backend}")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"Launching {backend.upper()} router")
+   logger.info(f"{'=' * 70}")
+   logger.info(f"  Backend: {backend}")
+   logger.info(f"  Router port: {router_port}")
+   logger.info(f"  History backend: {history_backend}")

    # Determine worker URL based on backend
    if backend == "openai":
...

@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
    else:
        raise ValueError(f"Unsupported backend: {backend}")

-   print(f"  Worker URL: {worker_url}")
+   logger.info(f"  Worker URL: {worker_url}")

    # Build router command
    router_cmd = [
...

@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
        router_cmd.extend(router_args)

    if show_output:
-       print(f"  Command: {' '.join(router_cmd)}")
+       logger.info(f"  Command: {' '.join(router_cmd)}")

    # Set up environment with backend API key
    env = os.environ.copy()
...

@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
    try:
        wait_for_router_ready(router_url, timeout=timeout, api_key=None)
-       print(f"✓ Router ready at {router_url}")
+       logger.info(f"✓ Router ready at {router_url}")
    except TimeoutError:
-       print(f"✗ Router failed to start")
+       logger.error(f"✗ Router failed to start")
        # Cleanup: kill router
        try:
            router_proc.kill()
...

@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
            pass
        raise

-   print(f"\n{'=' * 70}")
-   print(f"✓ {backend.upper()} router ready!")
-   print(f"  Router: {router_url}")
-   print(f"{'=' * 70}\n")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"✓ {backend.upper()} router ready!")
+   logger.info(f"  Router: {router_url}")
+   logger.info(f"{'=' * 70}\n")

    return {
        "router": router_proc,
...

@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
    else:
        router_port = find_free_port()

-   print(f"\n{'=' * 70}")
-   print(f"Launching gRPC cluster (separate workers + router)")
-   print(f"{'=' * 70}")
-   print(f"  Model: {model}")
-   print(f"  Router port: {router_port}")
-   print(f"  Workers: {num_workers}")
-   print(f"  TP size: {tp_size}")
-   print(f"  Policy: {policy}")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"Launching gRPC cluster (separate workers + router)")
+   logger.info(f"{'=' * 70}")
+   logger.info(f"  Model: {model}")
+   logger.info(f"  Router port: {router_port}")
+   logger.info(f"  Workers: {num_workers}")
+   logger.info(f"  TP size: {tp_size}")
+   logger.info(f"  Policy: {policy}")

    # Step 1: Launch workers with gRPC enabled
    workers = []
...

@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
        worker_url = f"grpc://127.0.0.1:{worker_port}"
        worker_urls.append(worker_url)

-       print(f"\n[Worker {i + 1}/{num_workers}]")
-       print(f"  Port: {worker_port}")
-       print(f"  URL: {worker_url}")
+       logger.info(f"\n[Worker {i + 1}/{num_workers}]")
+       logger.info(f"  Port: {worker_port}")
+       logger.info(f"  URL: {worker_url}")

        # Build worker command
        worker_cmd = [
...

@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
        )
        workers.append(worker_proc)
-       print(f"  PID: {worker_proc.pid}")
+       logger.info(f"  PID: {worker_proc.pid}")

    # Give workers a moment to start binding to ports
    # The router will check worker health when it starts
-   print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
+   logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
    time.sleep(20)

    # Quick check: make sure worker processes are still alive
    for i, worker in enumerate(workers):
        if worker.poll() is not None:
-           print(f"  ✗ Worker {i + 1} died during startup (exit code: {worker.poll()})")
+           logger.error(f"  ✗ Worker {i + 1} died during startup (exit code: {worker.poll()})")
            # Cleanup: kill all workers
            for w in workers:
                try:
...

@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
                    pass
            raise RuntimeError(f"Worker {i + 1} failed to start")

-   print(f"✓ All {num_workers} workers started (router will verify connectivity)")
+   logger.info(f"✓ All {num_workers} workers started (router will verify connectivity)")

    # Step 2: Launch router pointing to workers
-   print(f"\n[Router]")
-   print(f"  Port: {router_port}")
-   print(f"  Worker URLs: {', '.join(worker_urls)}")
+   logger.info(f"\n[Router]")
+   logger.info(f"  Port: {router_port}")
+   logger.info(f"  Worker URLs: {', '.join(worker_urls)}")

    # Build router command
    router_cmd = [
...

@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
        router_cmd.extend(router_args)

    if show_output:
-       print(f"  Command: {' '.join(router_cmd)}")
+       logger.info(f"  Command: {' '.join(router_cmd)}")

    # Launch router
    if show_output:
...

@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
            stderr=subprocess.PIPE,
        )

-   print(f"  PID: {router_proc.pid}")
+   logger.info(f"  PID: {router_proc.pid}")

    # Wait for router to be ready
    router_url = f"http://127.0.0.1:{router_port}"
-   print(f"\nWaiting for router to start at {router_url}...")
+   logger.info(f"\nWaiting for router to start at {router_url}...")

    try:
        wait_for_workers_ready(router_url, expected_workers=num_workers, timeout=180, api_key=api_key)
-       print(f"✓ Router ready at {router_url}")
+       logger.info(f"✓ Router ready at {router_url}")
    except TimeoutError:
-       print(f"✗ Router failed to start")
+       logger.error(f"✗ Router failed to start")
        # Cleanup: kill router and all workers
        try:
            router_proc.kill()
...

@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
            pass
        raise

-   print(f"\n{'=' * 70}")
-   print(f"✓ gRPC cluster ready!")
-   print(f"  Router: {router_url}")
-   print(f"  Workers: {len(workers)}")
-   print(f"{'=' * 70}\n")
+   logger.info(f"\n{'=' * 70}")
+   logger.info(f"✓ gRPC cluster ready!")
+   logger.info(f"  Router: {router_url}")
+   logger.info(f"  Workers: {len(workers)}")
+   logger.info(f"{'=' * 70}\n")

    return {
        "workers": workers,
...
sgl-router/py_test/e2e_response_api/state_management.py

@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
        resp = self.create_response(
            "Test", previous_response_id="resp_invalid123", max_output_tokens=50
        )
        # Should return 404 or 400 for invalid response ID
-       if resp.status_code != 200:
-           print(f"\n❌ Response creation failed!")
-           print(f"Status: {resp.status_code}")
-           print(f"Response: {resp.text}")
        self.assertIn(resp.status_code, [400, 404])

    def test_conversation_with_multiple_turns(self):
...
sgl-router/py_test/e2e_response_api/util.py

@@ -2,6 +2,7 @@
Utility functions for Response API e2e tests.
"""

+import logging
import os
import signal
import threading
...

@@ -9,6 +10,8 @@ import unittest
import psutil

+logger = logging.getLogger(__name__)
+

def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
    """
...

@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
                return super(CustomTestCase, self)._callTestMethod(method)
            except Exception as e:
                if attempt < max_retry:
-                   print(
+                   logger.info(
                        f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
                    )
                    continue
                else:
                    # Last attempt, re-raise the exception
                    raise

-   def setUp(self):
-       """Print test method name at the start of each test."""
-       print(f"[Test Method] {self._testMethodName}", flush=True)
sgl-router/py_test/integration/conftest.py

import os
import subprocess
import time
from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple

import pytest
import requests
...
sgl-router/py_test/integration/load_balancing/test_power_of_two.py

@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
    urls = urls_slow + urls_fast
    ids = ids_slow + ids_fast
    slow_id = ids_slow[0]
    slow_url = urls_slow[0]
    rh = router_manager.start_router(
        worker_urls=urls,
...
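Finally, the two ruff rules selected in the updated pre-commit hook are F401 (unused import) and F821 (undefined name); the import removals and small fixes on this page are the kind of cleanups those rules drive. A made-up module that would trip both rules:

import os  # F401: imported but never used in this module


def pick_fast_worker(urls_fast):
    slowest = urls_slow[0]  # F821: "urls_slow" is not defined anywhere in this scope
    return urls_fast[0]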