Unverified Commit 28b8a406 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

[router][CI] Clean up imports and print statements in sgl-router/py_test (#12024)

parent 8bd26dd4
......@@ -30,7 +30,7 @@ repos:
args:
- --select=F401,F821
- --fix
files: ^(benchmark/|docs/|examples/|python/sglang/)
files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
- repo: https://github.com/psf/black
rev: 24.10.0
......
import time
from types import SimpleNamespace
import pytest
import requests
......
import logging
import os
import socket
import subprocess
import time
......
......@@ -13,14 +13,11 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
......@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
try:
js_obj = json.loads(text)
except (TypeError, json.decoder.JSONDecodeError):
print("JSONDecodeError", text)
raise
assert isinstance(js_obj["name"], str)
assert isinstance(js_obj["population"], int)
......
......@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
import sys
from pathlib import Path
import pytest
import pytest # noqa: F401
# Ensure router py_src is importable
_ROUTER_ROOT = Path(__file__).resolve().parents[2]
......
......@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
import json
import os
import sys
import time
import unittest
# CHANGE: Import router launcher instead of server launcher
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
......@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
from fixtures import popen_launch_workers_and_router
from util import (
DEFAULT_ENABLE_THINKING_MODEL_PATH,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
get_tokenizer,
kill_process_tree,
)
......@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream With Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
......@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream Without Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
......
......@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
......
......@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
This approach gives more control and matches production deployment patterns.
"""
import logging
import socket
import subprocess
import time
......@@ -15,6 +16,8 @@ from typing import Optional
import requests
logger = logging.getLogger(__name__)
def find_free_port() -> int:
"""Find an available port on localhost."""
......@@ -56,9 +59,11 @@ def wait_for_workers_ready(
attempt += 1
elapsed = int(time.time() - start_time)
# Print progress every 10 seconds
# Log progress every 10 seconds
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
logger.info(
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
)
try:
response = session.get(
......@@ -69,7 +74,7 @@ def wait_for_workers_ready(
total_workers = data.get("total", 0)
if total_workers == expected_workers:
print(
logger.info(
f" All {expected_workers} workers connected after {elapsed}s"
)
return
......@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
else:
router_port = find_free_port()
print(f"\n{'='*70}")
print(f"Launching gRPC cluster (separate workers + router)")
print(f"{'='*70}")
print(f" Model: {model}")
print(f" Router port: {router_port}")
print(f" Workers: {num_workers}")
print(f" TP size: {tp_size}")
print(f" Policy: {policy}")
logger.info(f"\n{'='*70}")
logger.info(f"Launching gRPC cluster (separate workers + router)")
logger.info(f"{'='*70}")
logger.info(f" Model: {model}")
logger.info(f" Router port: {router_port}")
logger.info(f" Workers: {num_workers}")
logger.info(f" TP size: {tp_size}")
logger.info(f" Policy: {policy}")
# Step 1: Launch workers with gRPC enabled
workers = []
......@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
worker_url = f"grpc://127.0.0.1:{worker_port}"
worker_urls.append(worker_url)
print(f"\n[Worker {i+1}/{num_workers}]")
print(f" Port: {worker_port}")
print(f" URL: {worker_url}")
logger.info(f"\n[Worker {i+1}/{num_workers}]")
logger.info(f" Port: {worker_port}")
logger.info(f" URL: {worker_url}")
# Build worker command
worker_cmd = [
......@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
)
workers.append(worker_proc)
print(f" PID: {worker_proc.pid}")
logger.info(f" PID: {worker_proc.pid}")
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
time.sleep(20)
# Quick check: make sure worker processes are still alive
for i, worker in enumerate(workers):
if worker.poll() is not None:
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
logger.error(
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
)
# Cleanup: kill all workers
for w in workers:
try:
......@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
pass
raise RuntimeError(f"Worker {i+1} failed to start")
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
logger.info(
f"✓ All {num_workers} workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
print(f"\n[Router]")
print(f" Port: {router_port}")
print(f" Worker URLs: {', '.join(worker_urls)}")
logger.info(f"\n[Router]")
logger.info(f" Port: {router_port}")
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
# Build router command
router_cmd = [
......@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
router_cmd.extend(router_args)
if show_output:
print(f" Command: {' '.join(router_cmd)}")
logger.info(f" Command: {' '.join(router_cmd)}")
# Launch router
if show_output:
......@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
stderr=subprocess.PIPE,
)
print(f" PID: {router_proc.pid}")
logger.info(f" PID: {router_proc.pid}")
# Wait for router to be ready
router_url = f"http://127.0.0.1:{router_port}"
print(f"\nWaiting for router to start at {router_url}...")
logger.info(f"\nWaiting for router to start at {router_url}...")
try:
wait_for_workers_ready(
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
)
print(f"✓ Router ready at {router_url}")
logger.info(f"✓ Router ready at {router_url}")
except TimeoutError:
print(f"✗ Router failed to start")
logger.error(f"✗ Router failed to start")
# Cleanup: kill router and all workers
try:
router_proc.kill()
......@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
pass
raise
print(f"\n{'='*70}")
print(f"✓ gRPC cluster ready!")
print(f" Router: {router_url}")
print(f" Workers: {len(workers)}")
print(f"{'='*70}\n")
logger.info(f"\n{'='*70}")
logger.info(f"✓ gRPC cluster ready!")
logger.info(f" Router: {router_url}")
logger.info(f" Workers: {len(workers)}")
logger.info(f"{'='*70}\n")
return {
"workers": workers,
......
......@@ -13,10 +13,7 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
from pathlib import Path
......
......@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
......
......@@ -8,6 +8,7 @@ Extracted and adapted from:
- sglang.test.test_utils (constants and CustomTestCase)
"""
import logging
import os
import signal
import threading
......@@ -17,6 +18,8 @@ from typing import Optional, Union
import psutil
logger = logging.getLogger(__name__)
try:
from transformers import (
AutoTokenizer,
......@@ -204,8 +207,8 @@ def get_tokenizer(
raise RuntimeError(err_msg) from e
if not isinstance(tokenizer, PreTrainedTokenizerFast):
print(
f"Warning: Using a slow tokenizer. This might cause a performance "
logger.warning(
f"Using a slow tokenizer. This might cause a performance "
f"degradation. Consider using a fast tokenizer instead."
)
......@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
return super(CustomTestCase, self)._callTestMethod(method)
except Exception as e:
if attempt < max_retry:
print(
logger.info(
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
)
continue
else:
# Last attempt, re-raise the exception
raise
def setUp(self):
"""Print test method name at the start of each test."""
print(f"[Test Method] {self._testMethodName}", flush=True)
......@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
"""
import os
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
......@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
self.stderr.flush()
lines = open(STDERR_FILENAME).readlines()
for line in lines[pt:]:
print(line, end="", flush=True)
if f"#running-req: {num_requests}" in line:
all_requests_running = True
pt = -1
......
......@@ -12,7 +12,6 @@ Run with:
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
"""
# CHANGE: Import router launcher instead of server launcher
import sys
from pathlib import Path
......
......@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
This configures pytest to not collect base test classes that are meant to be inherited.
"""
import pytest
import pytest # noqa: F401
def pytest_collection_modifyitems(config, items):
......
......@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
self.assertEqual(resp.status_code, 200)
data = resp.json()
print(f"MCP response: {data}")
# Basic response structure
self.assertIn("id", data)
......
......@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
This supports testing the Response API against real cloud providers.
"""
import logging
import os
import socket
import subprocess
......@@ -16,6 +17,8 @@ from typing import Optional
import requests
logger = logging.getLogger(__name__)
def wait_for_workers_ready(
router_url: str,
......@@ -50,9 +53,11 @@ def wait_for_workers_ready(
attempt += 1
elapsed = int(time.time() - start_time)
# Print progress every 10 seconds
# Log progress every 10 seconds
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
logger.info(
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
)
try:
response = session.get(
......@@ -63,7 +68,7 @@ def wait_for_workers_ready(
total_workers = data.get("total", 0)
if total_workers == expected_workers:
print(
logger.info(
f" All {expected_workers} workers connected after {elapsed}s"
)
return
......@@ -124,16 +129,18 @@ def wait_for_router_ready(
attempt += 1
elapsed = int(time.time() - start_time)
# Print progress every 10 seconds
# Log progress every 10 seconds
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)")
logger.info(
f" Still waiting for router... ({elapsed}/{timeout}s elapsed)"
)
try:
response = session.get(
f"{router_url}/health", headers=headers, timeout=5
)
if response.status_code == 200:
print(f" Router ready after {elapsed}s")
logger.info(f" Router ready after {elapsed}s")
return
else:
last_error = f"HTTP {response.status_code}"
......@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
else:
router_port = find_free_port()
print(f"\n{'='*70}")
print(f"Launching {backend.upper()} router")
print(f"{'='*70}")
print(f" Backend: {backend}")
print(f" Router port: {router_port}")
print(f" History backend: {history_backend}")
logger.info(f"\n{'='*70}")
logger.info(f"Launching {backend.upper()} router")
logger.info(f"{'='*70}")
logger.info(f" Backend: {backend}")
logger.info(f" Router port: {router_port}")
logger.info(f" History backend: {history_backend}")
# Determine worker URL based on backend
if backend == "openai":
......@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
else:
raise ValueError(f"Unsupported backend: {backend}")
print(f" Worker URL: {worker_url}")
logger.info(f" Worker URL: {worker_url}")
# Build router command
router_cmd = [
......@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
router_cmd.extend(router_args)
if show_output:
print(f" Command: {' '.join(router_cmd)}")
logger.info(f" Command: {' '.join(router_cmd)}")
# Set up environment with backend API key
env = os.environ.copy()
......@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
try:
wait_for_router_ready(router_url, timeout=timeout, api_key=None)
print(f"✓ Router ready at {router_url}")
logger.info(f"✓ Router ready at {router_url}")
except TimeoutError:
print(f"✗ Router failed to start")
logger.error(f"✗ Router failed to start")
# Cleanup: kill router
try:
router_proc.kill()
......@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
pass
raise
print(f"\n{'='*70}")
print(f"✓ {backend.upper()} router ready!")
print(f" Router: {router_url}")
print(f"{'='*70}\n")
logger.info(f"\n{'='*70}")
logger.info(f"✓ {backend.upper()} router ready!")
logger.info(f" Router: {router_url}")
logger.info(f"{'='*70}\n")
return {
"router": router_proc,
......@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
else:
router_port = find_free_port()
print(f"\n{'='*70}")
print(f"Launching gRPC cluster (separate workers + router)")
print(f"{'='*70}")
print(f" Model: {model}")
print(f" Router port: {router_port}")
print(f" Workers: {num_workers}")
print(f" TP size: {tp_size}")
print(f" Policy: {policy}")
logger.info(f"\n{'='*70}")
logger.info(f"Launching gRPC cluster (separate workers + router)")
logger.info(f"{'='*70}")
logger.info(f" Model: {model}")
logger.info(f" Router port: {router_port}")
logger.info(f" Workers: {num_workers}")
logger.info(f" TP size: {tp_size}")
logger.info(f" Policy: {policy}")
# Step 1: Launch workers with gRPC enabled
workers = []
......@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
worker_url = f"grpc://127.0.0.1:{worker_port}"
worker_urls.append(worker_url)
print(f"\n[Worker {i+1}/{num_workers}]")
print(f" Port: {worker_port}")
print(f" URL: {worker_url}")
logger.info(f"\n[Worker {i+1}/{num_workers}]")
logger.info(f" Port: {worker_port}")
logger.info(f" URL: {worker_url}")
# Build worker command
worker_cmd = [
......@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
)
workers.append(worker_proc)
print(f" PID: {worker_proc.pid}")
logger.info(f" PID: {worker_proc.pid}")
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
time.sleep(20)
# Quick check: make sure worker processes are still alive
for i, worker in enumerate(workers):
if worker.poll() is not None:
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
logger.error(
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
)
# Cleanup: kill all workers
for w in workers:
try:
......@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
pass
raise RuntimeError(f"Worker {i+1} failed to start")
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
logger.info(
f"✓ All {num_workers} workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
print(f"\n[Router]")
print(f" Port: {router_port}")
print(f" Worker URLs: {', '.join(worker_urls)}")
logger.info(f"\n[Router]")
logger.info(f" Port: {router_port}")
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
# Build router command
router_cmd = [
......@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
router_cmd.extend(router_args)
if show_output:
print(f" Command: {' '.join(router_cmd)}")
logger.info(f" Command: {' '.join(router_cmd)}")
# Launch router
if show_output:
......@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
stderr=subprocess.PIPE,
)
print(f" PID: {router_proc.pid}")
logger.info(f" PID: {router_proc.pid}")
# Wait for router to be ready
router_url = f"http://127.0.0.1:{router_port}"
print(f"\nWaiting for router to start at {router_url}...")
logger.info(f"\nWaiting for router to start at {router_url}...")
try:
wait_for_workers_ready(
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
)
print(f"✓ Router ready at {router_url}")
logger.info(f"✓ Router ready at {router_url}")
except TimeoutError:
print(f"✗ Router failed to start")
logger.error(f"✗ Router failed to start")
# Cleanup: kill router and all workers
try:
router_proc.kill()
......@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
pass
raise
print(f"\n{'='*70}")
print(f"✓ gRPC cluster ready!")
print(f" Router: {router_url}")
print(f" Workers: {len(workers)}")
print(f"{'='*70}\n")
logger.info(f"\n{'='*70}")
logger.info(f"✓ gRPC cluster ready!")
logger.info(f" Router: {router_url}")
logger.info(f" Workers: {len(workers)}")
logger.info(f"{'='*70}\n")
return {
"workers": workers,
......
......@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
resp = self.create_response(
"Test", previous_response_id="resp_invalid123", max_output_tokens=50
)
# Should return 404 or 400 for invalid response ID
if resp.status_code != 200:
print(f"\n❌ Response creation failed!")
print(f"Status: {resp.status_code}")
print(f"Response: {resp.text}")
self.assertIn(resp.status_code, [400, 404])
def test_conversation_with_multiple_turns(self):
......
......@@ -2,6 +2,7 @@
Utility functions for Response API e2e tests.
"""
import logging
import os
import signal
import threading
......@@ -9,6 +10,8 @@ import unittest
import psutil
logger = logging.getLogger(__name__)
def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
"""
......@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
return super(CustomTestCase, self)._callTestMethod(method)
except Exception as e:
if attempt < max_retry:
print(
logger.info(
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
)
continue
else:
# Last attempt, re-raise the exception
raise
def setUp(self):
"""Print test method name at the start of each test."""
print(f"[Test Method] {self._testMethodName}", flush=True)
import os
import subprocess
import time
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple
from typing import Iterable, List, Optional, Tuple
import pytest
import requests
......
......@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
urls = urls_slow + urls_fast
ids = ids_slow + ids_fast
slow_id = ids_slow[0]
slow_url = urls_slow[0]
rh = router_manager.start_router(
worker_urls=urls,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment