Unverified commit 28b8a406, authored by Chang Su and committed by GitHub

[router][CI] Clean up imports and print statements in sgl-router/py_test (#12024)

parent 8bd26dd4
@@ -30,7 +30,7 @@ repos:
         args:
           - --select=F401,F821
           - --fix
-        files: ^(benchmark/|docs/|examples/|python/sglang/)
+        files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
         exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
   - repo: https://github.com/psf/black
     rev: 24.10.0
...
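For context, `--select=F401,F821` restricts this hook to two pyflakes-derived rules: F401 (imported but unused) and F821 (undefined name). A deliberately broken sketch, not taken from the repository, of the kind of code the widened `files` pattern now covers under `sgl-router/py_*`:

```python
# Illustrative only: this snippet intentionally violates both selected rules.
import json  # F401: "json" is imported but never used in this module


def summarize(payload):
    # F821: "jsn" is an undefined name (typo for "json"), flagged by lint before runtime
    return jsn.dumps(payload)
```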
 import time
-from types import SimpleNamespace
 import pytest
 import requests
...
 import logging
-import os
 import socket
 import subprocess
 import time
...
@@ -13,14 +13,11 @@ Run with:
 """
 import json
-# CHANGE: Import router launcher instead of server launcher
 import sys
 import unittest
 from pathlib import Path
 import openai
-import requests
 _TEST_DIR = Path(__file__).parent
 sys.path.insert(0, str(_TEST_DIR.parent))
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
         try:
             js_obj = json.loads(text)
         except (TypeError, json.decoder.JSONDecodeError):
-            print("JSONDecodeError", text)
             raise
         assert isinstance(js_obj["name"], str)
         assert isinstance(js_obj["population"], int)
...
@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
 import sys
 from pathlib import Path
-import pytest
+import pytest  # noqa: F401
 # Ensure router py_src is importable
 _ROUTER_ROOT = Path(__file__).resolve().parents[2]
...
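A minimal, hypothetical sketch of why the conftest keeps `pytest` behind `# noqa: F401` now that the directory is linted: the import may only matter at collection time, so ruff would otherwise flag it as unused. The path layout and join below are assumptions for illustration, not taken from the diff.

```python
"""Hypothetical conftest.py sketch; paths are illustrative."""
import sys
from pathlib import Path

import pytest  # noqa: F401  # kept importable for plugins even when unused here

# Make the router's py_src importable from the test tree, mirroring the diff's intent.
_ROUTER_ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(_ROUTER_ROOT / "py_src"))
```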
@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
 python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
 """
-import asyncio
 import json
-import os
 import sys
-import time
 import unittest
-# CHANGE: Import router launcher instead of server launcher
 from pathlib import Path
-import openai
 import requests
 _TEST_DIR = Path(__file__).parent
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
 from fixtures import popen_launch_workers_and_router
 from util import (
     DEFAULT_ENABLE_THINKING_MODEL_PATH,
-    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
     CustomTestCase,
-    get_tokenizer,
     kill_process_tree,
 )
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
         has_reasoning = False
         has_content = False
-        print("\n=== Stream With Reasoning ===")
         for line in response.iter_lines():
             if line:
                 line = line.decode("utf-8")
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
         has_reasoning = False
         has_content = False
-        print("\n=== Stream Without Reasoning ===")
         for line in response.iter_lines():
             if line:
                 line = line.decode("utf-8")
...
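The deleted banner prints sat just above the test's streaming loop. A hedged sketch of that loop, assuming OpenAI-style SSE chunks where reasoning arrives in a `reasoning_content` delta field (the field and function names here are assumptions, not taken from the diff):

```python
# Hedged sketch of consuming a streamed chat-completions response with requests.
import json

import requests


def stream_has_reasoning_and_content(url: str, payload: dict) -> tuple[bool, bool]:
    has_reasoning = False
    has_content = False
    with requests.post(url, json=payload, stream=True) as response:
        for line in response.iter_lines():
            if not line:
                continue
            line = line.decode("utf-8")
            if not line.startswith("data:") or line.endswith("[DONE]"):
                continue
            chunk = json.loads(line[len("data:"):].strip())
            delta = chunk["choices"][0]["delta"]
            has_reasoning |= bool(delta.get("reasoning_content"))
            has_content |= bool(delta.get("content"))
    return has_reasoning, has_content
```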
@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
 python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
 """
-import json
-# CHANGE: Import router launcher instead of server launcher
 import sys
 import unittest
 from pathlib import Path
 import openai
-import requests
 _TEST_DIR = Path(__file__).parent
 sys.path.insert(0, str(_TEST_DIR.parent))
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
 This approach gives more control and matches production deployment patterns.
 """
+import logging
 import socket
 import subprocess
 import time
@@ -15,6 +16,8 @@ from typing import Optional
 import requests
+logger = logging.getLogger(__name__)
 def find_free_port() -> int:
     """Find an available port on localhost."""
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
         attempt += 1
         elapsed = int(time.time() - start_time)
-        # Print progress every 10 seconds
+        # Log progress every 10 seconds
         if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-            print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
+            logger.info(
+                f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
+            )
         try:
             response = session.get(
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
                 total_workers = data.get("total", 0)
                 if total_workers == expected_workers:
-                    print(
+                    logger.info(
                         f" All {expected_workers} workers connected after {elapsed}s"
                     )
                     return
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
     else:
         router_port = find_free_port()
-    print(f"\n{'='*70}")
-    print(f"Launching gRPC cluster (separate workers + router)")
-    print(f"{'='*70}")
-    print(f" Model: {model}")
-    print(f" Router port: {router_port}")
-    print(f" Workers: {num_workers}")
-    print(f" TP size: {tp_size}")
-    print(f" Policy: {policy}")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"Launching gRPC cluster (separate workers + router)")
+    logger.info(f"{'='*70}")
+    logger.info(f" Model: {model}")
+    logger.info(f" Router port: {router_port}")
+    logger.info(f" Workers: {num_workers}")
+    logger.info(f" TP size: {tp_size}")
+    logger.info(f" Policy: {policy}")
     # Step 1: Launch workers with gRPC enabled
     workers = []
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
         worker_url = f"grpc://127.0.0.1:{worker_port}"
         worker_urls.append(worker_url)
-        print(f"\n[Worker {i+1}/{num_workers}]")
-        print(f" Port: {worker_port}")
-        print(f" URL: {worker_url}")
+        logger.info(f"\n[Worker {i+1}/{num_workers}]")
+        logger.info(f" Port: {worker_port}")
+        logger.info(f" URL: {worker_url}")
         # Build worker command
         worker_cmd = [
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
         )
         workers.append(worker_proc)
-        print(f" PID: {worker_proc.pid}")
+        logger.info(f" PID: {worker_proc.pid}")
     # Give workers a moment to start binding to ports
     # The router will check worker health when it starts
-    print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
+    logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
     time.sleep(20)
     # Quick check: make sure worker processes are still alive
     for i, worker in enumerate(workers):
         if worker.poll() is not None:
-            print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
+            logger.error(
+                f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
+            )
             # Cleanup: kill all workers
             for w in workers:
                 try:
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
                     pass
             raise RuntimeError(f"Worker {i+1} failed to start")
-    print(f"✓ All {num_workers} workers started (router will verify connectivity)")
+    logger.info(
+        f"✓ All {num_workers} workers started (router will verify connectivity)"
+    )
     # Step 2: Launch router pointing to workers
-    print(f"\n[Router]")
-    print(f" Port: {router_port}")
-    print(f" Worker URLs: {', '.join(worker_urls)}")
+    logger.info(f"\n[Router]")
+    logger.info(f" Port: {router_port}")
+    logger.info(f" Worker URLs: {', '.join(worker_urls)}")
     # Build router command
     router_cmd = [
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
     router_cmd.extend(router_args)
     if show_output:
-        print(f" Command: {' '.join(router_cmd)}")
+        logger.info(f" Command: {' '.join(router_cmd)}")
     # Launch router
     if show_output:
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
         stderr=subprocess.PIPE,
     )
-    print(f" PID: {router_proc.pid}")
+    logger.info(f" PID: {router_proc.pid}")
     # Wait for router to be ready
     router_url = f"http://127.0.0.1:{router_port}"
-    print(f"\nWaiting for router to start at {router_url}...")
+    logger.info(f"\nWaiting for router to start at {router_url}...")
     try:
         wait_for_workers_ready(
             router_url, expected_workers=num_workers, timeout=180, api_key=api_key
         )
-        print(f"✓ Router ready at {router_url}")
+        logger.info(f"✓ Router ready at {router_url}")
     except TimeoutError:
-        print(f"✗ Router failed to start")
+        logger.error(f"✗ Router failed to start")
         # Cleanup: kill router and all workers
         try:
             router_proc.kill()
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
             pass
         raise
-    print(f"\n{'='*70}")
-    print(f"✓ gRPC cluster ready!")
-    print(f" Router: {router_url}")
-    print(f" Workers: {len(workers)}")
-    print(f"{'='*70}\n")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"✓ gRPC cluster ready!")
+    logger.info(f" Router: {router_url}")
+    logger.info(f" Workers: {len(workers)}")
+    logger.info(f"{'='*70}\n")
     return {
         "workers": workers,
...
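The fixture module's bare prints become calls on a module-level logger. A minimal sketch of that pattern, assuming log output is surfaced by pytest's logging integration (e.g. `log_cli`) rather than configured in the fixture itself; the function below is illustrative, not part of the codebase. The diff keeps f-strings inside `logger.info(...)`; lazy `%`-style formatting, as used here, is an equally valid choice.

```python
# Module-level logger pattern: output carries a level and the logger name.
import logging

logger = logging.getLogger(__name__)


def report_cluster(router_url: str, num_workers: int) -> None:
    logger.info("Router: %s", router_url)
    logger.info("Workers: %d", num_workers)
    if num_workers == 0:
        # error-level records stand out in captured logs, unlike plain print()
        logger.error("No workers registered")
```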
@@ -13,10 +13,7 @@ Run with:
 """
 import json
-# CHANGE: Import router launcher instead of server launcher
 import sys
-import time
 import unittest
 from pathlib import Path
...
@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
 """
 import json
-# CHANGE: Import router launcher instead of server launcher
 import sys
 import unittest
 from pathlib import Path
...
@@ -8,6 +8,7 @@ Extracted and adapted from:
 - sglang.test.test_utils (constants and CustomTestCase)
 """
+import logging
 import os
 import signal
 import threading
@@ -17,6 +18,8 @@ from typing import Optional, Union
 import psutil
+logger = logging.getLogger(__name__)
 try:
     from transformers import (
         AutoTokenizer,
@@ -204,8 +207,8 @@ def get_tokenizer(
         raise RuntimeError(err_msg) from e
     if not isinstance(tokenizer, PreTrainedTokenizerFast):
-        print(
-            f"Warning: Using a slow tokenizer. This might cause a performance "
+        logger.warning(
+            f"Using a slow tokenizer. This might cause a performance "
             f"degradation. Consider using a fast tokenizer instead."
         )
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
                 return super(CustomTestCase, self)._callTestMethod(method)
             except Exception as e:
                 if attempt < max_retry:
-                    print(
+                    logger.info(
                         f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
                     )
                     continue
                 else:
                     # Last attempt, re-raise the exception
                     raise
-
-    def setUp(self):
-        """Print test method name at the start of each test."""
-        print(f"[Test Method] {self._testMethodName}", flush=True)
@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
 """
 import os
-# CHANGE: Import router launcher instead of server launcher
 import sys
 import time
 import unittest
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
         self.stderr.flush()
         lines = open(STDERR_FILENAME).readlines()
         for line in lines[pt:]:
-            print(line, end="", flush=True)
             if f"#running-req: {num_requests}" in line:
                 all_requests_running = True
             pt = -1
...
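With the per-line print gone, the test still scans fresh stderr output for the readiness marker. A compact sketch of that check, with hypothetical helper and argument names:

```python
# Hedged sketch: look for the "#running-req" marker in newly written log lines.
def all_requests_running_in_log(path: str, num_requests: int, start_line: int) -> bool:
    with open(path) as f:
        lines = f.readlines()
    return any(f"#running-req: {num_requests}" in line for line in lines[start_line:])
```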
@@ -12,7 +12,6 @@ Run with:
 pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
 """
-# CHANGE: Import router launcher instead of server launcher
 import sys
 from pathlib import Path
...
@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
 This configures pytest to not collect base test classes that are meant to be inherited.
 """
-import pytest
+import pytest  # noqa: F401
 def pytest_collection_modifyitems(config, items):
...
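The conftest's `pytest_collection_modifyitems` hook is what keeps inheritable base classes out of the run. A hedged sketch of one way such a hook can deselect them; the exact filtering rule used by this suite is not shown in the diff, and the `BaseTest` naming convention is an assumption drawn from later hunks.

```python
# Sketch: deselect collected items whose class looks like an abstract base test.
import pytest


def pytest_collection_modifyitems(config, items):
    deselected = [
        item
        for item in items
        if item.cls is not None and item.cls.__name__.endswith("BaseTest")  # assumed rule
    ]
    if deselected:
        config.hook.pytest_deselected(items=deselected)
        items[:] = [item for item in items if item not in deselected]
```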
@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
         self.assertEqual(resp.status_code, 200)
         data = resp.json()
-        print(f"MCP response: {data}")
         # Basic response structure
         self.assertIn("id", data)
...
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
 This supports testing the Response API against real cloud providers.
 """
+import logging
 import os
 import socket
 import subprocess
@@ -16,6 +17,8 @@ from typing import Optional
 import requests
+logger = logging.getLogger(__name__)
 def wait_for_workers_ready(
     router_url: str,
@@ -50,9 +53,11 @@ def wait_for_workers_ready(
         attempt += 1
         elapsed = int(time.time() - start_time)
-        # Print progress every 10 seconds
+        # Log progress every 10 seconds
         if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-            print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
+            logger.info(
+                f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
+            )
         try:
             response = session.get(
@@ -63,7 +68,7 @@ def wait_for_workers_ready(
                 total_workers = data.get("total", 0)
                 if total_workers == expected_workers:
-                    print(
+                    logger.info(
                         f" All {expected_workers} workers connected after {elapsed}s"
                     )
                     return
@@ -124,16 +129,18 @@ def wait_for_router_ready(
         attempt += 1
         elapsed = int(time.time() - start_time)
-        # Print progress every 10 seconds
+        # Log progress every 10 seconds
        if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
-            print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)")
+            logger.info(
+                f" Still waiting for router... ({elapsed}/{timeout}s elapsed)"
+            )
         try:
             response = session.get(
                 f"{router_url}/health", headers=headers, timeout=5
             )
             if response.status_code == 200:
-                print(f" Router ready after {elapsed}s")
+                logger.info(f" Router ready after {elapsed}s")
                 return
             else:
                 last_error = f"HTTP {response.status_code}"
@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
     else:
         router_port = find_free_port()
-    print(f"\n{'='*70}")
-    print(f"Launching {backend.upper()} router")
-    print(f"{'='*70}")
-    print(f" Backend: {backend}")
-    print(f" Router port: {router_port}")
-    print(f" History backend: {history_backend}")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"Launching {backend.upper()} router")
+    logger.info(f"{'='*70}")
+    logger.info(f" Backend: {backend}")
+    logger.info(f" Router port: {router_port}")
+    logger.info(f" History backend: {history_backend}")
     # Determine worker URL based on backend
     if backend == "openai":
@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
     else:
         raise ValueError(f"Unsupported backend: {backend}")
-    print(f" Worker URL: {worker_url}")
+    logger.info(f" Worker URL: {worker_url}")
     # Build router command
     router_cmd = [
@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
     router_cmd.extend(router_args)
     if show_output:
-        print(f" Command: {' '.join(router_cmd)}")
+        logger.info(f" Command: {' '.join(router_cmd)}")
     # Set up environment with backend API key
     env = os.environ.copy()
@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
     try:
         wait_for_router_ready(router_url, timeout=timeout, api_key=None)
-        print(f"✓ Router ready at {router_url}")
+        logger.info(f"✓ Router ready at {router_url}")
     except TimeoutError:
-        print(f"✗ Router failed to start")
+        logger.error(f"✗ Router failed to start")
         # Cleanup: kill router
         try:
             router_proc.kill()
@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
             pass
         raise
-    print(f"\n{'='*70}")
-    print(f"✓ {backend.upper()} router ready!")
-    print(f" Router: {router_url}")
-    print(f"{'='*70}\n")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"✓ {backend.upper()} router ready!")
+    logger.info(f" Router: {router_url}")
+    logger.info(f"{'='*70}\n")
     return {
         "router": router_proc,
@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
     else:
         router_port = find_free_port()
-    print(f"\n{'='*70}")
-    print(f"Launching gRPC cluster (separate workers + router)")
-    print(f"{'='*70}")
-    print(f" Model: {model}")
-    print(f" Router port: {router_port}")
-    print(f" Workers: {num_workers}")
-    print(f" TP size: {tp_size}")
-    print(f" Policy: {policy}")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"Launching gRPC cluster (separate workers + router)")
+    logger.info(f"{'='*70}")
+    logger.info(f" Model: {model}")
+    logger.info(f" Router port: {router_port}")
+    logger.info(f" Workers: {num_workers}")
+    logger.info(f" TP size: {tp_size}")
+    logger.info(f" Policy: {policy}")
     # Step 1: Launch workers with gRPC enabled
     workers = []
@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
         worker_url = f"grpc://127.0.0.1:{worker_port}"
         worker_urls.append(worker_url)
-        print(f"\n[Worker {i+1}/{num_workers}]")
-        print(f" Port: {worker_port}")
-        print(f" URL: {worker_url}")
+        logger.info(f"\n[Worker {i+1}/{num_workers}]")
+        logger.info(f" Port: {worker_port}")
+        logger.info(f" URL: {worker_url}")
         # Build worker command
         worker_cmd = [
@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
         )
         workers.append(worker_proc)
-        print(f" PID: {worker_proc.pid}")
+        logger.info(f" PID: {worker_proc.pid}")
     # Give workers a moment to start binding to ports
     # The router will check worker health when it starts
-    print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
+    logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
     time.sleep(20)
     # Quick check: make sure worker processes are still alive
     for i, worker in enumerate(workers):
         if worker.poll() is not None:
-            print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
+            logger.error(
+                f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
+            )
             # Cleanup: kill all workers
             for w in workers:
                 try:
@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
                     pass
             raise RuntimeError(f"Worker {i+1} failed to start")
-    print(f"✓ All {num_workers} workers started (router will verify connectivity)")
+    logger.info(
+        f"✓ All {num_workers} workers started (router will verify connectivity)"
+    )
     # Step 2: Launch router pointing to workers
-    print(f"\n[Router]")
-    print(f" Port: {router_port}")
-    print(f" Worker URLs: {', '.join(worker_urls)}")
+    logger.info(f"\n[Router]")
+    logger.info(f" Port: {router_port}")
+    logger.info(f" Worker URLs: {', '.join(worker_urls)}")
     # Build router command
     router_cmd = [
@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
     router_cmd.extend(router_args)
     if show_output:
-        print(f" Command: {' '.join(router_cmd)}")
+        logger.info(f" Command: {' '.join(router_cmd)}")
     # Launch router
     if show_output:
@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
         stderr=subprocess.PIPE,
     )
-    print(f" PID: {router_proc.pid}")
+    logger.info(f" PID: {router_proc.pid}")
     # Wait for router to be ready
     router_url = f"http://127.0.0.1:{router_port}"
-    print(f"\nWaiting for router to start at {router_url}...")
+    logger.info(f"\nWaiting for router to start at {router_url}...")
     try:
         wait_for_workers_ready(
             router_url, expected_workers=num_workers, timeout=180, api_key=api_key
        )
-        print(f"✓ Router ready at {router_url}")
+        logger.info(f"✓ Router ready at {router_url}")
     except TimeoutError:
-        print(f"✗ Router failed to start")
+        logger.error(f"✗ Router failed to start")
         # Cleanup: kill router and all workers
         try:
             router_proc.kill()
@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
             pass
         raise
-    print(f"\n{'='*70}")
-    print(f"✓ gRPC cluster ready!")
-    print(f" Router: {router_url}")
-    print(f" Workers: {len(workers)}")
-    print(f"{'='*70}\n")
+    logger.info(f"\n{'='*70}")
+    logger.info(f"✓ gRPC cluster ready!")
+    logger.info(f" Router: {router_url}")
+    logger.info(f" Workers: {len(workers)}")
+    logger.info(f"{'='*70}\n")
     return {
         "workers": workers,
...
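Both fixture modules share the same readiness pattern: poll an HTTP endpoint until it answers or a timeout expires, logging progress roughly every ten seconds. A condensed sketch under those assumptions; the function name and backoff interval below are illustrative, and endpoint/timeout values mirror the diff.

```python
# Sketch of a polling readiness check in the style of wait_for_router_ready.
import logging
import time

import requests

logger = logging.getLogger(__name__)


def wait_for_health(router_url: str, timeout: int = 180, interval: float = 1.0) -> None:
    start = time.time()
    with requests.Session() as session:
        while time.time() - start < timeout:
            elapsed = int(time.time() - start)
            if elapsed and elapsed % 10 == 0:
                logger.info("Still waiting for router... (%d/%ds elapsed)", elapsed, timeout)
            try:
                if session.get(f"{router_url}/health", timeout=5).status_code == 200:
                    logger.info("Router ready after %ds", elapsed)
                    return
            except requests.RequestException:
                pass  # not up yet; keep polling
            time.sleep(interval)
    raise TimeoutError(f"Router at {router_url} not ready after {timeout}s")
```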
@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
         resp = self.create_response(
             "Test", previous_response_id="resp_invalid123", max_output_tokens=50
         )
-        # Should return 404 or 400 for invalid response ID
-        if resp.status_code != 200:
-            print(f"\n❌ Response creation failed!")
-            print(f"Status: {resp.status_code}")
-            print(f"Response: {resp.text}")
         self.assertIn(resp.status_code, [400, 404])
     def test_conversation_with_multiple_turns(self):
...
@@ -2,6 +2,7 @@
 Utility functions for Response API e2e tests.
 """
+import logging
 import os
 import signal
 import threading
@@ -9,6 +10,8 @@ import unittest
 import psutil
+logger = logging.getLogger(__name__)
 def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
     """
@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
                 return super(CustomTestCase, self)._callTestMethod(method)
             except Exception as e:
                 if attempt < max_retry:
-                    print(
+                    logger.info(
                         f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
                     )
                     continue
                 else:
                     # Last attempt, re-raise the exception
                     raise
-
-    def setUp(self):
-        """Print test method name at the start of each test."""
-        print(f"[Test Method] {self._testMethodName}", flush=True)
-import os
 import subprocess
 import time
 from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple
 import pytest
 import requests
...
@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
     urls = urls_slow + urls_fast
     ids = ids_slow + ids_fast
     slow_id = ids_slow[0]
+    slow_url = urls_slow[0]
     rh = router_manager.start_router(
         worker_urls=urls,
...
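For background, `test_power_of_two_prefers_less_loaded` exercises the router's power-of-two-choices policy: sample two workers at random and route the request to the less loaded one. A toy sketch of the idea under simplified load bookkeeping, not the router's actual implementation:

```python
# Toy power-of-two-choices selection over a load map keyed by worker URL.
import random


def pick_worker(loads: dict[str, int]) -> str:
    a, b = random.sample(list(loads), 2)
    return a if loads[a] <= loads[b] else b


loads = {"grpc://127.0.0.1:30001": 12, "grpc://127.0.0.1:30002": 3}
# With exactly two workers both are always sampled, so the less loaded one wins.
print(pick_worker(loads))
```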