conftest.py 2.49 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# sglang/test/srt/openai/conftest.py
import os
import socket
import subprocess
import sys
import tempfile
import time
from contextlib import closing
from typing import Generator

import pytest
import requests

from sglang.srt.utils import kill_process_tree  # reuse SGLang helper
15
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
16
17

# Module path passed to ``python -m`` when spawning the server under test.
SERVER_MODULE = "sglang.srt.entrypoints.openai.api_server"
# Model used when ``launch_openai_server`` is called without an explicit one.
DEFAULT_MODEL = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# Seconds to wait for the server's /health probe to succeed; can be overridden
# through the SGLANG_OPENAI_STARTUP_TIMEOUT environment variable.
STARTUP_TIMEOUT = float(os.environ.get("SGLANG_OPENAI_STARTUP_TIMEOUT", 120))


def _pick_free_port() -> int:
    """Return a TCP port number that was free on 127.0.0.1 when probed.

    A throwaway socket is bound to port 0 so the OS chooses the port; the
    socket is closed before returning, so the port is not reserved and could
    in principle be taken by another process before the caller uses it.
    """
    with socket.socket() as probe:
        probe.bind(("127.0.0.1", 0))
        _host, port = probe.getsockname()
        return port


def _wait_until_healthy(proc: subprocess.Popen, base: str, timeout: float) -> None:
    """Poll ``{base}/health`` until it answers 200 or the time budget runs out.

    Args:
        proc: The server process; it is checked on every iteration so that a
            crash is reported immediately instead of waiting for the timeout.
        base: Base URL of the server (no trailing slash).
        timeout: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: If the process exits while waiting, or if no 200
            response is seen within ``timeout`` seconds.
    """
    began = time.perf_counter()
    health_url = f"{base}/health"
    while time.perf_counter() - began < timeout:
        if proc.poll() is not None:  # process already exited
            raise RuntimeError("api_server terminated prematurely")
        try:
            response = requests.get(health_url, timeout=1)
        except requests.RequestException:
            # Not accepting connections yet (or request failed); retry.
            response = None
        if response is not None and response.status_code == 200:
            return
        time.sleep(0.4)
    raise RuntimeError("api_server readiness probe timed out")


def launch_openai_server(model: str = DEFAULT_MODEL, **kw):
    """Spawn the draft OpenAI-compatible server and wait until it's ready.

    Args:
        model: Value passed to the server's ``--model-path`` flag.
        **kw: Optional settings. ``args`` is an iterable of extra command-line
            arguments (each converted with ``str``) appended to the command;
            ``env`` is a mapping of environment variables layered on top of
            ``os.environ``. Any other key is ignored.

    Returns:
        A ``(proc, base, log_file)`` tuple: the ``subprocess.Popen`` handle,
        the server's base URL, and the open temporary file that receives the
        server's stdout/stderr. The caller owns all three and is responsible
        for stopping the process and closing the file.

    Raises:
        RuntimeError: If the server exits early or does not pass the health
            probe within ``STARTUP_TIMEOUT`` seconds. Before re-raising, the
            process is stopped, its log is printed to stderr, and the log
            file handle is closed.
    """
    port = _pick_free_port()
    cmd = [
        sys.executable,
        "-m",
        SERVER_MODULE,
        "--model-path",
        model,
        "--host",
        "127.0.0.1",
        "--port",
        str(port),
        *map(str, kw.get("args", [])),
    ]
    env = {**os.environ, **kw.get("env", {})}

    # Write logs to a temp file so the child never blocks on a full pipe.
    log_file = tempfile.NamedTemporaryFile("w+", delete=False)
    try:
        proc = subprocess.Popen(
            cmd,
            env=env,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except BaseException:
        # The process never started; don't leak the open log handle.
        log_file.close()
        raise

    base = f"http://127.0.0.1:{port}"
    try:
        _wait_until_healthy(proc, base, STARTUP_TIMEOUT)
    except BaseException:
        # BaseException (not just Exception) so that Ctrl-C during the
        # startup wait does not leave an orphaned server behind.
        try:
            proc.terminate()
            try:
                proc.wait(5)
            except subprocess.TimeoutExpired:
                # Graceful shutdown did not finish in time; force it so the
                # original startup error is not masked by TimeoutExpired.
                proc.kill()
                proc.wait()
            log_file.seek(0)
            print("\n--- api_server log ---\n", log_file.read(), file=sys.stderr)
        finally:
            log_file.close()
        raise  # bare raise keeps the original traceback intact
    return proc, base, log_file


@pytest.fixture(scope="session")
def openai_server() -> Generator[str, None, None]:
    """Session-scoped fixture yielding the base URL of a running server.

    The server is started once via ``launch_openai_server()`` with its
    default arguments. At teardown ``kill_process_tree`` is called on the
    server's PID and the log file handle is closed.
    """
    server_proc, base_url, server_log = launch_openai_server()
    yield base_url
    # Teardown: stop the server first, then release its log file handle.
    kill_process_tree(server_proc.pid)
    server_log.close()