test_sglang.py 3.01 KB
Newer Older
1
2
3
4
5
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
import os
6
from dataclasses import dataclass, field
7
8
9

import pytest

10
11
12
from tests.serve.common import run_serve_deployment
from tests.utils.engine_process import EngineConfig
from tests.utils.payload_builder import chat_payload_default, completion_payload_default
13
14
15
16
17

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


@dataclass
class SGLangConfig(EngineConfig):
    """Configuration for SGLang test scenarios.

    Extends ``EngineConfig`` by declaring ``stragglers`` with an
    SGLang-specific default value.
    """

    # Defaults to ["SGLANG:EngineCore"]. A default_factory is used so every
    # instance gets its own list rather than sharing one mutable default.
    # NOTE(review): presumably names of leftover processes to clean up after
    # a run -- confirm against EngineConfig / the serve helpers.
    stragglers: list[str] = field(default_factory=lambda: ["SGLANG:EngineCore"])
22
23


24
# Directory passed as `directory=` to the "disaggregated" and "kv_events"
# configs below. Taken from the SGLANG_DIR environment variable when set,
# otherwise falls back to the fixed path given here.
sglang_dir = os.environ.get("SGLANG_DIR", "/workspace/components/backends/sglang")
25
26
27

# SGLang test scenarios keyed by scenario name. Each key matches the `name`
# of its config and is what the `sglang_config_test` fixture looks up.
sglang_configs = {
    "aggregated": SGLangConfig(
        name="aggregated",
        directory="/workspace/tests/serve",
        script_name="sglang_agg.sh",
        marks=[pytest.mark.gpu_1],
        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        env={},
        models_port=8000,
        request_payloads=[chat_payload_default(), completion_payload_default()],
    ),
    "disaggregated": SGLangConfig(
        name="disaggregated",
        directory=sglang_dir,
        script_name="disagg.sh",
        marks=[pytest.mark.gpu_2],
        model="Qwen/Qwen3-0.6B",
        env={},
        models_port=8000,
        request_payloads=[chat_payload_default(), completion_payload_default()],
    ),
    "kv_events": SGLangConfig(
        name="kv_events",
        directory=sglang_dir,
        script_name="agg_router.sh",
        marks=[pytest.mark.gpu_2],
        model="Qwen/Qwen3-0.6B",
        env={
            # Log filter: trace level for the KV router publisher, info level
            # for the scheduler. NOTE(review): presumably required so the
            # expected_log patterns below can be found -- confirm.
            "DYN_LOG": "dynamo_llm::kv_router::publisher=trace,dynamo_llm::kv_router::scheduler=info",
        },
        models_port=8000,
        request_payloads=[
            # Only a chat payload here; it carries regex patterns for the
            # log lines this scenario expects.
            chat_payload_default(
                expected_log=[
                    r"ZMQ listener .* received batch with \d+ events \(seq=\d+\)",
                    r"Event processor for worker_id \d+ processing event: Stored\(",
                    r"Selected worker: \d+, logit: ",
                ]
            )
        ],
    ),
}


@pytest.fixture(
    params=[
        # Each param id is a key of `sglang_configs`; the marks repeat the GPU
        # marks declared on the corresponding config entry.
        pytest.param("aggregated", marks=[pytest.mark.gpu_1]),
        pytest.param("disaggregated", marks=[pytest.mark.gpu_2]),
        pytest.param("kv_events", marks=[pytest.mark.gpu_2]),
    ]
)
def sglang_config_test(request):
    """Fixture that provides different SGLang test configurations.

    Parametrized over the scenario names listed above, so a test using this
    fixture runs once per scenario.

    Returns:
        The ``sglang_configs`` entry whose key equals ``request.param``.

    Raises:
        KeyError: if a parameter name has no entry in ``sglang_configs``.
    """
    return sglang_configs[request.param]


@pytest.mark.e2e
@pytest.mark.sglang
def test_sglang_deployment(sglang_config_test, request, runtime_services):
    """Test SGLang deployment scenarios using common helpers.

    Runs once per configuration supplied by the ``sglang_config_test``
    fixture and hands that configuration, together with the pytest
    ``request`` object, to ``run_serve_deployment``.
    """
    # `runtime_services` is requested but never referenced in the body.
    # NOTE(review): presumably needed for its setup/teardown side effects --
    # confirm before removing it from the signature.
    run_serve_deployment(sglang_config_test, request)
88
89


90
91
92
93
94
95
@pytest.mark.skip(
    reason="Requires 4 GPUs - enable when hardware is consistently available"
)
def test_sglang_disagg_dp_attention(request, runtime_services):
    """Test sglang disaggregated with DP attention (requires 4 GPUs)

    Unconditionally skipped via ``pytest.mark.skip``; the visible body
    contains no executable statements.
    """

    # Kept for reference; this test uses a different launch path and is skipped