# test_sglang.py
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
import os
6
from dataclasses import dataclass, field
7
8
9

import pytest

Alec's avatar
Alec committed
10
11
12
13
14
from tests.serve.common import (
    SERVE_TEST_DIR,
    params_with_model_mark,
    run_serve_deployment,
)
15
from tests.utils.engine_process import EngineConfig
16
17
18
19
20
from tests.utils.payload_builder import (
    chat_payload,
    chat_payload_default,
    completion_payload_default,
)
21
22
23
24
25

logger = logging.getLogger(__name__)


@dataclass
class SGLangConfig(EngineConfig):
    """Configuration for SGLang test scenarios.

    Subclasses ``EngineConfig`` and declares ``stragglers`` with an
    SGLang-specific default; all other fields come from the base class.
    """

    # default_factory builds a fresh list for every instance, so configs never
    # share one mutable default.
    # NOTE(review): presumably these are process-name patterns the test harness
    # cleans up after a run -- confirm against EngineConfig.
    stragglers: list[str] = field(default_factory=lambda: ["SGLANG:EngineCore"])
30
31


32
# Directory holding the SGLang backend launch scripts; the SGLANG_DIR
# environment variable overrides the in-container default path.
sglang_dir = os.getenv(
    "SGLANG_DIR",
    "/workspace/components/backends/sglang",
)
33
34
35

# Test scenarios keyed by scenario name. The mapping is passed to
# params_with_model_mark(...) to parametrize the fixture below, and the fixture
# looks each scenario up again by key via request.param.
sglang_configs = {
    # Single-GPU scenario launched from the shared serve-test directory;
    # sends one default chat request and one default completion request.
    "aggregated": SGLangConfig(
        name="aggregated",
        directory=SERVE_TEST_DIR,
        script_name="sglang_agg.sh",
        marks=[pytest.mark.gpu_1],
        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        env={},
        models_port=8000,
        request_payloads=[chat_payload_default(), completion_payload_default()],
    ),
    # Two-GPU scenario launched from the SGLang backend directory with the
    # same default chat + completion payloads.
    "disaggregated": SGLangConfig(
        name="disaggregated",
        directory=sglang_dir,
        script_name="disagg.sh",
        marks=[pytest.mark.gpu_2],
        model="Qwen/Qwen3-0.6B",
        env={},
        models_port=8000,
        request_payloads=[chat_payload_default(), completion_payload_default()],
    ),
    # Raises log verbosity for the KV-router publisher/scheduler through
    # DYN_LOG and then expects the three log patterns below to show up.
    "kv_events": SGLangConfig(
        name="kv_events",
        directory=sglang_dir,
        script_name="agg_router.sh",
        marks=[pytest.mark.gpu_2],
        model="Qwen/Qwen3-0.6B",
        env={
            "DYN_LOG": "dynamo_llm::kv_router::publisher=trace,dynamo_llm::kv_router::scheduler=info",
        },
        models_port=8000,
        request_payloads=[
            chat_payload_default(
                expected_log=[
                    r"ZMQ listener .* received batch with \d+ events \(seq=\d+\)",
                    r"Event processor for worker_id \d+ processing event: Stored\(",
                    r"Selected worker: \d+, logit: ",
                ]
            )
        ],
    ),
    "template_verification": SGLangConfig(
        # Tests custom jinja template preprocessing by verifying the template
        # marker 'CUSTOM_TEMPLATE_ACTIVE|' is applied to user messages.
        # The backend (launch/template_verifier.*) checks for this marker
        # and returns "Successfully Applied Chat Template" if found.
        name="template_verification",
        directory=SERVE_TEST_DIR,
        script_name="template_verifier.sh",
        marks=[pytest.mark.gpu_1],
        model="Qwen/Qwen3-0.6B",
        env={},
        models_port=8000,
        request_payloads=[
            chat_payload_default(
                expected_response=["Successfully Applied Chat Template"]
            )
        ],
    ),
    # Multimodal scenario: one chat request mixing a text question with an
    # image URL, sent once at temperature 0.0, expecting "bus" in the reply.
    # Unlike the other entries it passes delayed_start/timeout and no env.
    "multimodal_agg_qwen": SGLangConfig(
        name="multimodal_agg_qwen",
        directory=sglang_dir,
        script_name="multimodal_agg.sh",
        marks=[pytest.mark.gpu_2],
        model="Qwen/Qwen2.5-VL-7B-Instruct",
        delayed_start=0,
        timeout=360,
        models_port=8000,
        request_payloads=[
            chat_payload(
                [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "http://images.cocodataset.org/test2017/000000155781.jpg"
                        },
                    },
                ],
                repeat_count=1,
                expected_response=["bus"],
                temperature=0.0,
            )
        ],
    ),
}


Alec's avatar
Alec committed
122
@pytest.fixture(params=params_with_model_mark(sglang_configs))
def sglang_config_test(request):
    """Fixture that provides different SGLang test configurations.

    The fixture is parametrized with the result of
    ``params_with_model_mark(sglang_configs)``. For each run,
    ``request.param`` is used as a key into ``sglang_configs`` and the
    matching ``SGLangConfig`` is returned.
    """
    return sglang_configs[request.param]


@pytest.mark.e2e
@pytest.mark.sglang
def test_sglang_deployment(
    sglang_config_test, request, runtime_services, predownload_models
):
    """Test SGLang deployment scenarios using common helpers.

    The scenario supplied by the ``sglang_config_test`` fixture is handed,
    together with pytest's ``request`` object, to ``run_serve_deployment``.
    """
    # runtime_services and predownload_models are never referenced below.
    # NOTE(review): presumably they are requested only for their setup side
    # effects -- confirm against the fixture definitions.
    run_serve_deployment(sglang_config_test, request)
136
137


138
139
140
@pytest.mark.skip(
    reason="Requires 4 GPUs - enable when hardware is consistently available"
)
Alec's avatar
Alec committed
141
def test_sglang_disagg_dp_attention(request, runtime_services, predownload_models):
142
143
    """Test sglang disaggregated with DP attention (requires 4 GPUs)"""

144
    # Kept for reference; this test uses a different launch path and is skipped