# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Test suite for profile_sla dry-run functionality.

This test ensures that the profile_sla script can successfully run in dry-run mode
for vllm, sglang, and trtllm backends with their respective disagg.yaml configurations.
"""

import sys
from pathlib import Path

import pytest

# Put the project root on sys.path so the `benchmarks` package imported below
# can be resolved. The root is taken to be the third ancestor of this file's
# path (three `.parent` hops), and it is inserted at index 0 so it is searched
# before any other entry.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

# This import must stay below the sys.path insertion above; the E402
# ("module level import not at top of file") suppression is deliberate.
from benchmarks.profiler.profile_sla import run_profile  # noqa: E402


class TestProfileSLADryRun:
    """Dry-run smoke tests for ``profile_sla.run_profile``.

    Each test awaits ``run_profile`` on an argument object whose ``dry_run``
    attribute is ``True``. There are no explicit assertions: a test passes
    when the call returns without raising.
    """

    @staticmethod
    def _make_args(backend, config, **overrides):
        """Build the argument object that is handed to ``run_profile``.

        All dry-run tests share the same settings except for the backend, the
        config file and (for the MoE case) a few sizing values, so the common
        values live here and the fixtures only state what differs.

        Args:
            backend: Value stored in ``args.backend`` (e.g. ``"vllm"``).
            config: Value stored in ``args.config``; the YAML config path.
            **overrides: Attribute values that replace the shared defaults.

        Returns:
            A fresh object exposing every setting as a plain attribute.

        Raises:
            AttributeError: If an override names an attribute that is not one
                of the shared defaults, so a typo in a fixture fails loudly
                instead of silently adding an unused attribute.
        """

        class Args:
            def __init__(self):
                self.backend = backend
                self.config = config
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.max_context_length = 16384
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.is_moe_model = False
                self.dry_run = True
                self.use_ai_configurator = False
                self.aic_system = None
                self.aic_model_name = None
                self.aic_backend = ""
                self.aic_backend_version = None
                self.num_gpus_per_node = 8
                self.deploy_after_profile = False

        args = Args()
        for name, value in overrides.items():
            if not hasattr(args, name):
                raise AttributeError(f"unknown profile_sla argument: {name!r}")
            setattr(args, name, value)
        return args

    @pytest.fixture
    def vllm_args(self):
        """Create arguments for vllm backend dry-run test."""
        return self._make_args(
            backend="vllm",
            config="components/backends/vllm/deploy/disagg.yaml",
        )

    @pytest.fixture
    def sglang_args(self):
        """Create arguments for sglang backend dry-run test."""
        return self._make_args(
            backend="sglang",
            config="components/backends/sglang/deploy/disagg.yaml",
        )

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_vllm_dryrun(self, vllm_args):
        """Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(vllm_args)

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_sglang_dryrun(self, sglang_args):
        """Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(sglang_args)

    @pytest.fixture
    def trtllm_args(self):
        """Create arguments for trtllm backend dry-run test."""
        return self._make_args(
            backend="trtllm",
            config="components/backends/trtllm/deploy/disagg.yaml",
        )

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_trtllm_dryrun(self, trtllm_args):
        """Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(trtllm_args)

    @pytest.fixture
    def sglang_moe_args(self):
        """Create arguments for sglang backend dry-run test with an MoE config."""
        # Differs from the plain sglang fixture in the config file, the
        # per-engine GPU bounds (8..32 instead of 1..8) and the MoE flag.
        return self._make_args(
            backend="sglang",
            config="recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml",
            min_num_gpus_per_engine=8,
            max_num_gpus_per_engine=32,
            is_moe_model=True,
        )

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_sglang_moe_dryrun(self, sglang_moe_args):
        """Test that profile_sla dry-run works for sglang backend with MoE config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(sglang_moe_args)