test_profile_sla_dryrun.py 6.8 KB
Newer Older
1
2
3
4
5
6
7
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Test suite for profile_sla dry-run functionality.

This test ensures that the profile_sla script can successfully run in dry-run mode
8
for vllm, sglang, and trtllm backends with their respective disagg.yaml configurations.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""

import sys
from pathlib import Path

import pytest

# Add the project root to sys.path to enable imports
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from benchmarks.profiler.profile_sla import run_profile  # noqa: E402


class TestProfileSLADryRun:
    """Test class for profile_sla dry-run functionality."""

    @pytest.fixture
    def vllm_args(self):
        """Create arguments for vllm backend dry-run test."""

        class Args:
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
            def __init__(self):
                self.backend = "vllm"
                self.config = "components/backends/vllm/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.max_context_length = 16384
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.is_moe_model = False
                self.dry_run = True
                self.use_ai_configurator = False
                self.aic_system = None
                self.aic_model_name = None
                self.backend_version = None
                self.num_gpus_per_node = 8
55
56
57
58
59
60
61
62

        return Args()

    @pytest.fixture
    def sglang_args(self):
        """Create arguments for sglang backend dry-run test."""

        class Args:
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
            def __init__(self):
                self.backend = "sglang"
                self.config = "components/backends/sglang/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.max_context_length = 16384
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.is_moe_model = False
                self.dry_run = True
                self.use_ai_configurator = False
                self.aic_system = None
                self.aic_model_name = None
                self.backend_version = None
                self.num_gpus_per_node = 8
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

        return Args()

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_vllm_dryrun(self, vllm_args):
        """Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(vllm_args)

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_sglang_dryrun(self, sglang_args):
        """Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(sglang_args)
103
104
105
106
107
108

    @pytest.fixture
    def trtllm_args(self):
        """Create arguments for trtllm backend dry-run test."""

        class Args:
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
            def __init__(self):
                self.backend = "trtllm"
                self.config = "components/backends/trtllm/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.max_context_length = 16384
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.is_moe_model = False
                self.dry_run = True
                self.use_ai_configurator = False
                self.aic_system = None
                self.aic_model_name = None
                self.backend_version = None
                self.num_gpus_per_node = 8
133
134
135
136
137
138
139
140
141

        return Args()

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_trtllm_dryrun(self, trtllm_args):
        """Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(trtllm_args)
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

    @pytest.fixture
    def sglang_moe_args(self):
        """Create arguments for trtllm backend dry-run test."""

        class Args:
            def __init__(self):
                self.backend = "sglang"
                self.config = (
                    "recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml"
                )
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 8
                self.max_num_gpus_per_engine = 32
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.max_context_length = 16384
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.is_moe_model = True
                self.dry_run = True
                self.use_ai_configurator = False
                self.aic_system = None
                self.aic_model_name = None
                self.backend_version = None
                self.num_gpus_per_node = 8

        return Args()

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_sglang_moe_dryrun(self, sglang_moe_args):
        """Test that profile_sla dry-run works for sglang backend with MoE config."""
        # Run the profile in dry-run mode - should complete without errors
        await run_profile(sglang_moe_args)