# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Test suite for profile_sla aiconfigurator functionality.

profile_sla should be able to use aiconfigurator functionality
even without access to any GPU system.
"""

import sys
from pathlib import Path

import pytest

# Put the project root (the third ancestor directory of this file) at the
# front of sys.path so the imports below can be resolved.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

# If any profiler import fails, skip the whole module instead of erroring
# out at collection time.
try:
    from dynamo.profiler.profile_sla import run_profile  # noqa: E402
    from dynamo.profiler.utils.defaults import SearchStrategy  # noqa: E402
    from dynamo.profiler.utils.model_info import ModelInfo  # noqa: E402
except ImportError as _import_error:
    pytest.skip(
        f"Skip testing (refactor in progress): {_import_error}",
        allow_module_level=True,
    )

# Marker applied to every test in this module.
pytestmark = [pytest.mark.aiconfigurator]


# Shadows the logger fixture from conftest.py so that no directories get created
@pytest.fixture(autouse=True)
def logger(request):
    """No-op stand-in for the ``logger`` fixture in tests/conftest.py.

    The conftest version creates a directory named after each test. This
    override yields straight away: no directories are created and no file
    handlers are added.
    """
    yield None


class TestProfileSlaAiconfigurator:
    """Test class for profile_sla aiconfigurator functionality."""

    @pytest.fixture
    def llm_args(self, request, tmp_path):
        """Build the argument object handed to ``run_profile``.

        The defaults select the RAPID search strategy, so performance
        estimates come from AI Configurator rather than from a Kubernetes
        deployment. Individual tests mutate attributes on the returned object
        before calling ``run_profile``.

        Args:
            request: pytest request object; the test node name is used to
                build a per-test namespace.
            tmp_path: pytest-provided temporary directory that is unique to
                each test invocation.

        Returns:
            A plain attribute container with one attribute per profiler
            argument.
        """

        class Args:
            def __init__(self):
                self.model = "Qwen/Qwen3-32B"  # Set to match aic_hf_id for consistency
                self.dgd_image = ""
                self.backend = "trtllm"
                self.config = "examples/backends/trtllm/deploy/disagg.yaml"
                # Use a unique output directory per test for parallel execution.
                # tmp_path is used instead of a fixed /tmp path built from the
                # node name: parametrized node names contain "[", "]" and "/"
                # (e.g. "Qwen/Qwen3-32B"), which yields unintended nested
                # paths, and a fixed path persists across runs.
                self.output_dir = str(tmp_path / "profiling_results")
                self.namespace = f"test-namespace-{request.node.name}"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.dry_run = False
                self.num_gpus_per_node = 8
                self.deploy_after_profile = False
                self.pick_with_webui = False
                # Use RAPID strategy to leverage AI Configurator for perf estimation
                # This avoids Kubernetes deployments while testing aiconfigurator functionality
                self.search_strategy = SearchStrategy.RAPID
                self.system = "h200_sxm"  # Must match aic_system for RAPID strategy
                # Provide minimal model_info to avoid HF queries
                self.model_info = ModelInfo(
                    model_size=16384.0,
                    architecture="TestArchitecture",
                    is_moe=False,
                    max_context_length=16384,
                )

        return Args()

    @pytest.mark.pre_merge
    @pytest.mark.gpu_0
    @pytest.mark.performance
    @pytest.mark.parallel
    @pytest.mark.asyncio
    @pytest.mark.parametrize("missing_arg", ["system", "model"])
    async def test_aiconfigurator_missing_args(self, llm_args, missing_arg):
        """A required RAPID-strategy argument set to None must raise ValueError.

        These args are required when using SearchStrategy.RAPID with
        AI Configurator.
        """
        setattr(llm_args, missing_arg, None)
        with pytest.raises(ValueError):
            await run_profile(llm_args)

    @pytest.mark.pre_merge
    @pytest.mark.gpu_0
    @pytest.mark.performance
    @pytest.mark.parallel
    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "arg_name, bad_value",
        [
            # these values don't exist in the aiconfigurator database.
            ("system", "fake_gpu_system"),
        ],
    )
    async def test_aiconfigurator_no_data(self, llm_args, arg_name, bad_value):
        """A value missing from the aiconfigurator database must raise ValueError.

        The error message is expected to match "Database not found". Only the
        ``system`` argument is currently parametrized; further
        ``(arg_name, bad_value)`` pairs can be added to the list above.
        """
        setattr(llm_args, arg_name, bad_value)
        with pytest.raises(ValueError, match="Database not found"):
            await run_profile(llm_args)

    @pytest.mark.trtllm
    @pytest.mark.pre_merge
    @pytest.mark.parallel
    @pytest.mark.asyncio
    @pytest.mark.gpu_0
    @pytest.mark.integration
    async def test_trtllm_aiconfigurator_single_model(self, llm_args):
        """profile_sla runs with the model and backend from the llm_args fixture."""
        await run_profile(llm_args)

    @pytest.mark.parallel
    @pytest.mark.asyncio
    @pytest.mark.gpu_1
    @pytest.mark.integration
    @pytest.mark.nightly
    # fmt: off
    @pytest.mark.parametrize(
        "backend",
        [
            pytest.param("trtllm", marks=pytest.mark.trtllm),
            pytest.param("vllm",   marks=pytest.mark.vllm),
            pytest.param("sglang", marks=pytest.mark.sglang),
        ],
    )
    # fmt: on
    @pytest.mark.parametrize(
        "hf_model_id",
        [
            "Qwen/Qwen3-32B",
            "meta-llama/Llama-3.1-405B",
        ],
    )
    async def test_aiconfigurator_dense_models(self, llm_args, hf_model_id, backend):
        """profile_sla runs for every combination of backend and model name.

        Uses AI Configurator's RAPID strategy for performance estimation.
        Backend version is not used with RAPID strategy - performance comes
        from AI Configurator.
        """
        llm_args.model = hf_model_id  # Used by RAPID strategy
        llm_args.backend = backend
        await run_profile(llm_args)