"examples/online_serving/chart-helm/templates/pvc.yaml" did not exist on "fe2e10c71b98a43ccde0e8aba0d4fe0d23369538"
test_profile_sla_aiconfigurator.py 4.81 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Test suite for profile_sla aiconfigurator functionality.

profile_sla should be able to use aiconfigurator functionality
even without access to any GPU system.
"""

import sys
from pathlib import Path

import pytest

# Add the project root to sys.path to enable imports
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from benchmarks.profiler.profile_sla import run_profile  # noqa: E402
21
22
23
24
25
26
27
28
29
30
31
32
33
from benchmarks.profiler.utils.model_info import ModelInfo  # noqa: E402


# Override the logger fixture from conftest.py to prevent directory creation
@pytest.fixture(autouse=True)
def logger(request):
    """No-op stand-in for the ``logger`` fixture from tests/conftest.py.

    The conftest version creates a directory named after each test. Because
    this fixture has the same name and is autouse, every test in this module
    picks up this version instead, so no such directories are created.
    """
    # Intentionally empty: no directory is made and no file handler is
    # attached, so there is nothing to set up before or tear down after.
    yield
34
35
36
37
38
39
40
41


class TestProfileSlaAiconfigurator:
    """Test class for profile_sla aiconfigurator functionality."""

    @pytest.fixture
    def trtllm_args(self, tmp_path):
        """Return a fresh argument object for ``run_profile`` (trtllm backend).

        The object is a plain attribute bag with ``use_ai_configurator``
        enabled. Each test receives its own instance, so tests may freely
        overwrite attributes with ``setattr``.

        Args:
            tmp_path: pytest's built-in per-test temporary directory. The
                profiling output directory is placed under it so that tests
                (and parametrized cases) never share or leak results through
                a fixed path in ``/tmp``.
        """

        class Args:
            def __init__(self):
                self.model = ""
                self.dgd_image = ""
                self.backend = "trtllm"
                self.config = "examples/backends/trtllm/deploy/disagg.yaml"
                # Per-test location instead of a hard-coded shared /tmp path;
                # kept as ``str`` because the original value was a string.
                self.output_dir = str(tmp_path / "test_profiling_results")
                self.namespace = "test-namespace"
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
                self.force_rerun = False
                self.isl = 3000
                self.osl = 500
                self.ttft = 50
                self.itl = 10
                self.prefill_interpolation_granularity = 16
                self.decode_interpolation_granularity = 6
                self.service_name = ""
                self.dry_run = False
                self.use_ai_configurator = True
                self.aic_system = "h200_sxm"
                self.aic_hf_id = "Qwen/Qwen3-32B"
                self.aic_backend = ""
                self.aic_backend_version = None
                self.num_gpus_per_node = 8
                self.deploy_after_profile = False
                # Provide minimal model_info to avoid HF queries
                self.model_info = ModelInfo(
                    model_size=16384.0,
                    architecture="TestArchitecture",
                    is_moe=False,
                    max_context_length=16384,
                )

        return Args()

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
    async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
        """A required aiconfigurator argument set to None must raise ValueError."""
        # Check that validation error happens when a required arg is missing.
        # Note: aic_backend_version is optional - when None, auto-detects latest version
        setattr(trtllm_args, missing_arg, None)
        with pytest.raises(ValueError):
            await run_profile(trtllm_args)

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "arg_name, bad_value",
        [
            # these values don't exist in the aiconfigurator database.
            ("aic_system", "fake_gpu_system"),
            ("aic_backend_version", "0.1.0"),
        ],
    )
    async def test_aiconfiguator_no_data(self, trtllm_args, arg_name, bad_value):
        """An unknown system or backend version must raise "Database not found"."""
        # Check that an appropriate error is raised when the system or the
        # backend version is not found in the aiconfigurator database.
        setattr(trtllm_args, arg_name, bad_value)
        with pytest.raises(ValueError, match="Database not found"):
            await run_profile(trtllm_args)

    @pytest.mark.pre_merge
    @pytest.mark.asyncio
    async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
        """``run_profile`` completes without raising for the fixture defaults."""
        # Test that profile_sla works with the model & backend in the trtllm_args fixture.
        await run_profile(trtllm_args)

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "backend, aic_backend_version",
        [
            ("trtllm", None),
            ("trtllm", "0.20.0"),
            ("trtllm", "1.0.0rc3"),
        ],
    )
    @pytest.mark.parametrize(
        "hf_model_id",
        [
            "Qwen/Qwen3-32B",
            "meta-llama/Llama-3.1-405B",
        ],
    )
    async def test_trtllm_aiconfigurator_many(
        self, trtllm_args, hf_model_id, backend, aic_backend_version
    ):
        """``run_profile`` completes for each model / backend-version combination."""
        # Test that profile_sla works with a variety of backend versions and model names.
        trtllm_args.aic_hf_id = hf_model_id
        trtllm_args.backend = backend
        trtllm_args.aic_backend_version = aic_backend_version
        await run_profile(trtllm_args)