# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# DynamoGraphDeploymentRequest for online profiling (actual deployment testing)
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
  name: sla-online
spec:
  model: Qwen/Qwen3-0.6B
  backend: vllm

  # profilingConfig maps directly to the profile_sla.py config format.
  profilingConfig:
    # NOTE(review): "my-tag" looks like a placeholder tag; substitute a real
    # image tag before applying.
    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"

    # NOTE: any image built before January 10 and any release prior to 0.8.1
    # will need to use snake_case within profilingConfig.config
    config:
      sweep:
        # NOTE(review): presumably false selects profiling against real
        # deployments instead of AI Configurator estimates; confirm against
        # the profiler documentation.
        useAiConfigurator: false
      # SLA targets handed to the profiler.
      # NOTE(review): isl/osl look like input/output sequence lengths (tokens)
      # and ttft/itl like time-to-first-token / inter-token latency targets
      # (presumably milliseconds); confirm names and units against the
      # profiler documentation.
      sla:
        isl: 3000
        osl: 150
        ttft: 200.0
        itl: 20.0
  deploymentOverrides:
    # NOTE(review): same placeholder tag as profilerImage; replace it as well.
    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
  # NOTE(review): presumably applies the generated deployment automatically
  # once profiling completes; confirm against the CRD reference.
  autoApply: true