# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# DynamoGraphDeploymentRequest for online profiling (actual deployment testing)
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
  name: sla-online
spec:
  model: Qwen/Qwen3-0.6B
  backend: vllm

  # ProfilingConfig maps directly to the profile_sla.py config format
  profilingConfig:
    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
    # NOTE: any image built before January 10 and any release prior to 0.8.1
    # will need to use snake_case within profilingConfig.config
    config:
      sweep:
        useAiConfigurator: false
      # NOTE(review): isl/osl look like input/output sequence lengths and
      # ttft/itl like latency targets; confirm meanings and units against
      # the profiler documentation.
      sla:
        isl: 3000
        osl: 150
        ttft: 200.0
        itl: 20.0

  deploymentOverrides:
    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"

  autoApply: true