# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# DynamoGraphDeploymentRequest for MoE model profiling
# Converted from profile_sla_moe_job.yaml
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
  name: sla-moe
spec:
  model: deepseek-ai/DeepSeek-R1
  backend: sglang

  # ProfilingConfig maps directly to the profile_sla.py config format
  profilingConfig:
    profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
    config:
      # Sweep/profiling configuration
      sweep:
        # Standard online profiling (not using AI Configurator)
        use_ai_configurator: false

      # SLA targets for profiling
      sla:
        isl: 3000  # Input sequence length
        osl: 150  # Output sequence length
        ttft: 200.0  # Time To First Token target (milliseconds)
        itl: 20.0  # Inter-Token Latency target (milliseconds)

    # Reference to ConfigMap containing the DGD base config
    # For MoE models, this should point to the appropriate disagg config
    # Original path: /sgl-workspace/dynamo/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
    configMapRef:
      name: deepseek-r1-config
      key: tep16p-dep16d-disagg.yaml

  # Deployment overrides for the auto-created DGD
  deploymentOverrides:
    workersImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"

  # Automatically create DynamoGraphDeployment after profiling
  autoApply: true