# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# DynamoGraphDeploymentRequest for MoE model profiling
# Converted from profile_sla_moe_job.yaml
#
# NOTE(review): the nesting below (in particular configMapRef under
# profilingConfig, and deploymentOverrides / autoApply directly under spec)
# should be confirmed against the DynamoGraphDeploymentRequest CRD schema.
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
  name: sla-moe
spec:
  model: deepseek-ai/DeepSeek-R1
  backend: sglang

  # ProfilingConfig maps directly to the profile_sla.py config format
  profilingConfig:
    profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag"
    # NOTE: any image built before January 10 and any release prior to 0.8.1
    # will need to use snake_case within profilingConfig.config
    config:
      # 0.8.1 and later: Model cache PVC to access model weights
      deployment:
        modelCache:
          pvcName: "model-cache"  # Name of PVC containing model weights
          pvcPath: "deepseek-r1"  # Subpath within PVC where model is stored
      sweep:
        useAiConfigurator: false
      hardware:
        # for h200, sweep over 8-16 GPUs per engine
        minNumGpusPerEngine: 8
        maxNumGpusPerEngine: 16
        numGpusPerNode: 8
      sla:
        isl: 3000
        osl: 150
        ttft: 200.0
        itl: 20.0

    # Reference to ConfigMap containing the DGD base config
    # For MoE models, this should point to the appropriate disagg config
    # Original path: /sgl-workspace/dynamo/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
    configMapRef:
      name: deepseek-r1-config
      key: tep16p-dep16d-disagg.yaml

  deploymentOverrides:
    workersImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag"

  autoApply: true