# nvidia.com_v1alpha1_dynamocheckpoint.yaml

# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Custom resource of kind DynamoCheckpoint in the nvidia.com/v1alpha1 API group.
apiVersion: nvidia.com/v1alpha1
kind: DynamoCheckpoint
metadata:
  # Resource name. It appears to encode the model (Qwen3-0.6B) and dtype (bf16)
  # configured in spec.identity; NOTE(review): naming convention is not
  # enforced by anything visible in this file.
  name: qwen3-06b-bf16
spec:
  # Checkpoint identity: these fields determine the checkpoint hash.
  # The values mirror the worker flags under job.podTemplateSpec (--model,
  # --dtype, --tensor-parallel-size, --max-model-len); presumably the two must
  # stay in sync when either is edited. TODO confirm with the operator docs.
  identity:
    model: 'Qwen/Qwen3-0.6B'
    backendFramework: 'vllm'
    tensorParallelSize: 1
    pipelineParallelSize: 1
    dtype: 'bfloat16'
    maxModelLen: 2048

  # Optional GMS-specific checkpoint capture and restore helpers.
  # Disabled in this sample.
  gpuMemoryService:
    enabled: false

  # Job that creates the checkpoint: a single GPU worker container running
  # `python3 -m dynamo.vllm` with the flags listed under args.
  job:
    # Deadline for the job, in seconds (3600 s = 1 hour).
    activeDeadlineSeconds: 3600
    podTemplateSpec:
      spec:
        restartPolicy: Never
        # Pull secret for the container registry.
        # NOTE(review): presumably NGC credentials, judging by the name.
        imagePullSecrets:
          - name: ngc-secret
        containers:
          - name: worker
            # Placeholder image reference (registry.example.com); substitute
            # the real image before applying.
            image: registry.example.com/dynamo/vllm-placeholder:1.0.0
            command:
              - python3
            # Flag values repeat spec.identity (model, dtype, tensor-parallel
            # size, max model length). They are quoted so each arg is a string.
            args:
              - '-m'
              - 'dynamo.vllm'
              - '--model'
              - 'Qwen/Qwen3-0.6B'
              - '--dtype'
              - 'bfloat16'
              - '--tensor-parallel-size'
              - '1'
              - '--max-model-len'
              - '2048'
            # One GPU for the worker, matching tensor-parallel-size 1.
            resources:
              limits:
                nvidia.com/gpu: '1'
            # Every key of this secret is injected as an environment variable.
            # NOTE(review): presumably carries the Hugging Face token.
            envFrom:
              - secretRef:
                  name: hf-token-secret
            env:
              # Same path as the hf-cache volume mount below.
              - name: HF_HOME
                value: /home/dynamo/.cache/huggingface
              # Logging and debug settings for NCCL and PyTorch.
              - name: NCCL_DEBUG
                value: ERROR
              - name: TORCH_CPP_LOG_LEVEL
                value: ERROR
              # Quoted: a bare OFF would be read as boolean false by YAML 1.1
              # parsers, and env values must be strings.
              - name: TORCH_DISTRIBUTED_DEBUG
                value: 'OFF'
            volumeMounts:
              - name: hf-cache
                mountPath: /home/dynamo/.cache/huggingface
        # PVC-backed volume mounted at the HF_HOME path in the worker.
        volumes:
          - name: hf-cache
            persistentVolumeClaim:
              claimName: hf-cache-pvc