# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DynamoCheckpoint for Qwen/Qwen3-0.6B on the vLLM backend, bfloat16,
# running on a single GPU.
apiVersion: nvidia.com/v1alpha1
kind: DynamoCheckpoint
metadata:
  name: qwen3-06b-bf16
spec:
  # Identity - determines the checkpoint hash
  identity:
    model: "Qwen/Qwen3-0.6B"
    backendFramework: "vllm"
    tensorParallelSize: 1
    pipelineParallelSize: 1
    dtype: "bfloat16"
    maxModelLen: 2048

  # Optional: enable GMS-specific checkpoint capture and restore helpers.
  gpuMemoryService:
    enabled: false

  # Job configuration for checkpoint creation
  job:
    # 3600 seconds = 1 hour.
    activeDeadlineSeconds: 3600
    podTemplateSpec:
      spec:
        restartPolicy: Never
        imagePullSecrets:
          - name: ngc-secret
        volumes:
          - name: hf-cache
            persistentVolumeClaim:
              claimName: hf-cache-pvc
        containers:
          - name: worker
            # Placeholder image reference; replace with a real vLLM image.
            image: registry.example.com/dynamo/vllm-placeholder:1.0.0
            command:
              - python3
            # These flags repeat the model, dtype, tensor-parallel size and
            # max model length declared in spec.identity above.
            # NOTE(review): presumably they must stay in sync with
            # spec.identity - confirm against the operator's behavior.
            args:
              - "-m"
              - "dynamo.vllm"
              - "--model"
              - "Qwen/Qwen3-0.6B"
              - "--dtype"
              - "bfloat16"
              - "--tensor-parallel-size"
              - "1"
              - "--max-model-len"
              - "2048"
            envFrom:
              - secretRef:
                  name: hf-token-secret
            env:
              # Same path as the hf-cache volumeMount below.
              - name: HF_HOME
                value: /home/dynamo/.cache/huggingface
              - name: NCCL_DEBUG
                value: ERROR
              - name: TORCH_CPP_LOG_LEVEL
                value: ERROR
              # Quoted so the value stays the string "OFF" instead of being
              # read as a boolean by YAML 1.1 parsers.
              - name: TORCH_DISTRIBUTED_DEBUG
                value: "OFF"
            volumeMounts:
              - name: hf-cache
                mountPath: /home/dynamo/.cache/huggingface
            resources:
              limits:
                nvidia.com/gpu: "1"