# nvidia.com_v1alpha1_dynamocheckpoint.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DynamoCheckpoint resource describing a checkpoint for Qwen/Qwen3-0.6B (bfloat16)
# served through the vLLM backend, plus the Job pod template used to create it.
apiVersion: nvidia.com/v1alpha1
kind: DynamoCheckpoint
metadata:
  name: qwen3-06b-bf16
spec:
  # Identity - determines the checkpoint hash
  identity:
    model: "Qwen/Qwen3-0.6B"
    backendFramework: "vllm"
    tensorParallelSize: 1
    pipelineParallelSize: 1
    dtype: "bfloat16"
    maxModelLen: 2048

  # Optional: enable GMS-specific checkpoint capture and restore helpers.
  gpuMemoryService:
    enabled: false

  # Job configuration for checkpoint creation
  job:
    activeDeadlineSeconds: 3600
    podTemplateSpec:
      spec:
        restartPolicy: Never
        imagePullSecrets:
          - name: ngc-secret
        volumes:
          # Backed by the hf-cache-pvc claim; mounted into the worker below.
          - name: hf-cache
            persistentVolumeClaim:
              claimName: hf-cache-pvc
        containers:
          - name: worker
            # NOTE(review): looks like a placeholder image reference
            # (registry.example.com) - replace with the real image before use.
            image: registry.example.com/dynamo/vllm-placeholder:1.0.0
            command:
              - python3
            # The model, dtype, tensor-parallel size and max model length
            # below repeat the values declared under spec.identity; keep the
            # two in sync when editing either one.
            # NOTE(review): pipelineParallelSize from spec.identity has no
            # matching flag here - presumably the backend default applies;
            # confirm.
            args:
              - "-m"
              - "dynamo.vllm"
              - "--model"
              - "Qwen/Qwen3-0.6B"
              - "--dtype"
              - "bfloat16"
              - "--tensor-parallel-size"
              - "1"
              - "--max-model-len"
              - "2048"
            # Every key of hf-token-secret is exposed as an environment
            # variable in the worker container.
            envFrom:
              - secretRef:
                  name: hf-token-secret
            env:
              # Same path as the hf-cache volumeMount below.
              - name: HF_HOME
                value: /home/dynamo/.cache/huggingface
              - name: NCCL_DEBUG
                value: ERROR
              - name: TORCH_CPP_LOG_LEVEL
                value: ERROR
              # Quoted so the value stays the string "OFF" rather than being
              # read as a boolean by YAML 1.1 parsers.
              - name: TORCH_DISTRIBUTED_DEBUG
                value: "OFF"
            volumeMounts:
              - name: hf-cache
                mountPath: /home/dynamo/.cache/huggingface
            resources:
              limits:
                nvidia.com/gpu: "1"