# nvidia.com_v1alpha1_dynamocheckpoint.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DynamoCheckpoint "qwen3-06b-bf16": declares the identity of a checkpoint for
# Qwen/Qwen3-0.6B served by vLLM in bfloat16, plus the pod template of the job
# that creates that checkpoint.
apiVersion: nvidia.com/v1alpha1
kind: DynamoCheckpoint
metadata:
  name: qwen3-06b-bf16
spec:
  # Identity - determines the checkpoint hash
  identity:
    model: "Qwen/Qwen3-0.6B"
    backendFramework: "vllm"
    tensorParallelSize: 1
    pipelineParallelSize: 1
    dtype: "bfloat16"
    maxModelLen: 2048

  # Job configuration for checkpoint creation
  job:
    # 3600 seconds = 1 hour.
    activeDeadlineSeconds: 3600
    podTemplateSpec:
      spec:
        restartPolicy: Never
        imagePullSecrets:
          - name: ngc-secret
        volumes:
          # PVC-backed volume; mounted into the worker at the HF_HOME path below.
          - name: hf-cache
            persistentVolumeClaim:
              claimName: hf-cache-pvc
        containers:
          - name: worker
            image: registry.example.com/dynamo/vllm-placeholder:1.0.0
            command:
              - python3
            # Runs `python3 -m dynamo.vllm`. The flags below carry the same
            # values as spec.identity above (model, dtype, tensor parallel
            # size, max model length); update both places together.
            args:
              - "-m"
              - "dynamo.vllm"
              - "--model"
              - "Qwen/Qwen3-0.6B"
              - "--dtype"
              - "bfloat16"
              - "--tensor-parallel-size"
              - "1"
              - "--max-model-len"
              - "2048"
            # Exposes the keys of the hf-token-secret Secret as environment
            # variables in the container.
            envFrom:
              - secretRef:
                  name: hf-token-secret
            env:
              # Same path as the hf-cache volume mount below.
              - name: HF_HOME
                value: /home/dynamo/.cache/huggingface
              - name: NCCL_DEBUG
                value: ERROR
              - name: TORCH_CPP_LOG_LEVEL
                value: ERROR
              # Quoted on purpose: a bare OFF is read as boolean false by
              # YAML 1.1 parsers, and env values must be strings.
              - name: TORCH_DISTRIBUTED_DEBUG
                value: "OFF"
            volumeMounts:
              - name: hf-cache
                mountPath: /home/dynamo/.cache/huggingface
            resources:
              limits:
                # Quoted so the quantity is passed as a string.
                nvidia.com/gpu: "1"