# profile_sla_aic_job.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Kubernetes Job that runs the `benchmarks.profiler.profile_sla` Python module
# once and writes its results under /data on the `dynamo-pvc` volume.
#
# The ${NAMESPACE}, ${DOCKER_IMAGE} and ${DGD_CONFIG_FILE} placeholders are not
# valid values on their own; they are expected to be substituted (for example
# with `envsubst`) before this manifest is applied.
apiVersion: batch/v1
kind: Job
metadata:
  name: profile-sla
  # Quoted so the substituted value is always parsed as a string.
  namespace: "${NAMESPACE}"
spec:
  template:
    spec:
      serviceAccountName: dynamo-sa
      containers:
        - name: profile-sla
          image: "${DOCKER_IMAGE}"
          # Only requests are set; no limits are declared for this container.
          resources:
            requests:
              cpu: "16"
              memory: "10Gi"
          env:
            # Token is read from key HF_TOKEN of the `hf-token-secret` Secret.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: HF_TOKEN
            # NATS and etcd addresses are derived from the target namespace.
            - name: NATS_SERVER
              value: nats://${NAMESPACE}-nats:4222
            - name: ETCD_ENDPOINTS
              value: ${NAMESPACE}-etcd:2379
          command: ["python", "-m", "benchmarks.profiler.profile_sla"]
          # Container args must be strings, so number-like values are quoted.
          # NOTE(review): flag semantics (--isl/--osl/--ttft/--itl and the
          # --aic-* options) are defined by the profiler CLI, not by this
          # file; confirm units and accepted values against its --help.
          args:
            - --config
            - "${DGD_CONFIG_FILE}"
            - --output-dir
            - /data/profiling_results
            - --namespace
            - "${NAMESPACE}"
            - --backend
            - vllm
            - --min-num-gpus-per-engine
            - "1"
            - --max-num-gpus-per-engine
            - "8"
            - --isl
            - "3000"
            - --osl
            - "150"
            - --ttft
            - "500"
            - --itl
            - "30"
            - --use-ai-configurator
            - --aic-system
            - h200_sxm
            - --aic-model-name
            - QWEN3_32B
            - --aic-backend-version
            - "0.20.0"
          volumeMounts:
            # Backs the /data/profiling_results path passed to --output-dir.
            - name: output-volume
              mountPath: /data
      # With restartPolicy Never and backoffLimit 0 a failed run is not retried.
      restartPolicy: Never
      volumes:
        - name: output-volume
          persistentVolumeClaim:
            claimName: dynamo-pvc
  backoffLimit: 0